Repository: IntelligentSoftwareSystems/Galois Branch: master Commit: b67f94206a8c Files: 1074 Total size: 8.2 MB Directory structure: gitextract_7r5c247e/ ├── .circleci/ │ ├── config.yml │ └── longest_common_path.sh ├── .clang-format ├── .clang-tidy ├── .git-blame-ignore-revs ├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── COPYRIGHT ├── LICENSE.txt ├── README.md ├── cmake/ │ ├── GaloisConfig.cmake.in │ └── Modules/ │ ├── CheckArchFlags.cmake │ ├── CheckCilk.cmake │ ├── CheckEndian.cmake │ ├── CheckHugePages.cmake │ ├── CheckMmap.cmake │ ├── CheckSchedSetAffinity.cmake │ ├── FindCBLAS.cmake │ ├── FindFortran.cmake │ ├── FindGASNET.cmake │ ├── FindGMP.cmake │ ├── FindGit.cmake │ ├── FindNUMA.cmake │ ├── FindOpenCL.cmake │ ├── FindPAPI.cmake │ ├── FindQGLViewer.cmake │ ├── FindTBB.cmake │ ├── FindVTune.cmake │ ├── GetGitVersion-write.cmake │ ├── GetGitVersion.cmake │ ├── HandleSanitizer.cmake │ ├── LibFindMacros.cmake │ └── UseStdMacro.cmake ├── config/ │ ├── sanitizers/ │ │ └── ubsan_blacklist.txt.in │ └── version.txt ├── external/ │ └── bliss/ │ └── bliss/ │ ├── COPYING │ ├── COPYING.LESSER │ ├── abgraph.hh │ ├── bignum.hh │ ├── defs.hh │ ├── graph.hh │ ├── heap.hh │ ├── kqueue.hh │ ├── kstack.hh │ ├── orbit.hh │ ├── partition.hh │ ├── search.h │ ├── uintseqhash.hh │ └── utils.hh ├── inputs/ │ ├── CMakeLists.txt │ └── cholesky/ │ ├── matrix1.txt │ ├── matrix1.txt.choleskyedges │ ├── matrix1.txt.dep │ ├── matrix1.txt.filled │ ├── very-sparse.txt │ ├── very-sparse.txt.choleskyedges │ ├── very-sparse.txt.dep │ └── very-sparse.txt.filled ├── libcusp/ │ ├── CMakeLists.txt │ └── include/ │ └── galois/ │ └── graphs/ │ ├── BasePolicies.h │ ├── CuSPPartitioner.h │ ├── DistributedGraph.h │ ├── GenericPartitioners.h │ ├── MiningPartitioner.h │ └── NewGeneric.h ├── libdist/ │ ├── CMakeLists.txt │ ├── include/ │ │ └── galois/ │ │ ├── DReducible.h │ │ ├── DTerminationDetector.h │ │ ├── DistGalois.h │ │ └── runtime/ │ │ ├── BareMPI.h │ │ ├── DistStats.h │ │ 
├── LWCI.h │ │ ├── MemUsage.h │ │ ├── Network.h │ │ ├── NetworkIO.h │ │ └── Serialize.h │ └── src/ │ ├── Barrier.cpp │ ├── DistGalois.cpp │ ├── DistStats.cpp │ ├── Network.cpp │ ├── NetworkBuffered.cpp │ ├── NetworkIOMPI.cpp │ └── NetworkLCI.cpp ├── libgalois/ │ ├── CMakeLists.txt │ ├── include/ │ │ └── galois/ │ │ ├── ArrayWrapper.h │ │ ├── AtomicHelpers.h │ │ ├── AtomicWrapper.h │ │ ├── Bag.h │ │ ├── CheckedObject.h │ │ ├── CopyableTuple.h │ │ ├── DynamicBitset.h │ │ ├── Endian.h │ │ ├── FixedSizeRing.h │ │ ├── FlatMap.h │ │ ├── Galois.h │ │ ├── GaloisForwardDecl.h │ │ ├── LargeArray.h │ │ ├── LazyArray.h │ │ ├── LazyObject.h │ │ ├── Loops.h │ │ ├── Mem.h │ │ ├── MethodFlags.h │ │ ├── NoDerefIterator.h │ │ ├── PODResizeableArray.h │ │ ├── ParallelSTL.h │ │ ├── PerThreadContainer.h │ │ ├── PriorityQueue.h │ │ ├── Reduction.h │ │ ├── SharedMemSys.h │ │ ├── Threads.h │ │ ├── Timer.h │ │ ├── Traits.h │ │ ├── TwoLevelIterator.h │ │ ├── TwoLevelIteratorA.h │ │ ├── UnionFind.h │ │ ├── UserContext.h │ │ ├── Version.h │ │ ├── config.h.in │ │ ├── gIO.h │ │ ├── gdeque.h │ │ ├── graphs/ │ │ │ ├── BufferedGraph.h │ │ │ ├── Details.h │ │ │ ├── FileGraph.h │ │ │ ├── Graph.h │ │ │ ├── GraphHelpers.h │ │ │ ├── LCGraph.h │ │ │ ├── LC_Adaptor_Graph.h │ │ │ ├── LC_CSR_CSC_Graph.h │ │ │ ├── LC_CSR_Graph.h │ │ │ ├── LC_CSR_Hypergraph.h │ │ │ ├── LC_InOut_Graph.h │ │ │ ├── LC_InlineEdge_Graph.h │ │ │ ├── LC_Linear_Graph.h │ │ │ ├── LC_Morph_Graph.h │ │ │ ├── MorphGraph.h │ │ │ ├── MorphHyperGraph.h │ │ │ ├── Morph_SepInOut_Graph.h │ │ │ ├── OCGraph.h │ │ │ ├── OfflineGraph.h │ │ │ ├── ReadGraph.h │ │ │ ├── SpatialTree.h │ │ │ └── TypeTraits.h │ │ ├── gslist.h │ │ ├── gstl.h │ │ ├── optional.h │ │ ├── runtime/ │ │ │ ├── Context.h │ │ │ ├── Executor_Deterministic.h │ │ │ ├── Executor_DoAll.h │ │ │ ├── Executor_ForEach.h │ │ │ ├── Executor_OnEach.h │ │ │ ├── Executor_Ordered.h │ │ │ ├── Executor_ParaMeter.h │ │ │ ├── ExtraTraits.h │ │ │ ├── Iterable.h │ │ │ ├── LoopStatistics.h │ │ │ ├── 
Mem.h │ │ │ ├── OperatorReferenceTypes.h │ │ │ ├── PagePool.h │ │ │ ├── Profile.h │ │ │ ├── Range.h │ │ │ ├── SharedMem.h │ │ │ ├── Statistics.h │ │ │ ├── Substrate.h │ │ │ ├── ThreadTimer.h │ │ │ ├── TiledExecutor.h │ │ │ ├── Tracer.h │ │ │ └── UserContextAccess.h │ │ ├── substrate/ │ │ │ ├── Barrier.h │ │ │ ├── CacheLineStorage.h │ │ │ ├── CompilerSpecific.h │ │ │ ├── EnvCheck.h │ │ │ ├── HWTopo.h │ │ │ ├── NumaMem.h │ │ │ ├── PaddedLock.h │ │ │ ├── PageAlloc.h │ │ │ ├── PerThreadStorage.h │ │ │ ├── PtrLock.h │ │ │ ├── SharedMem.h │ │ │ ├── SimpleLock.h │ │ │ ├── StaticInstance.h │ │ │ ├── Termination.h │ │ │ ├── ThreadPool.h │ │ │ └── ThreadRWlock.h │ │ └── worklists/ │ │ ├── AdaptiveObim.h │ │ ├── BulkSynchronous.h │ │ ├── Chunk.h │ │ ├── ExternalReference.h │ │ ├── LocalQueue.h │ │ ├── Obim.h │ │ ├── OrderedList.h │ │ ├── OwnerComputes.h │ │ ├── PerThreadChunk.h │ │ ├── Simple.h │ │ ├── StableIterator.h │ │ ├── WLCompileCheck.h │ │ ├── WorkList.h │ │ └── WorkListHelpers.h │ ├── src/ │ │ ├── Barrier.cpp │ │ ├── Barrier_Counting.cpp │ │ ├── Barrier_Dissemination.cpp │ │ ├── Barrier_MCS.cpp │ │ ├── Barrier_Pthread.cpp │ │ ├── Barrier_Simple.cpp │ │ ├── Barrier_Topo.cpp │ │ ├── Context.cpp │ │ ├── Deterministic.cpp │ │ ├── DynamicBitset.cpp │ │ ├── EnvCheck.cpp │ │ ├── FileGraph.cpp │ │ ├── FileGraphParallel.cpp │ │ ├── GraphHelpers.cpp │ │ ├── HWTopo.cpp │ │ ├── HWTopoDarwin.cpp │ │ ├── HWTopoLinux.cpp │ │ ├── Mem.cpp │ │ ├── NumaMem.cpp │ │ ├── OCFileGraph.cpp │ │ ├── PageAlloc.cpp │ │ ├── PagePool.cpp │ │ ├── ParaMeter.cpp │ │ ├── PerThreadStorage.cpp │ │ ├── PreAlloc.cpp │ │ ├── Profile.cpp │ │ ├── PtrLock.cpp │ │ ├── SharedMem.cpp │ │ ├── SharedMemSys.cpp │ │ ├── SimpleLock.cpp │ │ ├── Statistics.cpp │ │ ├── Substrate.cpp │ │ ├── Support.cpp │ │ ├── Termination.cpp │ │ ├── ThreadPool.cpp │ │ ├── ThreadTimer.cpp │ │ ├── Threads.cpp │ │ ├── Timer.cpp │ │ ├── Tracer.cpp │ │ ├── Version.cpp.in │ │ └── gIO.cpp │ └── test/ │ ├── CMakeLists.txt │ ├── README.md │ ├── 
acquire.cpp │ ├── bandwidth.cpp │ ├── barriers.cpp │ ├── empty-member-lcgraph.cpp │ ├── flatmap.cpp │ ├── floatingPointErrors.cpp │ ├── foreach.cpp │ ├── forward-declare-graph.cpp │ ├── gcollections.cpp │ ├── graph-compile.cpp │ ├── graph.cpp │ ├── gslist.cpp │ ├── hwtopo.cpp │ ├── lc-adaptor.cpp │ ├── lock.cpp │ ├── lockmgr.cpp │ ├── loop-overhead.cpp │ ├── mem.cpp │ ├── morphgraph-removal.cpp │ ├── morphgraph.cpp │ ├── move.cpp │ ├── oneach.cpp │ ├── papi.cpp │ ├── pc.cpp │ ├── reduction.cpp │ ├── sort.cpp │ ├── static.cpp │ ├── traits.cpp │ ├── twoleveliteratora.cpp │ ├── wakeup-overhead.cpp │ └── worklists-compile.cpp ├── libgluon/ │ ├── CMakeLists.txt │ ├── include/ │ │ └── galois/ │ │ ├── cuda/ │ │ │ ├── Context.h │ │ │ ├── DynamicBitset.h │ │ │ ├── EdgeContext.h │ │ │ ├── EdgeHostDecls.h │ │ │ └── HostDecls.h │ │ ├── graphs/ │ │ │ ├── GluonEdgeSubstrate.h │ │ │ └── GluonSubstrate.h │ │ └── runtime/ │ │ ├── DataCommMode.h │ │ ├── GlobalObj.h │ │ ├── SyncStructures.h │ │ └── cuda/ │ │ ├── DeviceEdgeSync.h │ │ └── DeviceSync.h │ └── src/ │ ├── GlobalObj.cpp │ ├── GluonSubstrate.cpp │ ├── SyncStructures.cpp │ └── cuda_device.cpp ├── libgpu/ │ ├── CMakeLists.txt │ ├── include/ │ │ ├── Timer.h │ │ ├── abitset.h │ │ ├── aolist.h │ │ ├── atomic_helpers.h │ │ ├── bmk2.h │ │ ├── component.h │ │ ├── counter.h │ │ ├── csr_graph.h │ │ ├── cuda_launch_config.hpp │ │ ├── cutil_subset.h │ │ ├── exclusive.h │ │ ├── failfast.h │ │ ├── gbar.cuh │ │ ├── gg.h │ │ ├── ggc_rt.h │ │ ├── ggcuda.h │ │ ├── instr.h │ │ ├── internal.h │ │ ├── lockarray.h │ │ ├── pipe.h │ │ ├── rv.h │ │ ├── sharedptr.h │ │ ├── snfile.h │ │ ├── thread_work.h │ │ └── worklist.h │ └── src/ │ ├── bmk2.c │ ├── csr_graph.cu │ ├── ggc_rt.cu │ ├── instr.cu │ ├── skelapp/ │ │ └── skel.cu │ ├── snappy.c │ └── snappy_test.c ├── libpangolin/ │ ├── CMakeLists.txt │ ├── README.md │ ├── gpu/ │ │ └── pangolin/ │ │ ├── bitsets.h │ │ ├── checker.h │ │ ├── cutils.h │ │ ├── element.cuh │ │ ├── embedding.cuh │ │ ├── 
graph_gpu.h │ │ ├── miner.cuh │ │ ├── timer.h │ │ └── types.cuh │ ├── include/ │ │ └── pangolin/ │ │ ├── BfsMining/ │ │ │ ├── edge_miner.h │ │ │ ├── edge_miner_api.h │ │ │ ├── embedding_list.h │ │ │ ├── engine.h │ │ │ ├── vertex_miner.h │ │ │ └── vertex_miner_api.h │ │ ├── base_embedding.h │ │ ├── canonical_graph.h │ │ ├── core.h │ │ ├── domain_support.h │ │ ├── edge_embedding.h │ │ ├── edge_type.h │ │ ├── element.h │ │ ├── embedding.h │ │ ├── embedding_queue.h │ │ ├── equivalence.h │ │ ├── gtypes.h │ │ ├── mgraph.h │ │ ├── miner.h │ │ ├── ptypes.h │ │ ├── quick_pattern.h │ │ ├── res_man.h │ │ ├── scan.h │ │ ├── types.h │ │ ├── util.h │ │ └── vertex_embedding.h │ └── src/ │ ├── BfsMining/ │ │ └── embedding_list.cpp │ ├── base_embedding.cpp │ ├── equivalence.cpp │ ├── quick_pattern.cpp │ └── vertex_embedding.cpp ├── libpygalois/ │ ├── CMakeLists.txt │ └── include/ │ └── galois/ │ └── Constants.h ├── libsupport/ │ ├── CMakeLists.txt │ ├── include/ │ │ └── galois/ │ │ ├── GetEnv.h │ │ └── Logging.h │ ├── src/ │ │ ├── GetEnv.cpp │ │ └── Logging.cpp │ └── test/ │ ├── CMakeLists.txt │ ├── getenv.cpp │ └── logging.cpp ├── lonestar/ │ ├── CMakeLists.txt │ ├── analytics/ │ │ ├── CMakeLists.txt │ │ ├── cpu/ │ │ │ ├── CMakeLists.txt │ │ │ ├── betweennesscentrality/ │ │ │ │ ├── AsyncStructs.h │ │ │ │ ├── BCEdge.h │ │ │ │ ├── BCNode.h │ │ │ │ ├── BetweennessCentrality.cpp │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── LevelStructs.h │ │ │ │ ├── OuterStructs.h │ │ │ │ ├── README.md │ │ │ │ └── control.h │ │ │ ├── bfs/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── bfs.cpp │ │ │ │ └── bfsDirectionOpt.cpp │ │ │ ├── bipart/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Coarsening.cpp │ │ │ │ ├── Metric.cpp │ │ │ │ ├── Partitioning.cpp │ │ │ │ ├── README.md │ │ │ │ ├── Refine.cpp │ │ │ │ ├── bipart.cpp │ │ │ │ └── bipart.h │ │ │ ├── clustering/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── clustering.h │ │ │ │ ├── leidenClustering.cpp │ │ │ │ └── louvainClustering.cpp 
│ │ │ ├── connected-components/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── ConnectedComponents.cpp │ │ │ │ └── README.md │ │ │ ├── gmetis/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Coarsening.cpp │ │ │ │ ├── GMetis.cpp │ │ │ │ ├── GraphReader.h │ │ │ │ ├── Metis.h │ │ │ │ ├── Metric.cpp │ │ │ │ ├── Partitioning.cpp │ │ │ │ ├── README.md │ │ │ │ └── Refine.cpp │ │ │ ├── independentset/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── IndependentSet.cpp │ │ │ │ └── README.md │ │ │ ├── k-core/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ └── kcore.cpp │ │ │ ├── k-truss/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── K-Truss.cpp │ │ │ │ ├── README.md │ │ │ │ ├── Verify.cpp │ │ │ │ └── bmktest2.py │ │ │ ├── matching/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ └── bipartite-mcm.cpp │ │ │ ├── matrixcompletion/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── bipartite-gen.py │ │ │ │ ├── commandLineParam.h │ │ │ │ ├── matrixCompletion.cpp │ │ │ │ ├── matrixCompletion.h │ │ │ │ ├── parselog.sh │ │ │ │ ├── plot.R │ │ │ │ └── runexp.py │ │ │ ├── pagerank/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── PageRank-constants.h │ │ │ │ ├── PageRank-pull.cpp │ │ │ │ ├── PageRank-push.cpp │ │ │ │ └── README.md │ │ │ ├── pointstoanalysis/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── PointsTo.cpp │ │ │ │ ├── README.md │ │ │ │ └── SparseBitVector.h │ │ │ ├── preflowpush/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Preflowpush.cpp │ │ │ │ └── README.md │ │ │ ├── spanningtree/ │ │ │ │ ├── Boruvka.cpp │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ └── UnionFind.h │ │ │ ├── sssp/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ └── SSSP.cpp │ │ │ └── triangle-counting/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ └── Triangles.cpp │ │ ├── distributed/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── betweennesscentrality/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── bc_level.cpp │ │ │ │ ├── bc_level_cuda.cu │ │ │ │ ├── bc_level_cuda.cuh │ │ │ │ ├── 
bc_level_cuda.h │ │ │ │ ├── bc_level_cuda.py │ │ │ │ ├── bc_level_sync.hh │ │ │ │ ├── bc_mr.cpp │ │ │ │ ├── mrbc_bitset.hh │ │ │ │ ├── mrbc_sync.hh │ │ │ │ └── mrbc_tree.h │ │ │ ├── bfs/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── bfs_pull.cpp │ │ │ │ ├── bfs_pull_cuda.cu │ │ │ │ ├── bfs_pull_cuda.cuh │ │ │ │ ├── bfs_pull_cuda.h │ │ │ │ ├── bfs_pull_cuda.py │ │ │ │ ├── bfs_pull_sync.hh │ │ │ │ ├── bfs_push.cpp │ │ │ │ ├── bfs_push_cuda.cu │ │ │ │ ├── bfs_push_cuda.cuh │ │ │ │ ├── bfs_push_cuda.h │ │ │ │ ├── bfs_push_cuda.py │ │ │ │ └── bfs_push_sync.hh │ │ │ ├── connected-components/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── cc_pull.cpp │ │ │ │ ├── cc_pull_cuda.cu │ │ │ │ ├── cc_pull_cuda.cuh │ │ │ │ ├── cc_pull_cuda.h │ │ │ │ ├── cc_pull_cuda.py │ │ │ │ ├── cc_pull_sync.hh │ │ │ │ ├── cc_push.cpp │ │ │ │ ├── cc_push_cuda.cu │ │ │ │ ├── cc_push_cuda.cuh │ │ │ │ ├── cc_push_cuda.h │ │ │ │ ├── cc_push_cuda.py │ │ │ │ └── cc_push_sync.hh │ │ │ ├── k-core/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── kcore_pull.cpp │ │ │ │ ├── kcore_pull_cuda.cu │ │ │ │ ├── kcore_pull_cuda.cuh │ │ │ │ ├── kcore_pull_cuda.h │ │ │ │ ├── kcore_pull_cuda.py │ │ │ │ ├── kcore_pull_sync.hh │ │ │ │ ├── kcore_push.cpp │ │ │ │ ├── kcore_push_cuda.cu │ │ │ │ ├── kcore_push_cuda.cuh │ │ │ │ ├── kcore_push_cuda.h │ │ │ │ ├── kcore_push_cuda.py │ │ │ │ └── kcore_push_sync.hh │ │ │ ├── matrixcompletion/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── matrixCompletion.cpp │ │ │ │ └── matrixCompletion_sync.hh │ │ │ ├── pagerank/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── pagerank_pull.cpp │ │ │ │ ├── pagerank_pull_cuda.cu │ │ │ │ ├── pagerank_pull_cuda.cuh │ │ │ │ ├── pagerank_pull_cuda.h │ │ │ │ ├── pagerank_pull_cuda.py │ │ │ │ ├── pagerank_pull_sync.hh │ │ │ │ ├── pagerank_push.cpp │ │ │ │ ├── pagerank_push_cuda.cu │ │ │ │ ├── pagerank_push_cuda.cuh │ │ │ │ ├── pagerank_push_cuda.h │ │ │ │ ├── pagerank_push_cuda.py │ 
│ │ │ └── pagerank_push_sync.hh │ │ │ ├── partition/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── partition.cpp │ │ │ ├── sssp/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── sssp_pull.cpp │ │ │ │ ├── sssp_pull_cuda.cu │ │ │ │ ├── sssp_pull_cuda.cuh │ │ │ │ ├── sssp_pull_cuda.h │ │ │ │ ├── sssp_pull_cuda.py │ │ │ │ ├── sssp_pull_sync.hh │ │ │ │ ├── sssp_push.cpp │ │ │ │ ├── sssp_push_cuda.cu │ │ │ │ ├── sssp_push_cuda.cuh │ │ │ │ ├── sssp_push_cuda.h │ │ │ │ ├── sssp_push_cuda.py │ │ │ │ └── sssp_push_sync.hh │ │ │ └── triangle-counting/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── tc.cpp │ │ │ ├── tc_cuda.cu │ │ │ ├── tc_cuda.cuh │ │ │ ├── tc_cuda.h │ │ │ └── tc_cuda.py │ │ └── gpu/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── bfs/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── bfs.cu │ │ │ └── support.cu │ │ ├── connected-components/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── cc.cu │ │ │ └── support.cu │ │ ├── independentset/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── mis.cu │ │ │ └── support.cu │ │ ├── matrixcompletion/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── SGDAsyncEdgeCu.h │ │ │ ├── SGDCommonCu.h │ │ │ ├── SGDGraphCu.h │ │ │ ├── sgd.cu │ │ │ └── support.cu │ │ ├── pagerank/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── pagerank.cu │ │ │ └── support.cu │ │ ├── pointstoanalysis/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── andersen.cu │ │ │ ├── andersen.h │ │ │ ├── pta.cu │ │ │ ├── pta_tuning.h │ │ │ └── support.cu │ │ ├── spanningtree/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── mst-tex.cu │ │ │ ├── mst.cu │ │ │ ├── mst.h │ │ │ └── support.cu │ │ ├── sssp/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── sssp.cu │ │ │ └── support.cu │ │ └── triangle-counting/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── support.cu │ │ └── tc.cu │ ├── eda/ │ │ ├── CMakeLists.txt │ │ └── cpu/ │ │ ├── CMakeLists.txt │ │ ├── aig-rewriting/ │ │ │ ├── CMakeLists.txt │ │ │ ├── 
README.md │ │ │ ├── algorithms/ │ │ │ │ ├── ChoiceManager.cpp │ │ │ │ ├── ChoiceManager.h │ │ │ │ ├── CutManager.cpp │ │ │ │ ├── CutManager.h │ │ │ │ ├── CutPool.cpp │ │ │ │ ├── CutPool.h │ │ │ │ ├── NPNManager.cpp │ │ │ │ ├── NPNManager.h │ │ │ │ ├── PreCompGraphManager.cpp │ │ │ │ ├── PreCompGraphManager.h │ │ │ │ ├── PriorityCutManager.cpp │ │ │ │ ├── PriorityCutManager.h │ │ │ │ ├── PriorityCutPool.cpp │ │ │ │ ├── PriorityCutPool.h │ │ │ │ ├── ReconvDrivenCut.cpp │ │ │ │ ├── ReconvDrivenCut.h │ │ │ │ ├── RewriteManager.cpp │ │ │ │ └── RewriteManager.h │ │ │ ├── functional/ │ │ │ │ ├── BitVectorPool.cpp │ │ │ │ ├── BitVectorPool.h │ │ │ │ ├── FunctionHandler.h │ │ │ │ ├── FunctionHandler32.h │ │ │ │ ├── FunctionUtil.cpp │ │ │ │ └── FunctionUtil.h │ │ │ ├── main.cpp │ │ │ ├── misc/ │ │ │ │ └── util/ │ │ │ │ ├── utilString.cpp │ │ │ │ └── utilString.h │ │ │ ├── parsers/ │ │ │ │ ├── AigParser.cpp │ │ │ │ ├── AigParser.h │ │ │ │ ├── LookupTableParser.cpp │ │ │ │ ├── LookupTableParser.h │ │ │ │ ├── semantic_error.cpp │ │ │ │ ├── semantic_error.h │ │ │ │ ├── syntax_error.cpp │ │ │ │ ├── syntax_error.h │ │ │ │ ├── unexpected_eof.cpp │ │ │ │ └── unexpected_eof.h │ │ │ ├── subjectgraph/ │ │ │ │ └── aig/ │ │ │ │ ├── Aig.cpp │ │ │ │ └── Aig.h │ │ │ ├── writers/ │ │ │ │ ├── AigWriter.cpp │ │ │ │ ├── AigWriter.h │ │ │ │ ├── BlifWriter.cpp │ │ │ │ └── BlifWriter.h │ │ │ └── xxHash/ │ │ │ ├── xxhash.c │ │ │ └── xxhash.h │ │ └── sproute/ │ │ ├── BoilerPlate.h │ │ ├── CMakeLists.txt │ │ ├── DataProc.h │ │ ├── DataType.h │ │ ├── EdgeShift.h │ │ ├── LICENSE │ │ ├── README.md │ │ ├── RSMT.h │ │ ├── RipUp.h │ │ ├── bitmap_image.hpp │ │ ├── bitmap_test.cpp │ │ ├── bookshelf_IO.c │ │ ├── bookshelf_IO.h │ │ ├── cong.c │ │ ├── cong.h │ │ ├── dist.c │ │ ├── dist.h │ │ ├── dl.c │ │ ├── dl.h │ │ ├── err.c │ │ ├── err.h │ │ ├── flute-ckt │ │ ├── flute-ckt.c │ │ ├── flute-net │ │ ├── flute-net.c │ │ ├── flute.h │ │ ├── flute_mst.h │ │ ├── global.h │ │ ├── heap.c │ │ ├── heap.h │ │ ├── 
main.cpp │ │ ├── maze.h │ │ ├── maze3D.h │ │ ├── maze_finegrain.h │ │ ├── maze_finegrain_concurrent.h │ │ ├── maze_finegrain_lateupdate.h │ │ ├── maze_lock.h │ │ ├── memAlloc.c │ │ ├── memAlloc.h │ │ ├── mst2.c │ │ ├── mst2.h │ │ ├── neighbors.c │ │ ├── neighbors.h │ │ ├── parallel_router_morphgraph.cpp │ │ ├── rand-pts.c │ │ ├── route.h │ │ └── utility.h │ ├── libdistbench/ │ │ ├── CMakeLists.txt │ │ ├── include/ │ │ │ └── DistBench/ │ │ │ ├── Input.h │ │ │ ├── MiningStart.h │ │ │ ├── Output.h │ │ │ └── Start.h │ │ └── src/ │ │ ├── Input.cpp │ │ ├── Output.cpp │ │ └── Start.cpp │ ├── liblonestar/ │ │ ├── CMakeLists.txt │ │ ├── include/ │ │ │ └── Lonestar/ │ │ │ ├── BFS_SSSP.h │ │ │ ├── BoilerPlate.h │ │ │ └── Utils.h │ │ └── src/ │ │ └── BoilerPlate.cpp │ ├── mining/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── cpu/ │ │ │ ├── CMakeLists.txt │ │ │ ├── frequent-subgraph-mining/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── fsm.cpp │ │ │ │ └── fsm.h │ │ │ ├── k-clique-listing/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── kcl.cpp │ │ │ │ └── kcl.h │ │ │ ├── motif-counting/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── motif.cpp │ │ │ │ └── motif.h │ │ │ ├── subgraph-listing/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.mb │ │ │ │ ├── sgl_cycle.cpp │ │ │ │ └── sgl_diamond.cpp │ │ │ └── triangle-counting/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── tc.h │ │ │ └── tc_mine.cpp │ │ ├── gpu/ │ │ │ ├── CMakeLists.txt │ │ │ ├── frequent-subgraph-mining/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── fsm.cu │ │ │ │ ├── fsm.h │ │ │ │ └── fsm_gpu.cpp │ │ │ ├── k-clique-listing/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── kcl.cu │ │ │ │ ├── kcl.h │ │ │ │ └── kcl_gpu.cpp │ │ │ ├── motif-counting/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── motif.cu │ │ │ │ ├── motif.h │ │ │ │ └── motif_gpu.cpp │ │ │ └── triangle-counting/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ 
├── tc.h │ │ │ ├── tc_mine.cu │ │ │ └── tc_mine_gpu.cpp │ │ └── libminingbench/ │ │ ├── CMakeLists.txt │ │ ├── include/ │ │ │ └── MiningBench/ │ │ │ └── Start.h │ │ └── src/ │ │ ├── Input.cpp │ │ └── Start.cpp │ ├── scientific/ │ │ ├── CMakeLists.txt │ │ ├── cpu/ │ │ │ ├── CMakeLists.txt │ │ │ ├── barneshut/ │ │ │ │ ├── Barneshut.cpp │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Point.h │ │ │ │ └── README.md │ │ │ ├── delaunayrefinement/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Cavity.h │ │ │ │ ├── DelaunayRefinement.cpp │ │ │ │ ├── Edge.h │ │ │ │ ├── Element.h │ │ │ │ ├── Mesh.h │ │ │ │ ├── README.md │ │ │ │ ├── Subgraph.h │ │ │ │ ├── Tuple.h │ │ │ │ └── Verifier.h │ │ │ ├── delaunaytriangulation/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Cavity.h │ │ │ │ ├── DelaunayTriangulation.cpp │ │ │ │ ├── DelaunayTriangulationDet.cpp │ │ │ │ ├── Element.cpp │ │ │ │ ├── Element.h │ │ │ │ ├── Graph.h │ │ │ │ ├── Point.h │ │ │ │ ├── QuadTree.h │ │ │ │ ├── README.md │ │ │ │ ├── Tuple.h │ │ │ │ └── Verifier.h │ │ │ └── longestedge/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── out/ │ │ │ │ └── .gitignore │ │ │ ├── src/ │ │ │ │ ├── LongestEdge.cpp │ │ │ │ ├── conditions/ │ │ │ │ │ ├── ConditionChecker.h │ │ │ │ │ ├── DummyConditionChecker.h │ │ │ │ │ └── TerrainConditionChecker.h │ │ │ │ ├── libmgrs/ │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── mgrs.c │ │ │ │ │ ├── mgrs.h │ │ │ │ │ ├── polarst.c │ │ │ │ │ ├── polarst.h │ │ │ │ │ ├── tranmerc.c │ │ │ │ │ ├── tranmerc.h │ │ │ │ │ ├── ups.c │ │ │ │ │ ├── ups.h │ │ │ │ │ ├── utm.c │ │ │ │ │ └── utm.h │ │ │ │ ├── model/ │ │ │ │ │ ├── Coordinates.h │ │ │ │ │ ├── EdgeData.h │ │ │ │ │ ├── Graph.h │ │ │ │ │ ├── Map.cpp │ │ │ │ │ ├── Map.h │ │ │ │ │ ├── NodeData.h │ │ │ │ │ └── ProductionState.h │ │ │ │ ├── productions/ │ │ │ │ │ ├── Production.h │ │ │ │ │ ├── Production1.h │ │ │ │ │ ├── Production2.h │ │ │ │ │ ├── Production3.h │ │ │ │ │ ├── Production4.h │ │ │ │ │ ├── Production5.h │ │ │ │ │ └── Production6.h │ │ │ │ ├── readers/ │ │ │ │ │ ├── 
AsciiReader.cpp │ │ │ │ │ ├── AsciiReader.h │ │ │ │ │ ├── InpReader.cpp │ │ │ │ │ ├── InpReader.h │ │ │ │ │ ├── SrtmReader.cpp │ │ │ │ │ └── SrtmReader.h │ │ │ │ ├── utils/ │ │ │ │ │ ├── ConnectivityManager.h │ │ │ │ │ ├── GaloisUtils.h │ │ │ │ │ ├── GraphGenerator.h │ │ │ │ │ ├── MyGraphFormatWriter.h │ │ │ │ │ ├── Utils.cpp │ │ │ │ │ └── Utils.h │ │ │ │ └── writers/ │ │ │ │ ├── InpWriter.cpp │ │ │ │ ├── InpWriter.h │ │ │ │ ├── TriangleFormatWriter.cpp │ │ │ │ └── TriangleFormatWriter.h │ │ │ └── test/ │ │ │ ├── TestMain.cpp │ │ │ ├── catch.hpp │ │ │ ├── model/ │ │ │ │ ├── MapTest.cpp │ │ │ │ └── ProductionStateTest.cpp │ │ │ ├── productions/ │ │ │ │ └── Production1Test.cpp │ │ │ ├── testUtils.cpp │ │ │ └── utils/ │ │ │ ├── ConnectivityManagerTest.cpp │ │ │ └── UtilsTest.cpp │ │ └── gpu/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── barneshut/ │ │ │ ├── CMakeLists.txt │ │ │ ├── LICENSE.md │ │ │ ├── README.md │ │ │ ├── bh.cu │ │ │ └── bh_tuning.h │ │ └── delaunayrefinement/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── devel.h │ │ ├── dmr-nontex.cu │ │ ├── dmr.cu │ │ ├── dmr.h │ │ ├── dmrggc.inc │ │ ├── geomprim.h │ │ ├── main.inc │ │ └── meshfiles.h │ └── tutorial_examples/ │ ├── CMakeLists.txt │ ├── ConflictAwareTorus.cpp │ ├── CountLevels.cpp │ ├── ExampleWrappedWorklist.cpp │ ├── GraphTraversalPullOperator.cpp │ ├── GraphTraversalPushOperator.cpp │ ├── GraphTraversalSerial.cpp │ ├── HelloWorld.cpp │ ├── SSSPPullSimple.cpp │ ├── SSSPPushSimple.cpp │ ├── SSSPsimple.cpp │ ├── SpanningTree.cpp │ ├── ThirdPartyMalloc.cpp │ ├── Torus.cpp │ ├── TorusConstruction.cpp │ └── TorusImproved.cpp ├── pyproject.toml ├── python/ │ ├── CMakeLists.txt │ └── galois/ │ ├── CMakeLists.txt │ ├── __init__.py │ ├── _bfs.pyx │ ├── _connected_components.pyx │ ├── _pagerank.pyx │ ├── _sssp.pyx │ ├── bfs.py │ ├── connected_components.py │ ├── cpp/ │ │ ├── __init__.pxd │ │ ├── libgalois/ │ │ │ ├── Galois.pxd │ │ │ ├── Timer.pxd │ │ │ ├── Worklist.pxd │ │ │ ├── __init__.pxd │ │ │ └── 
graphs/ │ │ │ ├── Graph.pxd │ │ │ ├── Util.pxd │ │ │ └── __init__.pxd │ │ └── libstd/ │ │ ├── __init__.pxd │ │ └── atomic.pxd │ ├── pagerank.py │ ├── shmem.pxd │ └── shmem.pyx ├── scripts/ │ ├── CMakeLists.txt │ ├── abelian_log_parser.py │ ├── check_format.sh │ ├── constraints_checking/ │ │ └── parse_dump.rb │ ├── docker/ │ │ ├── Dockerfile │ │ ├── Dockerfile.msan │ │ ├── README.md │ │ ├── msan/ │ │ │ ├── build-boost.sh │ │ │ ├── build-llvm.sh │ │ │ └── config-galois.sh │ │ └── run-image.sh │ ├── experimental/ │ │ ├── abelian_log_parser_analysis.py │ │ ├── abelian_log_parser_deprecated.py │ │ ├── abelian_log_parser_multipleRuns.py │ │ ├── abelian_log_parser_multipleRuns2.py │ │ ├── bmk2/ │ │ │ ├── __init__.py │ │ │ ├── bispec.py │ │ │ ├── bmk2.py │ │ │ ├── checkers.py │ │ │ ├── collect.py │ │ │ ├── collect_multi.py │ │ │ ├── common.py │ │ │ ├── config.py │ │ │ ├── convert.py │ │ │ ├── convgraph.py │ │ │ ├── core.py │ │ │ ├── extras.py │ │ │ ├── inputdb.py │ │ │ ├── inputprops.py │ │ │ ├── logproc.py │ │ │ ├── mapfile.py │ │ │ ├── measure_energy.py │ │ │ ├── opdb.py │ │ │ ├── overlays.py │ │ │ ├── perf.py │ │ │ ├── rsinfo.py │ │ │ ├── sconvert.py │ │ │ ├── summlog.py │ │ │ └── test2.py │ │ ├── buildFunc.sh │ │ ├── buildMultiCompiler.sh │ │ ├── buildMultiVersion.sh │ │ ├── buildOnce.sh │ │ ├── distbmk2/ │ │ │ ├── README │ │ │ ├── bmk2.cfg │ │ │ ├── bmkprops.py │ │ │ ├── bmktest2.py │ │ │ ├── dist.bispec │ │ │ ├── dist.inputdb │ │ │ └── dist.inputprops │ │ ├── galois_license_fixer.py │ │ ├── githubbmk2_setup/ │ │ │ ├── README │ │ │ ├── bmk2.cfg │ │ │ ├── bmkprops.py │ │ │ ├── bmktest2.py │ │ │ ├── defaultrunscript.sh │ │ │ ├── lonestar.bispec │ │ │ ├── lonestar.inputdb │ │ │ └── lonestar.inputprops │ │ ├── heterogeneousGalois/ │ │ │ ├── CPU_run_scripts_stampede/ │ │ │ │ ├── ruby_BFS_CC_SSSP_rmat_USA_twitter_Pull.sh │ │ │ │ ├── ruby_BFS_CC_SSSP_rmat_USA_twitter_Pull_Vcut.sh │ │ │ │ ├── ruby_BFS_CC_SSSP_rmat_USA_twitter_Push.sh │ │ │ │ └── 
ruby_BFS_CC_SSSP_rmat_USA_twitter_Push_Vcut.sh │ │ │ ├── README_compiler │ │ │ ├── batch_bridges_all.sh │ │ │ ├── batch_single-host_multi-device_all.sh │ │ │ ├── batch_stampede_all.sh │ │ │ ├── batch_verify.sh │ │ │ ├── compile.sh │ │ │ ├── compile_all.sh │ │ │ ├── cuda_compile.sh │ │ │ ├── run_bridges.template.sbatch │ │ │ ├── run_bridges_all.sh │ │ │ ├── run_single-host_multi-device_all.sh │ │ │ ├── run_stampede.template.sbatch │ │ │ ├── run_stampede_all.sh │ │ │ └── verify.sh │ │ ├── lonestarbmk2/ │ │ │ ├── README │ │ │ ├── bmk2.cfg │ │ │ ├── bmkprops.py │ │ │ ├── bmktest2.py │ │ │ ├── defaultrunscript.sh │ │ │ ├── lonestar.bispec │ │ │ ├── lonestar.inputdb │ │ │ └── lonestar.inputprops │ │ ├── older/ │ │ │ ├── backend.pl │ │ │ ├── prune_headers_function.pl │ │ │ ├── prune_headers_line.pl │ │ │ ├── report.pl │ │ │ ├── report_vtune.pl │ │ │ ├── run_boruvka.pl │ │ │ ├── run_clustering.pl │ │ │ ├── run_delaunayrefinement.pl │ │ │ ├── run_sssp.pl │ │ │ └── vtune_sssp.pl │ │ ├── pangolin/ │ │ │ ├── batch_verify.sh │ │ │ ├── fsm.citeseer.2.300 │ │ │ ├── fsm.citeseer.2.500 │ │ │ ├── fsm.patent.2.1000 │ │ │ ├── fsm.patent.2.300 │ │ │ ├── fsm.patent.2.500 │ │ │ ├── fsm.patent.2.5000 │ │ │ ├── kcl.citeseer.4 │ │ │ ├── kcl.citeseer.5 │ │ │ ├── kcl.mico.4 │ │ │ ├── kcl.mico.5 │ │ │ ├── kcl.patent.3 │ │ │ ├── kcl.patent.4 │ │ │ ├── kcl.patent.5 │ │ │ ├── motif.citeseer.3 │ │ │ ├── motif.citeseer.4 │ │ │ ├── motif.mico.3 │ │ │ ├── motif.mico.4 │ │ │ ├── motif.patent.3 │ │ │ ├── motif.patent.4 │ │ │ ├── result_checker.py │ │ │ └── verify.sh │ │ ├── runBFS.sh │ │ └── runSSSP.sh │ ├── find_ifdefs.sh │ ├── galois_log_parser.R │ ├── galois_log_parser_minimal.R │ ├── gitFindBigCommits.sh │ ├── hcompiler.sh │ ├── intel_study_scripts/ │ │ ├── README.md │ │ ├── download_inputs.sh │ │ ├── run_bc.sh │ │ ├── run_bfs.sh │ │ ├── run_cc.sh │ │ ├── run_pr.sh │ │ ├── run_sssp.sh │ │ └── run_tc.sh │ ├── iss_load_modules.sh │ ├── make_dist.sh.in │ ├── merge_vtune.pl │ ├── plot_lonestar_apps.R │ 
├── quick_plot.pl │ ├── rcat.py │ ├── report.py │ ├── report_vtune.pl │ ├── result_checker.py │ ├── run.py │ ├── run_vtune.pl │ ├── sparse-matrices/ │ │ ├── diff_edgelists.py │ │ ├── iperm2order.pl │ │ ├── mtx2edgelist.pl │ │ └── reorder.pl │ ├── tcp_starter.py │ ├── visual/ │ │ ├── plot2Dmesh.m │ │ ├── plotGraph.R │ │ ├── plotGraph3d.R │ │ ├── plotTimeStamps.m │ │ └── triplot.m │ └── vtune.sh ├── setup.py ├── tests/ │ └── test_imports.py └── tools/ ├── CMakeLists.txt ├── dist-graph-convert/ │ ├── CMakeLists.txt │ ├── dist-graph-convert-helpers.cpp │ ├── dist-graph-convert-helpers.h │ └── dist-graph-convert.cpp ├── graph-convert/ │ ├── CMakeLists.txt │ ├── graph-convert-huge.cpp │ ├── graph-convert.cpp │ └── test-inputs/ │ ├── sample.csv │ ├── with-blank-lines.edgelist │ ├── with-blank-lines.edgelist.expected │ ├── with-comments.edgelist │ └── with-comments.edgelist.expected ├── graph-remap/ │ ├── CMakeLists.txt │ └── graph-remap.cpp └── graph-stats/ ├── CMakeLists.txt └── graph-stats.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .circleci/config.yml ================================================ version: 2.1 common_step: &cmake_build_test - run: | cmake --build /tmp/build --target input if [ -n "$CIRCLE_PULL_REQUEST" ]; then \ subset=$(/bin/bash .circleci/longest_common_path.sh); \ echo "Changes of ${CIRCLE_SHA1} are all under $subset"; \ fi cmake --build /tmp/build/${subset:-.} --parallel 2 # Run tests as non-root otherwise MPI will complain (cd /tmp/build/${subset:-.} \ && chown -R runner . \ && su runner -c "ctest --output-on-failure --label-regex quick --parallel 2") # TODO: These builds are currently configured to # install the needed dependencies in each container # at the start of each build. The dependencies aren't huge, # but that is slower and does waste some bandwidth. 
# We should eventually roll the set up for each # container into a separate dockerfile and push custom # build images to dockerhub so that setting up packages # during the actual CI testing is no longer necessary. jobs: "CheckFormat": docker: - image: ubuntu:bionic steps: - checkout - run: | apt-get -q update -y apt-get -q install -y apt-transport-https ca-certificates git gnupg software-properties-common wget wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key 2>/dev/null | apt-key add - apt-add-repository -y 'deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' apt-get -q update -y - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ clang-format-10 update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-10 50 if [ -n "$CIRCLE_PULL_REQUEST" ]; then \ subset=$(/bin/bash .circleci/longest_common_path.sh); \ echo "Changes of ${CIRCLE_SHA1} are all under $subset"; \ fi scripts/check_format.sh ${subset:-.} "Sanitize": docker: - image: ubuntu:bionic steps: - checkout - run: | apt-get -q update -y apt-get -q install -y apt-transport-https ca-certificates git gnupg software-properties-common wget wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key 2>/dev/null | apt-key add - apt-add-repository -y 'ppa:ubuntu-toolchain-r/test' apt-add-repository -y 'deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' apt-add-repository -y 'deb https://apt.kitware.com/ubuntu/ bionic main' apt-add-repository -y 'ppa:mhier/libboost-latest' apt-get -q update -y - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ clang-10 \ cmake \ libboost1.70-dev \ libeigen3-dev \ openmpi-bin \ libopenmpi-dev \ llvm-7-dev \ libz-dev \ libfmt-dev update-alternatives --install /usr/bin/clang clang /usr/bin/clang-10 50 update-alternatives --install /usr/bin/clang++ clang++ 
/usr/bin/clang++-10 50 chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . -B /tmp/build \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ -DGALOIS_USE_SANITIZER="Address;Undefined" - <<: *cmake_build_test "Debian": docker: - image: debian:10 steps: - checkout - run: | apt-get -q update -y apt-get -q install -y git - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ cmake \ g++ \ gcc \ libboost-iostreams-dev \ libboost-serialization-dev \ libeigen3-dev \ libmpich-dev \ llvm-7-dev \ mpich \ zlib1g-dev \ libfmt-dev chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . -B /tmp/build \ -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "Ubuntu-18_04": docker: - image: ubuntu:18.04 steps: - checkout - run: | apt-get -q update -y apt-get -q install -y apt-transport-https ca-certificates git gnupg software-properties-common wget wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' apt-get -q update -y - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ cmake \ g++ \ gcc \ libboost-all-dev \ libeigen3-dev \ libopenmpi-dev \ llvm-7-dev \ openmpi-bin \ ssh \ libfmt-dev chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . 
-B /tmp/build \ -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "Ubuntu-18_04-cuda11_0_3-build-only": docker: - image: nvidia/cuda:11.0.3-devel-ubuntu18.04 steps: - checkout - run: | apt-get -q update -y apt-get -q install -y apt-transport-https ca-certificates git gnupg software-properties-common wget wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' apt-get -q update -y - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ cmake \ g++ \ gcc \ libboost-all-dev \ libeigen3-dev \ libopenmpi-dev \ llvm-7-dev \ openmpi-bin \ ssh \ libfmt-dev cmake -S . -B /tmp/build \ -DGALOIS_ENABLE_DIST=ON \ -DGALOIS_ENABLE_GPU=ON cmake --build /tmp/build --target input cmake --build /tmp/build --parallel 2 "Ubuntu-18_04-cuda11_1_1-build-only": docker: - image: nvidia/cuda:11.1.1-devel-ubuntu18.04 steps: - checkout - run: | apt-get -q update -y apt-get -q install -y apt-transport-https ca-certificates git gnupg software-properties-common wget wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' apt-get -q update -y - run: git submodule sync - run: git submodule update --init - run: | apt-get -q install -y \ cmake \ g++ \ gcc \ libboost-all-dev \ libeigen3-dev \ libopenmpi-dev \ llvm-7-dev \ openmpi-bin \ ssh \ libfmt-dev cmake -S . 
-B /tmp/build \ -DGALOIS_ENABLE_DIST=ON \ -DGALOIS_ENABLE_GPU=ON cmake --build /tmp/build --target input cmake --build /tmp/build --parallel 2 "CentOS-8-gcc": docker: - image: centos:8 steps: - checkout - run: | # CentOS Linux 8 has reached End Of Life (EOL) on December 31st, 2021 ls /etc/yum.repos.d/ > /dev/null 2>&1 sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* - run: | # fmt-devel is in EPEL yum -y -q install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm # eigen3-devel needs PowerTools packages yum -y -q install dnf-plugins-core yum -y -q config-manager --set-enabled powertools yum -y -q install git - run: git submodule sync - run: git submodule update --init - run: | yum -y -q install \ boost-devel \ cmake \ eigen3-devel \ gcc \ gcc-c++ \ llvm-devel \ llvm-static \ make \ mpich-devel \ ncurses-devel \ wget \ zlib-devel \ fmt-devel wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.tar.gz | tar -xz -f - -C /usr/local ln -s /usr/local/cmake-3.17.0-Linux-x86_64/bin/cmake /usr/local/bin/cmake ln -s /usr/local/cmake-3.17.0-Linux-x86_64/bin/ctest /usr/local/bin/ctest # Make the "module" command work in the subsequent shell sessions. cat /etc/profile.d/modules.sh >> $BASH_ENV echo "module load mpi" >> $BASH_ENV - run: | chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . 
-B /tmp/build \ -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "CentOS-8-clang": docker: - image: centos:8 steps: - checkout - run: | # CentOS Linux 8 has reached End Of Life (EOL) on December 31st, 2021 ls /etc/yum.repos.d/ > /dev/null 2>&1 sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* - run: | # fmt-devel is in EPEL yum -y -q install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm # eigen3-devel needs PowerTools packages yum -y -q install dnf-plugins-core yum -y -q config-manager --set-enabled powertools yum -y -q install git - run: git submodule sync - run: git submodule update --init - run: | yum -y -q install \ boost-devel \ eigen3-devel \ llvm-devel \ llvm-static \ llvm-toolset \ make \ openmpi-devel \ ncurses-devel \ wget \ zlib-devel \ fmt-devel wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.tar.gz | tar -xz -f - -C /usr/local ln -s /usr/local/cmake-3.17.0-Linux-x86_64/bin/cmake /usr/local/bin/cmake ln -s /usr/local/cmake-3.17.0-Linux-x86_64/bin/ctest /usr/local/bin/ctest # Make the "module" command work in the subsequent shell sessions. cat /etc/profile.d/modules.sh >> $BASH_ENV echo "module load mpi" >> $BASH_ENV - run: | chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . -B /tmp/build \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ # -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "Arch": docker: - image: archlinux:base steps: - checkout - run: | pacman -Syu --noconfirm pacman -q -S --noconfirm git - run: git submodule sync - run: git submodule update --init - run: | # NB(ddn): make requires libffi but its package doesn't depend on it. pacman -q -S --noconfirm \ boost \ cmake \ eigen \ gcc \ libffi \ llvm \ make \ openmpi \ fmt chmod 755 /root useradd runner mkdir -p /tmp/build cmake -S . 
-B /tmp/build \ -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "Alpine": docker: - image: alpine:latest steps: - checkout - run: | apk add --no-cache --no-progress git bash - run: git submodule sync - run: git submodule update --init - run: | apk add --no-cache --no-progress \ boost-dev \ cmake \ eigen \ g++ \ gcc \ llvm14-dev \ llvm14-static \ make \ musl-dev \ openssh-client \ zlib-dev \ fmt-dev chmod 755 /root adduser -D runner mkdir -p /tmp/build cmake -S . -B /tmp/build - <<: *cmake_build_test "Fedora-gcc": docker: - image: fedora:latest steps: - checkout - run: | yum -y -q install git - run: git submodule sync - run: git submodule update --init - run: | yum -y -q install \ boost-devel \ cmake \ eigen3-devel \ gcc-c++ \ llvm-devel \ llvm-static \ make \ mpich-devel \ wget \ zlib-devel \ fmt-devel chmod 755 /root useradd runner mkdir -p /tmp/build # Get the "module" function set up before loading MPI. cat /etc/profile.d/modules.sh >> $BASH_ENV echo "module load mpi" >> $BASH_ENV - run: | cmake -S . -B /tmp/build \ -DGALOIS_ENABLE_DIST=ON - <<: *cmake_build_test "Fedora-clang": docker: - image: fedora:latest steps: - checkout - run: | yum -y -q install git - run: git submodule sync - run: git submodule update --init - run: | yum -y -q install \ boost-devel \ clang \ cmake \ eigen3-devel \ llvm-devel \ llvm-static \ make \ openmpi-devel \ wget \ zlib-devel \ fmt-devel chmod 755 /root useradd runner mkdir -p /tmp/build # Get the "module" function set up before loading MPI. cat /etc/profile.d/modules.sh >> $BASH_ENV echo "module load mpi" >> $BASH_ENV - run: | cmake -S . 
-B /tmp/build \
              -DCMAKE_C_COMPILER=clang \
              -DCMAKE_CXX_COMPILER=clang++
            # -DGALOIS_ENABLE_DIST=ON
      - <<: *cmake_build_test

workflows:
  build:
    jobs:
      - "CheckFormat"
      - "Sanitize"
      - "Alpine":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Arch":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "CentOS-8-clang":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "CentOS-8-gcc":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Debian":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Fedora-clang":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Fedora-gcc":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Ubuntu-18_04":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Ubuntu-18_04-cuda11_1_1-build-only":
          requires:
            - "CheckFormat"
            # - "Sanitize"
      - "Ubuntu-18_04-cuda11_0_3-build-only":
          requires:
            - "CheckFormat"
            # - "Sanitize"


================================================
FILE: .circleci/longest_common_path.sh
================================================
#!/bin/bash
# For PR build only; find the longest common path prefix as the build and test subset

# Print the deepest ancestor directory of the first argument that contains a
# CMakeLists.txt and is a path prefix of EVERY argument; print "." when no
# such directory exists. Arguments are expected to be repo-relative paths
# (as produced by `git diff --name-only`).
longest_common_prefix() {
  declare -a possible_prefix
  declare i=0
  path="${1%/}"
  # Walk up from the first path, collecting every ancestor that hosts a
  # CMakeLists.txt, ordered from deepest to shallowest.
  while [ "$path" != "." ]; do
    if [[ -d $path && -f "$path/CMakeLists.txt" ]]; then
      possible_prefix[$i]="$path"
    fi
    i=$(($i + 1))
    path=$(dirname "$path")
  done
  lcp="."
  # The first (deepest) candidate that prefixes all arguments wins.
  for prefix in "${possible_prefix[@]}"; do
    # Quote "$@": unquoted $@ re-splits paths containing IFS characters.
    for path in "$@"; do
      # Quote "$prefix" in the pattern position so glob metacharacters in a
      # directory name are matched literally, not expanded.
      if [ "${path#"$prefix"}" = "${path}" ]; then
        continue 2
      fi
    done
    lcp="$prefix"
    break
  done
  echo "$lcp"
}

# Resolve the PR's base branch via the GitHub API; CIRCLE_PULL_REQUEST is a
# web URL, so rewrite /pull/ to the API's /pulls/ form.
base=$( \
  wget -q -O - "https://api.github.com/repos/$(echo ${CIRCLE_PULL_REQUEST:19} | sed "s/\/pull\//\/pulls\//")" \
  | sed -n -e "s/^.*IntelligentSoftwareSystems://p" \
  | sed -n -e "s/\".*$//p" \
)

# Intentionally unquoted: each changed file becomes a separate argument.
longest_common_prefix $(git -c core.quotepath=false diff --name-only $base $CIRCLE_SHA1)


================================================
FILE: .clang-format
================================================
---
Language: Cpp
BasedOnStyle: LLVM
AccessModifierOffset: -2
AlignAfterOpenBracket: true
AlignConsecutiveAssignments: true
AlignEscapedNewlinesLeft: false
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IndentCaseLabels: false
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SortIncludes: false SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 4 UseTab: Never ... ================================================ FILE: .clang-tidy ================================================ --- # Enable most checks then disable (-) problematics ones: # # Some checks are good in principle but cannot be applied automatically either # because they require taste or the autofix can generate wrong code: # # - cppcoreguidelines-pro-type-member-init: wrong code sometimes # - google-explicit-constructor: libllvm has implicit conversions # - modernize-use-no-discard # - modernize-use-transparent-functors # - modernize-use-using: autofix doesn't handle dependent type templates # - readability-static-accessed-through-instance: wrong code sometimes # # No consensus: # # - modernize-use-trailing-return-type: also huge code churn # - readability-convert-member-functions-to-static # - readability-implicit-bool-conversion Checks: | abseil-*, boost-*, bugprone-*, clang-analyzer-*, clang-diagnostic-*, cppcoreguidelines-*, -cppcoreguidelines-pro-type-member-init, google-*, -google-explicit-constructor, modernize-*, -modernize-use-nodiscard, -modernize-use-trailing-return-type, -modernize-use-transparent-functors, -modernize-use-using, mpi-*, openmp-*, performance-*, readability-*, -readability-convert-member-functions-to-static, -readability-static-accessed-through-instance, -readability-implicit-bool-conversion, WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: 
false FormatStyle: file CheckOptions: - key: cert-dcl16-c.NewSuffixes value: 'L;LL;LU;LLU' - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField value: '0' - key: cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors value: '1' - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic value: '1' - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' ... ================================================ FILE: .git-blame-ignore-revs ================================================ # Bulk-change revisions to ignore in git blame # # Requires git v2.23 # # To use: # # git blame --ignore-revs-file .git-blame-ignore-revs # # or more permanently: # # git config blame.ignoreRevsFile .git-blame-ignore-revs # Run clang-format. 02ecf4f4ea6ed8618a3826f98c3ea192ee38ca2d # Re-run clang-format. 47ddbe14de2e61b87749cd20bd368f07ef3c322f # Reorganize the lonestar directories. 6ade1c5ac3cf0c261aff7bee863e46b2c124d174 # Run clang-format. 
517fca343c75f842096b661e3ff883bb93f5c09e # Another round of clang-format 2264b05ece3f9ec2b9bf397594cc14ef99f498de # Fix endlines for barneshut app 558ccb83ab2e388c1202396f42d0881912e6393d ================================================ FILE: .gitignore ================================================ # no editor files *~ *.backup /.dir-locals.el *.orig *.patch /.project .settings .*.swo *.swp .vscode # no tool generated files .clang-complete .clangd compile_commands.json cscope.out /GPATH /GRTAGS /GTAGS .tags* tags .ycm_extra_conf.py # no build files /build* # no python build artifacts *.pyc /python/galois.egg-info /python/galois/*.so /_skbuild ================================================ FILE: .gitmodules ================================================ [submodule "moderngpu"] path = external/moderngpu url = https://github.com/moderngpu/moderngpu.git [submodule "cub"] path = external/cub url = https://github.com/NVlabs/cub.git [submodule "docs"] path = docs url = https://github.com/IntelligentSoftwareSystems/Galois-docs.git ================================================ FILE: .travis.yml ================================================ dist: bionic language: c++ git: submodules: true matrix: include: - os: osx osx_image: xcode11.3 before_script: - export CC=clang - export CXX=clang++ - brew install openmpi llvm fmt - mkdir build - export PATH=$PATH:/usr/local/opt/llvm/bin - cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DGALOIS_ENABLE_DIST=ON || exit 1 - env: - GCC_VER=7 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - gcc-7 - g++-7 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - GCC_VER=8 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - gcc-8 - g++-8 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - GCC_VER=9 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - gcc-9 - g++-9 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - GCC_VER=10 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - gcc-10 - g++-10 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - GCC_VER=10 - BUILD_TYPE=Debug addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - gcc-10 - g++-10 - cmake - libboost1.70-dev - 
libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - CLANG_VER=7 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - clang-7 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - CLANG_VER=8 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - clang-8 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - CLANG_VER=9 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - clang-9 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - CLANG_VER=10 addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' 
- sourceline: 'ppa:mhier/libboost-latest' packages: - clang-10 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev - env: - CLANG_VER=10 - BUILD_TYPE=Debug addons: apt: sources: - sourceline: 'ppa:ubuntu-toolchain-r/test' - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - sourceline: 'deb https://apt.kitware.com/ubuntu/ bionic main' key_url: 'https://apt.kitware.com/keys/kitware-archive-latest.asc' - sourceline: 'ppa:mhier/libboost-latest' packages: - clang-10 - cmake - libboost1.70-dev - libeigen3-dev - openmpi-bin - libopenmpi-dev - llvm-7-dev - libz-dev - libfmt-dev before_script: # Depending on whether GCC_VER or CLANG_VER is set and nonempty, # set CC and CXX accordingly. - | if [ -n "$GCC_VER" ]; then export CC="gcc-$GCC_VER" export CXX="g++-$GCC_VER" fi - | if [ -n "$CLANG_VER" ]; then export CC="clang-$CLANG_VER" export CXX="clang++-$CLANG_VER" fi - | # Check if BUILD_TYPE is set at all, not just whether it is empty or unset. # See https://stackoverflow.com/a/13864829/1935144. if [ -z ${BUILD_TYPE+x} ]; then export BUILD_TYPE=Release fi - mkdir build # Use apt-installed llvm-7-dev rather than travis-provided one which is # picked up through the local clang-7 install in /usr/local/clang-7. - export CMAKE_PREFIX_PATH=/usr/lib/llvm-7 # Use apt-installed cmake rather than travis-provided one # (/usr/local/cmake-3.12.4/bin/cmake). - /usr/bin/cmake -S . 
-B build -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DGALOIS_ENABLE_DIST=ON || exit 1 script: - make -C build input - cmake --build build --parallel 2 || exit 1 - (cd build && ctest --output-on-failure --parallel 2 --label-regex quick) || exit 1 notifications: email: false ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.13) project(Galois) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") include(GNUInstallDirs) file(STRINGS config/version.txt GALOIS_VERSION) string(REGEX REPLACE "[ \t\n]" "" GALOIS_VERSION ${GALOIS_VERSION}) string(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\1" GALOIS_VERSION_MAJOR ${GALOIS_VERSION}) string(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\2" GALOIS_VERSION_MINOR ${GALOIS_VERSION}) string(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\3" GALOIS_VERSION_PATCH ${GALOIS_VERSION}) set(GALOIS_COPYRIGHT_YEAR "2018") # Also in COPYRIGHT if(NOT CMAKE_BUILD_TYPE) message(STATUS "No build type selected, default to Release") # cmake default flags with relwithdebinfo is -O2 -g # cmake default flags with release is -O3 -DNDEBUG set(CMAKE_BUILD_TYPE "Release") endif() ###### Options (alternatively pass as options to cmake -DName=Value) ###### ###### Distributed-heterogeneous features ###### set(GALOIS_ENABLE_DIST OFF CACHE BOOL "Enable distributed features") set(GALOIS_CUDA_CAPABILITY "" CACHE STRING "Semi-colon list of CUDA compute capability version numbers to enable GPU features") # e.g., "3.7;6.1" set(GALOIS_COMM_STATS OFF CACHE BOOL "Report more detailed statistics of communication") ###### General features ###### set(GALOIS_ENABLE_PAPI OFF CACHE BOOL "Use PAPI counters for profiling") set(GALOIS_ENABLE_VTUNE OFF CACHE BOOL "Use VTune for profiling") set(GALOIS_STRICT_CONFIG OFF CACHE BOOL "Instead of falling back gracefully, fail") set(GALOIS_GRAPH_LOCATION "" CACHE PATH 
"Location of inputs for tests if downloaded/stored separately.") set(CXX_CLANG_TIDY "" CACHE STRING "Semi-colon list specifying clang-tidy command and arguments") set(CMAKE_CXX_COMPILER_LAUNCHER "" CACHE STRING "Semi-colon list specifying command to wrap compiler invocations (e.g., ccache)") set(USE_ARCH native CACHE STRING "Optimize for a specific processor architecture ('none' to disable)") set(GALOIS_USE_SANITIZER "" CACHE STRING "Semi-colon list of sanitizers to use (Memory, MemoryWithOrigins, Address, Undefined, Thread)") # This option is automatically handled by CMake. # It makes add_library build a shared lib unless STATIC is explicitly specified. # Putting this here is mostly just a placeholder so people know it's an option. # Currently this is really only intended to change anything for the libgalois_shmem target. set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries") set(BUILD_DOCS "" CACHE STRING "Build documentation with make doc. Supported values: , external, internal. 
external docs hide '*-draft*' and '*-internal* documentation pages and directories when building documentation") ###### Developer features ###### set(GALOIS_PER_ROUND_STATS OFF CACHE BOOL "Report statistics of each round of execution") set(GALOIS_NUM_TEST_GPUS "0" CACHE STRING "Number of test GPUs to use (on a single machine) for running the tests.") set(GALOIS_USE_LCI OFF CACHE BOOL "Use LCI network runtime instead of MPI") set(GALOIS_USE_BARE_MPI OFF CACHE BOOL "Use MPI directly (no dedicated network-runtime thread)") set(GALOIS_NUM_TEST_THREADS "" CACHE STRING "Maximum number of threads to use when running tests (default: number of physical cores)") if(NOT GALOIS_NUM_TEST_THREADS) cmake_host_system_information(RESULT GALOIS_NUM_TEST_THREADS QUERY NUMBER_OF_PHYSICAL_CORES) endif() if(GALOIS_NUM_TEST_THREADS LESS_EQUAL 0) set(GALOIS_NUM_TEST_THREADS 1) endif() ###### Configure (users don't need to go beyond here) ###### include(CTest) ###### Configure compiler ###### # generate compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) #...without compiler extensions like gnu++11 set(CMAKE_POSITION_INDEPENDENT_CODE ON) # Always include debug info add_compile_options("$<$:-g>") # GCC if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) message(FATAL_ERROR "gcc must be version 7 or higher. Found ${CMAKE_CXX_COMPILER_VERSION}.") endif() add_compile_options("$<$:-Wall;-Wextra>") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11) add_compile_options("$<$:-Werror>") endif() endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) message(FATAL_ERROR "clang must be version 7 or higher. 
Found ${CMAKE_CXX_COMPILER_VERSION}.") endif() add_compile_options("$<$:-Wall;-Wextra>") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11) add_compile_options("$<$:-Werror>") endif() endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") add_compile_options("$<$:-Wall;-Wextra>") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12) add_compile_options("$<$:-Werror>") endif() endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0.1) message(FATAL_ERROR "icpc must be 19.0.1 or higher. Found ${CMAKE_CXX_COMPILER_VERSION}.") endif() # Avoid warnings when using noinline for methods defined inside class defintion. add_compile_options("$<$:-wd2196>") endif() # Enable architecture-specific optimizations include(CheckArchFlags) if(ARCH_FLAGS_FOUND) add_compile_options("$<$:${ARCH_CXX_FLAGS}>") add_compile_options("$<$:${ARCH_C_FLAGS}>") add_link_options(${ARCH_LINK_FLAGS}) endif() if(CXX_CLANG_TIDY) set(CMAKE_CXX_CLANG_TIDY ${CXX_CLANG_TIDY} "-header-filter=.*${PROJECT_SOURCE_DIR}.*") # Ignore warning flags intended for the CXX program. This only works because # the two compilers we care about, clang and gcc, both understand # -Wno-unknown-warning-option. 
add_compile_options("$<$:-Wno-unknown-warning-option>") endif() ###### Configure features ###### if(GALOIS_ENABLE_VTUNE) set(VTune_ROOT /opt/intel/vtune_amplifier) find_package(VTune REQUIRED) include_directories(${VTune_INCLUDE_DIRS}) add_definitions(-DGALOIS_ENABLE_VTUNE) endif() if(GALOIS_ENABLE_PAPI) find_package(PAPI REQUIRED) include_directories(${PAPI_INCLUDE_DIRS}) add_definitions(-DGALOIS_ENABLE_PAPI) endif() find_package(Threads REQUIRED) include(CheckMmap) include(CheckHugePages) if(NOT HAVE_HUGEPAGES AND GALOIS_STRICT_CONFIG) message(FATAL_ERROR "Need huge pages") endif() find_package(Boost 1.58.0 REQUIRED COMPONENTS serialization iostreams) find_package(LLVM REQUIRED CONFIG) if("${LLVM_PACKAGE_VERSION}" VERSION_LESS "7") message(FATAL_ERROR "LLVM 7 or greater is required.") endif() if(NOT DEFINED LLVM_ENABLE_RTTI) message(FATAL_ERROR "Could not determine if LLVM has RTTI enabled.") endif() if(NOT ${LLVM_ENABLE_RTTI}) message(FATAL_ERROR "Galois requires a build of LLVM that includes RTTI. 
Most package managers do this already, but if you built LLVM from source you need to configure it with `-DLLVM_ENABLE_RTTI=ON`") endif() target_include_directories(LLVMSupport INTERFACE ${LLVM_INCLUDE_DIRS}) include(HandleSanitizer) include(CheckEndian) ###### Test Inputs ###### if(GALOIS_GRAPH_LOCATION) set(BASEINPUT "${GALOIS_GRAPH_LOCATION}") set(BASEOUTPUT "${GALOIS_GRAPH_LOCATION}") message(STATUS "Using graph input and output location ${GALOIS_GRAPH_LOCATION}") elseif(EXISTS /net/ohm/export/iss) set(BASEINPUT /net/ohm/export/iss/inputs) MESSAGE(STATUS "Using graph input location /net/ohm/export/iss/inputs") set(BASEOUTPUT /net/ohm/export/iss/dist-outputs) MESSAGE(STATUS "Using graph output location /net/ohm/export/iss/dist-outputs") else() set(BASEINPUT "${PROJECT_BINARY_DIR}/inputs") set(BASEOUTPUT "${PROJECT_BINARY_DIR}/inputs") message(STATUS "Use 'make input' to download inputs and outputs in the build directory") endif() ###### Source finding ###### add_custom_target(lib) add_custom_target(apps) # Core libraries (lib) add_subdirectory(libsupport) add_subdirectory(libgalois) add_subdirectory(libpygalois) if (GALOIS_ENABLE_DIST) find_package(MPI REQUIRED) add_subdirectory(libdist) add_subdirectory(libcusp) add_subdirectory(libgluon) endif() string(COMPARE NOTEQUAL "${GALOIS_CUDA_CAPABILITY}" "" GALOIS_ENABLE_GPU) if (GALOIS_ENABLE_GPU) enable_language(CUDA) foreach(GENCODE ${GALOIS_CUDA_CAPABILITY}) string(REPLACE "." 
"" GENCODE ${GENCODE}) add_compile_options("$<$:-gencode=arch=compute_${GENCODE},code=sm_${GENCODE}>") endforeach() # This is necessary to allow building for CUDA 11.x (where CUB is bundled) and earlier versions (where CUB is not included) add_definitions(-DTHRUST_IGNORE_CUB_VERSION_CHECK) add_subdirectory(libgpu) endif() add_subdirectory(libpangolin) # Applications (apps) add_subdirectory(lonestar) add_subdirectory(scripts) add_subdirectory(inputs) add_subdirectory(tools) if(USE_EXP) add_subdirectory(lonestar/experimental) endif(USE_EXP) ###### Documentation ###### if(BUILD_DOCS) set(GALOIS_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(docs) endif() ###### Installation ###### include(CMakePackageConfigHelpers) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/GaloisConfigVersion.cmake VERSION ${GALOIS_VERSION} COMPATIBILITY SameMajorVersion ) configure_package_config_file( cmake/GaloisConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/GaloisConfig.cmake INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/Galois" PATH_VARS CMAKE_INSTALL_INCLUDEDIR CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_BINDIR ) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/GaloisConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/GaloisConfig.cmake" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/Galois" COMPONENT dev ) install( EXPORT GaloisTargets NAMESPACE Galois:: DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/Galois" COMPONENT dev ) ###### Distribution ###### set(CPACK_GENERATOR "TGZ") set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT") set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") set(CPACK_PACKAGE_VERSION_MAJOR ${GALOIS_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${GALOIS_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${GALOIS_VERSION_PATCH}) include(CPack) ================================================ FILE: COPYRIGHT ================================================ Galois, a framework to exploit amorphous 
data-parallelism in irregular programs. Copyright (C) 2018, The University of Texas at Austin. All rights reserved. UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances shall University be liable for incidental, special, indirect, direct or consequential damages or loss of profits, interruption of business, or related expenses which may arise from use of Software or Documentation, including but not limited to those resulting from defects in Software and/or Documentation, or loss or inaccuracy of data of any kind. This software is released under the terms of the 3-Clause BSD License (a copy is located in LICENSE.txt at the top-level directory). ================================================ FILE: LICENSE.txt ================================================ The 3-Clause BSD License Copyright 2018 The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ Overview ======== [![CircleCI](https://circleci.com/gh/IntelligentSoftwareSystems/Galois.svg?style=svg)](https://circleci.com/gh/IntelligentSoftwareSystems/Galois) [![Build Status](https://travis-ci.org/IntelligentSoftwareSystems/Galois.svg?branch=master)](https://travis-ci.org/IntelligentSoftwareSystems/Galois) Galois is a C++ library designed to ease parallel programming, especially for applications with irregular parallelism (e.g., irregular amount of work in parallel sections, irregular memory accesses and branching patterns). It implements an implicitly parallel programming model, where the programmer replaces serial loop constructs (e.g. for and while) and serial data structures in their algorithms with parallel loop constructs and concurrent data structures provided by Galois to express their algorithms. Galois is designed so that the programmer does not have to deal with low-level parallel programming constructs such as threads, locks, barriers, condition variables, etc. 
Highlights include: - Parallel *for_each* loop that handles dependencies between iterations, as well as dynamic work creation, and a *do_all* loop for simple parallelism. Both provide load balancing and excellent scalability on multi-socket systems - A concurrent graph library designed for graph analytics algorithms as well as other domains such as irregular meshes. - Scalable concurrent containers such as bag, vector, list, etc. Galois is released under the BSD-3-Clause license. Building Galois =============== You can checkout the latest release by typing (in a terminal): ```Shell git clone -b release-5.0 https://github.com/IntelligentSoftwareSystems/Galois ``` The master branch will be regularly updated, so you may try out the latest development code as well by checking out master branch: ```Shell git clone https://github.com/IntelligentSoftwareSystems/Galois ``` Dependencies ------------ Galois builds, runs, and has been tested on GNU/Linux. Even though Galois may build on systems similar to Linux, we have not tested correctness or performance, so please beware. At the minimum, Galois depends on the following software: - A modern C++ compiler compliant with the C++-17 standard (gcc >= 7, Intel >= 19.0.1, clang >= 7.0) - CMake (>= 3.13) - Boost library (>= 1.58.0, we recommend building/installing the full library) - libllvm (>= 7.0 with RTTI support) - libfmt (>= 4.0) Here are the dependencies for the optional features: - Linux HUGE_PAGES support (please see [www.kernel.org/doc/Documentation/vm/hugetlbpage.txt](https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt)). Performance will most likely degrade without HUGE_PAGES enabled. Galois uses 2MB huge page size and relies on the kernel configuration to set aside a large amount of 2MB pages. 
For example, our performance testing machine (4x14 cores, 192GB RAM) is configured to support up to 65536 2MB pages: ```Shell cat /proc/meminfo | fgrep Huge AnonHugePages: 104448 kB HugePages_Total: 65536 HugePages_Free: 65536 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 2048 kB ``` - libnuma support. Performance may degrade without it. Please install libnuma-dev on Debian like systems, and numactl-dev on Red Hat like systems. - Doxygen (>= 1.8.5) for compiling documentation as webpages or latex files - PAPI (>= 5.2.0.0 ) for profiling sections of code - Vtune (>= 2017 ) for profiling sections of code - MPICH2 (>= 3.2) if you are interested in building and running distributed system applications in Galois - CUDA (>= 8.0 and < 11.0) if you want to build GPU or distributed heterogeneous applications. Note that versions >= 11.0 use an incompatible CUB module and will fail to execute. - Eigen (3.3.1 works for us) for some matrix-completion app variants Compiling and Testing Galois ---------------------------- We use CMake to streamline building, testing and installing Galois. In the following, we will highlight some common commands. Let's assume that `SRC_DIR` is the directory where the source code for Galois resides, and you wish to build Galois in some `BUILD_DIR`. Run the following commands to set up a build directory: ```Shell SRC_DIR=`pwd` # Or top-level Galois source dir BUILD_DIR= mkdir -p $BUILD_DIR cmake -S $SRC_DIR -B $BUILD_DIR -DCMAKE_BUILD_TYPE=Release ``` You can also set up a `Debug` build by running the following instead of the last command above: ```Shell cmake -S $SRC_DIR -B $BUILD_DIR -DCMAKE_BUILD_TYPE=Debug ``` Galois applications are in `lonestar` directory. 
In order to build a particular application: ```Shell make -C $BUILD_DIR/lonestar/ -j # or alternatively make -C $BUILD_DIR -j # or cmake --build $BUILD_DIR --parallel ``` You can also build everything by running `make -j` in the top-level of build directory, but that may take a lot of time. Setting the `BUILD_SHARED_LIBS` to `ON` when calling CMake will make the core runtime library be built as a shared object instead of a static library. The tests for the core runtime will be built by default when you run `make` with no target specified. They can be also built explicitly with: ```Shell make -C $BUILD_DIR/test ``` We provide a few sample inputs that can be downloaded by running: ```Shell make -C $BUILD_DIR input ``` `make input` will download a tarball of inputs and extract it to `$BUILD_DIR/inputs/small_inputs` directory. The tarball is downloaded to `$BUILD_DIR/inputs` Most of the Galois apps have corresponding tests. These tests depend on downloading the reference inputs and building the corresponding apps and test binaries. Once the reference inputs have been downloaded and everything has been built, the tests for the core library and all the apps can be run by running: ```Shell make test # or alternatively ctest ``` in the build directory. Running Galois Applications =========================== Graph Format ------------ Many Galois/Lonestar applications work with graphs. We store graphs in a binary format called *galois graph file* (`.gr` file extension). Other formats such as edge-list or Matrix-Market can be converted to `.gr` format with `graph-convert` tool provided in galois. You can build graph-convert as follows: ```Shell cd $BUILD_DIR make graph-convert ./tools/graph-convert/graph-convert --help ``` Other applications, such as Delaunay Mesh Refinement may read special file formats or some may even generate random inputs on the fly. Running ------- All Lonestar applications take a `-t` command-line option to specify the number of threads to use. 
All applications run a basic sanity check (often insufficient for correctness) on the program output, which can be turned off with the `-noverify` option. You can specify `-help` command-line option to print all available options. Upon successful completion, each application will produce some stats regarding running time of various sections, parallel loop iterations and memory usage, etc. These stats are in CSV format and can be redirected to a file using `-statFile` option. Please refer to the manual for details on stats. Running LonestarGPU applications -------------------------- Please refer to `lonestar/analytics/gpu/README.md` and `lonestar/scientific/gpu/README.md` for more details on compiling and running LonestarGPU applications. Running Distributed Galois -------------------------- Please refer to `lonestar/analytics/distributed/README.md` for more details on running distributed benchmarks. Documentation ============= Galois documentation is produced using doxygen, included in this repository, which includes a tutorial, a user's manual and API documentation for the Galois library. Users can build doxygen documentation in the build directory using: ```Shell cd $BUILD_DIR make doc your-fav-browser html/index.html & ``` See online documentation at: [http://iss.ices.utexas.edu/?p=projects/galois](http://iss.ices.utexas.edu/?p=projects/galois) Source-Tree Organization ======================== - `libgalois` contains the source code for the shared-memory Galois library, e.g., runtime, graphs, worklists, etc. - `lonestar` contains the Lonestar benchmark applications and tutorial examples for Galois - `libdist` contains the source code for the distributed-memory and heterogeneous Galois library - `lonestardist` contains the source code for the distributed-memory and heterogeneous benchmark applications. Please refer to `lonestardist/README.md` for instructions on building and running these apps. 
- `tools` contains various helper programs such as graph-converter to convert between graph file formats and graph-stats to print graph properties Using Galois as a library ========================= There are two common ways to use Galois as a library. One way is to copy this repository into your own CMake project, typically using a git submodule. Then you can put the following in your CMakeLists.txt: ```CMake add_subdirectory(galois EXCLUDE_FROM_ALL) add_executable(app ...) target_link_libraries(app Galois::shmem) ``` The other common method is to install Galois outside your project and import it as a package. If you want to install Galois, assuming that you wish to install it under `INSTALL_DIR`: ```Shell cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR $SRC_DIR make install ``` Then, you can put something like the following in CMakeLists.txt: ```CMake list(APPEND CMAKE_PREFIX_PATH ${INSTALL_DIR}) find_package(Galois REQUIRED) add_executable(app ...) target_link_libraries(app Galois::shmem) ``` If you are not using CMake, the corresponding basic commands (although the specific commands vary by system) are: ```Shell c++ -std=c++17 app.cpp -I$INSTALL_DIR/include -L$INSTALL_DIR/lib -lgalois_shmem ``` Third-Party Libraries and Licensing ==================== Galois includes some third party libraries that do not use the same license as Galois. This includes the bliss library (located in lonestar/include/Mining/bliss) and Modern GPU (located in libgpu/moderngpu). Please be aware of this when using Galois. Contact Us ========== For bugs, please raise an [issue](https://github.com/IntelligentSoftwareSystems/Galois/issues) on GitHub. Questions and comments are also welcome at the Galois users mailing list: [galois-users@utlists.utexas.edu](galois-users@utlists.utexas.edu). You may [subscribe here](https://utlists.utexas.edu/sympa/subscribe/galois-users). 
If you find a bug, it would help us if you sent (1) the command line and program inputs and outputs and (2) a core dump, preferably from an executable built with the debug build. You can enable core dumps by setting `ulimit -c unlimited` before running your program. The location where the core dumps will be stored can be determined with `cat /proc/sys/kernel/core_pattern`. To create a debug build, assuming you will build Galois in `BUILD_DIR` and the source is in `SRC_DIR`: ```Shell cmake -S $SRC_DIR -B $BUILD_DIR -DCMAKE_BUILD_TYPE=Debug make -C $BUILD_DIR ``` A simple way to capture relevant debugging details is to use the `script` command, which will record your terminal input and output. For example, ```Shell script debug-log.txt ulimit -c unlimited cat /proc/sys/kernel/core_pattern make -C $BUILD_DIR VERBOSE=1 my-app with-failing-input exit ``` This will generate a file `debug-log.txt`, which you can send to the mailing list:[galois-users@utlists.utexas.edu](galois-users@utlists.utexas.edu) for further debugging or supply when opening a GitHub issue. ================================================ FILE: cmake/GaloisConfig.cmake.in ================================================ # Config file for the Galois package # # It exports the following targets: # Galois::shmem # Galois::dist # ... 
# (see GaloisTargets.cmake for all of them) # # It defines the following variables for legacy importing: # Galois_INCLUDE_DIRS # Galois_LIBRARIES # Galois_LIBRARY_DIRS # Galois_BIN_DIRS include(CMakeFindDependencyMacro) @PACKAGE_INIT@ set_and_check(Galois_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") set_and_check(Galois_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") set_and_check(Galois_BIN_DIRS "@PACKAGE_CMAKE_INSTALL_BINDIR@") set(Galois_LIBRARIES galois_shmem) find_dependency(Threads REQUIRED) find_dependency(Boost 1.58.0 REQUIRED COMPONENTS serialization iostreams) if (@GALOIS_ENABLE_DIST@) find_dependency(MPI REQUIRED) endif() get_filename_component(GALOIS_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) if(NOT Galois::shmem) include("${GALOIS_CMAKE_DIR}/GaloisTargets.cmake") endif() ================================================ FILE: cmake/Modules/CheckArchFlags.cmake ================================================ # Find architecture-specific flags # # Once done this will define # ARCH_FLAGS_FOUND # ARCH_CXX_FLAGS - Compiler flags to enable architecture-specific optimizations # ARCH_C_FLAGS - Compiler flags to enable architecture-specific optimizations # ARCH_LINK_FLAGS - Compiler flags to enable architecture-specific optimizations include(CheckCXXCompilerFlag) if(NOT USE_ARCH OR USE_ARCH STREQUAL "none" OR ARCH_FLAGS_FOUND) set(ARCH_CXX_FLAGS_CANDIDATES) else() set(ARCH_CXX_FLAGS_CANDIDATES "-march=${USE_ARCH}") endif() if(USE_ARCH STREQUAL "mic") if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") list(APPEND ARCH_CXX_FLAGS_CANDIDATES -mmic) endif() if(CMAKE_COMPILER_IS_GNUCC) list(APPEND ARCH_CXX_FLAGS_CANDIDATES -march=knc) endif() endif() foreach(FLAG ${ARCH_CXX_FLAGS_CANDIDATES}) message(STATUS "Try architecture flag = [${FLAG}]") unset(ARCH_CXX_FLAGS_DETECTED) check_cxx_compiler_flag("${FLAG}" ARCH_CXX_FLAGS_DETECTED) if(ARCH_CXX_FLAGS_DETECTED) set(ARCH_FLAGS_FOUND "YES") set(ARCH_CXX_FLAGS "${FLAG}") set(ARCH_C_FLAGS "${FLAG}") set(ARCH_LINK_FLAGS 
"${FLAG}") endif() endforeach() ================================================ FILE: cmake/Modules/CheckCilk.cmake ================================================ include(CheckCXXSourceCompiles) set(Cilk_CXX_TEST_SOURCE " #include <cilk/cilk.h> int main(){ cilk_for(int i=0;i<1; ++i); } ") CHECK_CXX_SOURCE_COMPILES("${Cilk_CXX_TEST_SOURCE}" HAVE_CILK) if(HAVE_CILK) message(STATUS "A compiler with CILK support found") endif() ================================================ FILE: cmake/Modules/CheckEndian.cmake ================================================ include(TestBigEndian) TEST_BIG_ENDIAN(HAVE_BIG_ENDIAN) include(CheckIncludeFiles) CHECK_INCLUDE_FILES(endian.h HAVE_ENDIAN_H) include(CheckSymbolExists) CHECK_SYMBOL_EXISTS(le64toh "endian.h" HAVE_LE64TOH) CHECK_SYMBOL_EXISTS(le32toh "endian.h" HAVE_LE32TOH) CHECK_SYMBOL_EXISTS(htobe64 "endian.h" HAVE_HTOBE64) CHECK_SYMBOL_EXISTS(htobe32 "endian.h" HAVE_HTOBE32) CHECK_SYMBOL_EXISTS(htole64 "endian.h" HAVE_HTOLE64) CHECK_SYMBOL_EXISTS(htole32 "endian.h" HAVE_HTOLE32) ================================================ FILE: cmake/Modules/CheckHugePages.cmake ================================================ include(CheckCSourceRuns) set(HugePages_C_TEST_SOURCE " #ifdef __linux__ #include <sys/mman.h> #endif #include <stdio.h> int main(int c, char** argv) { void *ptr = mmap(0, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_HUGETLB, -1, 0); return ptr != MAP_FAILED; } ") if(HAVE_HUGEPAGES) else() CHECK_C_SOURCE_RUNS("${HugePages_C_TEST_SOURCE}" HAVE_HUGEPAGES_INTERNAL) if(HAVE_HUGEPAGES_INTERNAL) message(STATUS "Huge pages found") set(HAVE_HUGEPAGES "${HAVE_HUGEPAGES_INTERNAL}" CACHE BOOL "Have hugepages") endif() endif() ================================================ FILE: cmake/Modules/CheckMmap.cmake ================================================ include(CheckCSourceCompiles) set(Mmap64_C_TEST_SOURCE " #ifdef __linux__ #include <sys/mman.h> #endif #include <stdio.h> int main(int c, char** argv) { void *ptr = mmap64(0, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE, -1, 0); 
return 0; } ") if(HAVE_MMAP64) else() CHECK_C_SOURCE_COMPILES("${Mmap64_C_TEST_SOURCE}" HAVE_MMAP64_INTERNAL) if(HAVE_MMAP64_INTERNAL) message(STATUS "mmap64 found") set(HAVE_MMAP64 "${HAVE_MMAP64_INTERNAL}" CACHE BOOL "Have mmap64") endif() endif() ================================================ FILE: cmake/Modules/CheckSchedSetAffinity.cmake ================================================ include(CheckSymbolExists) if(SCHED_SETAFFINITY_FOUND) else() set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) CHECK_SYMBOL_EXISTS(sched_setaffinity sched.h HAVE_SCHED_SETAFFINITY_INTERNAL) if(HAVE_SCHED_SETAFFINITY_INTERNAL) message(STATUS "sched_setaffinity found") set(SCHED_SETAFFINITY_FOUND "${HAVE_SCHED_SETAFFINITY_INTERNAL}") set(SCHED_SETAFFINITY_LIBRARIES rt) endif() endif() ================================================ FILE: cmake/Modules/FindCBLAS.cmake ================================================ # Copyright 2009-2011 The VOTCA Development Team (http://www.votca.org) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # #! \file #! \ingroup FindPackage #! \brief Find CBLAS #! #! Find the native CBLAS headers and libraries. #! #! - `CBLAS_LIBRARIES` - List of libraries when using cblas. #! - `CBLAS_INCLUDE_DIRS` - List of include directories #! - `CBLAS_FOUND` - True if cblas found. #! #! Cblas can be provided by libblas (Ubuntu), cblas or gslcblas, it will be searched for in #! this order. 
include(LibFindMacros) if (UNIX) find_package(PkgConfig QUIET) pkg_check_modules(CBLAS_PKGCONF QUIET cblas) endif() if (NOT CBLAS_FOUND) if(CBLAS_PKGCONF_FOUND) foreach(NEW_CBLAS_LIB ${CBLAS_PKGCONF_LIBRARIES}) find_library(LIB_${NEW_CBLAS_LIB} ${NEW_CBLAS_LIB} HINTS ${CBLAS_PKGCONF_LIBRARY_DIRS}) if(NOT LIB_${NEW_CBLAS_LIB}) message(FATAL_ERROR "Could not find ${NEW_CBLAS_LIB} where pkgconfig said it is: ${CBLAS_PKGCONF_LIBRARY_DIRS}") else(NOT LIB_${NEW_CBLAS_LIB}) message(STATUS "Found ${LIB_${NEW_CBLAS_LIB}}.") endif(NOT LIB_${NEW_CBLAS_LIB}) set(CBLAS_LIBRARY ${CBLAS_LIBRARY} ${LIB_${NEW_CBLAS_LIB}}) endforeach(NEW_CBLAS_LIB) else(CBLAS_PKGCONF_FOUND) set(CBLAS_HINT_PATH $ENV{CBLASDIR}/lib $ENV{CBLASDIR}/lib64 $ENV{UIBK_GSL_LIB}) # Check if libblas provides cblas (Ubuntu) find_library(BLAS_LIBRARY NAMES blas PATHS ${CBLAS_HINT_PATH}) if(BLAS_LIBRARY) include(CheckSymbolExists) set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARY}) check_symbol_exists(cblas_scopy "cblas.h" BLAS_HAS_CBLAS) endif(BLAS_LIBRARY) set(CBLAS_CANDIDATES cblas gslcblas) if(BLAS_HAS_CBLAS) message(STATUS "libblas provides cblas.") set(CBLAS_CANDIDATES blas ${CBLAS_CANDIDATES}) endif(BLAS_HAS_CBLAS) find_library(CBLAS_LIBRARY NAMES ${CBLAS_CANDIDATES} PATHS ${CBLAS_HINT_PATH} ) endif(CBLAS_PKGCONF_FOUND) if("${CBLAS_LIBRARY}" MATCHES gslcblas) set(CBLAS_INCLUDE_CANDIDATE gsl/gsl_cblas.h) else("${CBLAS_LIBRARY}" MATCHES gslcblas) set(CBLAS_INCLUDE_CANDIDATE cblas.h) endif("${CBLAS_LIBRARY}" MATCHES gslcblas) find_path(CBLAS_INCLUDE_DIR ${CBLAS_INCLUDE_CANDIDATE} HINTS ${CBLAS_PKGCONF_INCLUDE_DIRS} $ENV{CBLASDIR}/include $ENV{UIBK_GSL_INC}) # Set the include dir variables and the libraries and let libfind_process do the rest. # NOTE: Singular variables for this library, plural for libraries this this lib depends on. 
set(CBLAS_PROCESS_INCLUDES CBLAS_INCLUDE_DIR) set(CBLAS_PROCESS_LIBS CBLAS_LIBRARY) libfind_process(CBLAS) message(STATUS "Using '${CBLAS_LIBRARIES}' for cblas.") endif(NOT CBLAS_FOUND) ================================================ FILE: cmake/Modules/FindFortran.cmake ================================================ # Check if Fortran is possibly around before using enable_lanauge because # enable_language(... OPTIONAL) does not fail gracefully if language is not # found: # http://public.kitware.com/Bug/view.php?id=9220 set(Fortran_EXECUTABLE) if(Fortran_EXECUTABLE) set(Fortran_FIND_QUIETLY TRUE) endif() find_program(Fortran_EXECUTABLE NAMES gfortran ifort g77 f77 g90 f90) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Fortran DEFAULT_MSG Fortran_EXECUTABLE) if(FORTRAN_FOUND) set(Fortran_FOUND TRUE) endif() ================================================ FILE: cmake/Modules/FindGASNET.cmake ================================================ # Find the GasNet librairy # GASNET_FOUND - system has GasNet lib # GASNET_INCLUDE_DIR - the GasNet include directory # GASNET_LIBRARIES - Libraries needed to use GasNet if(GASNET_INCLUDE_DIRS AND GASNET_LIBRARIES) set(GASNET_FIND_QUIETLY TRUE) endif() find_path(GASNET_INCLUDE_DIRS NAMES gasnet.h) find_library(GASNET_LIBRARY_1 NAMES gasnet amudp HINTS ${GASNET_INCLUDE_DIRS}/../lib ) find_library(GASNET_LIBRARY_2 NAMES gasnet gasnet-udp-par HINTS ${GASNET_INCLUDE_DIRS}/../lib ) set(GASNET_LIBRARIES ${GASNET_LIBRARY_2} ${GASNET_LIBRARY_1}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(GASNET DEFAULT_MSG GASNET_INCLUDE_DIRS GASNET_LIBRARIES) mark_as_advanced(GASNET_INCLUDE_DIRS GASNET_LIBRARIES) ================================================ FILE: cmake/Modules/FindGMP.cmake ================================================ # Find the GMP librairies # GMP_FOUND - system has GMP lib # GMP_INCLUDE_DIR - the GMP include directory # GMP_LIBRARIES - Libraries needed to use GMP # 
Copyright (c) 2006, Laurent Montel, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. if(GMP_INCLUDE_DIRS AND GMP_LIBRARIES AND GMPXX_LIBRARIES) set(GMP_FIND_QUIETLY TRUE) endif() find_path(GMP_INCLUDE_DIRS NAMES gmp.h) find_library(GMP_LIBRARIES NAMES gmp libgmp) find_library(GMPXX_LIBRARIES NAMES gmpxx libgmpxx) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(GMP DEFAULT_MSG GMP_INCLUDE_DIRS GMP_LIBRARIES) mark_as_advanced(GMP_INCLUDE_DIRS GMP_LIBRARIES GMPXX_LIBRARIES) ================================================ FILE: cmake/Modules/FindGit.cmake ================================================ # The module defines the following variables: # GIT_EXECUTABLE - path to git command line client # GIT_FOUND - true if the command line client was found # Example usage: # find_package(Git) # if(GIT_FOUND) # message("git found: ${GIT_EXECUTABLE}") # endif() #============================================================================= # Copyright 2010 Kitware, Inc. # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file Copyright.txt for details. # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the License for more information. #============================================================================= # (To distributed this file outside of CMake, substitute the full # License text for the above reference.) # Look for 'git' or 'eg' (easy git) # set(git_names git eg) # Prefer .cmd variants on Windows unless running in a Makefile # in the MSYS shell. 
# if(WIN32) if(NOT CMAKE_GENERATOR MATCHES "MSYS") set(git_names git.cmd git eg.cmd eg) endif() endif() find_program(GIT_EXECUTABLE NAMES ${git_names} DOC "git command line client" ) mark_as_advanced(GIT_EXECUTABLE) # Handle the QUIETLY and REQUIRED arguments and set GIT_FOUND to TRUE if # all listed variables are TRUE include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Git DEFAULT_MSG GIT_EXECUTABLE) ================================================ FILE: cmake/Modules/FindNUMA.cmake ================================================ # Find numa library # Once done this will define # NUMA_FOUND - libnuma found # NUMA_OLD - old libnuma API if(NOT NUMA_FOUND) find_library(NUMA_LIBRARY NAMES numa PATH_SUFFIXES lib lib64) if(NUMA_LIBRARY) include(CheckLibraryExists) check_library_exists(${NUMA_LIBRARY} numa_available "" NUMA_FOUND_INTERNAL) if(NUMA_FOUND_INTERNAL) check_library_exists(${NUMA_LIBRARY} numa_allocate_nodemask "" NUMA_NEW_INTERNAL) if(NOT NUMA_NEW_INTERNAL) set(NUMA_OLD "yes" CACHE) endif() endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_LIBRARY) mark_as_advanced(NUMA_FOUND) endif() endif() ================================================ FILE: cmake/Modules/FindOpenCL.cmake ================================================ # # This file taken from FindOpenCL project @ http://gitorious.com/findopencl # # - Try to find OpenCL # This module tries to find an OpenCL implementation on your system. It supports # AMD / ATI, Apple and NVIDIA implementations, but should work, too. 
# # Once done this will define # OPENCL_FOUND - system has OpenCL # OPENCL_INCLUDE_DIRS - the OpenCL include directory # OPENCL_LIBRARIES - link these to use OpenCL # # WIN32 should work, but is untested FIND_PACKAGE( PackageHandleStandardArgs ) SET (OPENCL_VERSION_STRING "0.1.0") SET (OPENCL_VERSION_MAJOR 0) SET (OPENCL_VERSION_MINOR 1) SET (OPENCL_VERSION_PATCH 0) IF (APPLE) FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") FIND_PATH(OPENCL_INCLUDE_DIRS opencl/cl.h DOC "Include for OpenCL on OSX") FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS opencl/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") ELSE (APPLE) IF (WIN32) FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) # The AMD SDK currently installs both x86 and x86_64 libraries # This is only a hack to find out architecture IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64") ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86") ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) # find out if the user asked for a 64-bit build, and use the corresponding # 64 or 32 bit NVIDIA library paths to the search: STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR}) IF("${ISWIN64}" STREQUAL "Win64") FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64) ELSE("${ISWIN64}" STREQUAL "Win64") FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32) ENDIF("${ISWIN64}" STREQUAL "Win64") GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) # On Win32 search relative to the library FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp 
PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) ELSE (WIN32) # Unix style platforms FIND_LIBRARY(OPENCL_LIBRARIES OpenCL ENV LD_LIBRARY_PATH ) GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) # The AMD SDK currently does not place its headers # in /usr/include, therefore also search relative # to the library FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} $ENV{OPENCL_INCLUDE_DIRS} "/usr/local/cuda/include") FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} $ENV{OPENCL_LIB_DIR} "/usr/local/cuda/include") ENDIF (WIN32) ENDIF (APPLE) FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) IF( _OPENCL_CPP_INCLUDE_DIRS ) SET( OPENCL_HAS_CPP_BINDINGS TRUE ) LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) # This is often the same, so clean up LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) MARK_AS_ADVANCED( OPENCL_INCLUDE_DIRS ) ================================================ FILE: cmake/Modules/FindPAPI.cmake ================================================ # Find PAPI libraries # Once done this will define # PAPI_FOUND - System has PAPI # PAPI_INCLUDE_DIRS - The PAPI include directories # PAPI_LIBRARIES - The libraries needed to use PAPI if(PAPI_INCLUDE_DIRS AND PAPI_LIBRARIES) set(PAPI_FIND_QUIETLY TRUE) endif() # XXX(ddn): our system papi is broken so ignore for now # find_path(PAPI_INCLUDE_DIRS papi.h HINTS ${PAPI_ROOT} PATH_SUFFIXES include NO_DEFAULT_PATH ) find_path(PAPI_INCLUDE_DIRS papi.h HINTS ${PAPI_ROOT} ENV TACC_PAPI_DIR PATH_SUFFIXES include) message(STATUS "PAPI_INCLUDE_DIRS: ${PAPI_INCLUDE_DIRS}") find_library(PAPI_LIBRARY NAMES papi HINTS ${PAPI_ROOT} ENV TACC_PAPI_DIR PATH_SUFFIXES lib lib64) message(STATUS "PAPI_LIBRARY: ${PAPI_LIBRARY}") find_library(PAPI_LIBRARIES NAMES rt PATH_SUFFIXES lib lib64) 
include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PAPI DEFAULT_MSG PAPI_LIBRARY PAPI_LIBRARIES PAPI_INCLUDE_DIRS) if(PAPI_FOUND) set(PAPI_LIBRARIES ${PAPI_LIBRARY} ${PAPI_LIBRARIES}) endif() mark_as_advanced(PAPI_INCLUDE_DIRS PAPI_LIBRARIES) ================================================ FILE: cmake/Modules/FindQGLViewer.cmake ================================================ # Find QGLViewer libraries # Once done this will define # QGLViewer_FOUND - System has QGLViewer # QGLViewer_INCLUDE_DIRS - The QGLViewer include directories # QGLViewer_LIBRARIES - The libraries needed to use QGLViewer if(QGLViewer_INCLUDE_DIRS AND QGLVIEWER_LIBRARIES) set(QGLViewer_FIND_QUIETLY TRUE) endif() find_path(QGLViewer_INCLUDE_DIRS NAMES QGLViewer/qglviewer.h) find_library(QGLViewer_LIBRARIES NAMES QGLViewer PATH_SUFFIXES lib lib64) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(QGLViewer DEFAULT_MSG QGLViewer_INCLUDE_DIRS QGLViewer_LIBRARIES) if(QGLVIEWER_FOUND) set(QGLViewer_FOUND TRUE) endif() mark_as_advanced(QGLViewer_INCLUDE_DIRS QGLViewer_LIBRARIES) ================================================ FILE: cmake/Modules/FindTBB.cmake ================================================ # Locate Intel Threading Building Blocks include paths and libraries # FindTBB.cmake can be found at https://code.google.com/p/findtbb/ # Written by Hannes Hofmann # Improvements by Gino van den Bergen , # Florian Uhlig , # Jiri Marsik # The MIT License # # Copyright (c) 2011 Hannes Hofmann # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright 
notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # GvdB: This module uses the environment variable TBB_ARCH_PLATFORM which defines architecture and compiler. # e.g. "ia32/vc8" or "em64t/cc4.1.0_libc2.4_kernel2.6.16.21" # TBB_ARCH_PLATFORM is set by the build script tbbvars[.bat|.sh|.csh], which can be found # in the TBB installation directory (TBB_INSTALL_DIR). # # GvdB: Mac OS X distribution places libraries directly in lib directory. # # For backwards compatibility, you may explicitely set the CMake variables TBB_ARCHITECTURE and TBB_COMPILER. # TBB_ARCHITECTURE [ ia32 | em64t | itanium ] # which architecture to use # TBB_COMPILER e.g. vc9 or cc3.2.3_libc2.3.2_kernel2.4.21 or cc4.0.1_os10.4.9 # which compiler to use (detected automatically on Windows) # This module respects # TBB_INSTALL_DIR or $ENV{TBB21_INSTALL_DIR} or $ENV{TBB_INSTALL_DIR} # This module defines # TBB_INCLUDE_DIRS, where to find task_scheduler_init.h, etc. # TBB_LIBRARY_DIRS, where to find libtbb, libtbbmalloc # TBB_DEBUG_LIBRARY_DIRS, where to find libtbb_debug, libtbbmalloc_debug # TBB_INSTALL_DIR, the base TBB install directory # TBB_LIBRARIES, the libraries to link against to use TBB. # TBB_DEBUG_LIBRARIES, the libraries to link against to use TBB with debug symbols. # TBB_FOUND, If false, don't try to use TBB. 
# TBB_INTERFACE_VERSION, as defined in tbb/tbb_stddef.h

if (WIN32)
    # has em64t/vc8 em64t/vc9
    # has ia32/vc7.1 ia32/vc8 ia32/vc9
    set(_TBB_DEFAULT_INSTALL_DIR "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB")
    set(_TBB_LIB_NAME "tbb")
    set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
    set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
    set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
    # Map the MSVC toolchain in use to Intel's per-compiler lib subdirectory.
    if (MSVC71)
        set (_TBB_COMPILER "vc7.1")
    endif(MSVC71)
    if (MSVC80)
        set(_TBB_COMPILER "vc8")
    endif(MSVC80)
    if (MSVC90)
        set(_TBB_COMPILER "vc9")
    endif(MSVC90)
    if(MSVC10)
        set(_TBB_COMPILER "vc10")
    endif(MSVC10)
    # Todo: add other Windows compilers such as ICL.
    set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
endif (WIN32)

if (UNIX)
    if (APPLE)
        # MAC
        set(_TBB_DEFAULT_INSTALL_DIR "/Library/Frameworks/Intel_TBB.framework/Versions")
        # libs: libtbb.dylib, libtbbmalloc.dylib, *_debug
        set(_TBB_LIB_NAME "tbb")
        set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
        set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
        set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
        # default flavor on apple: ia32/cc4.0.1_os10.4.9
        # Jiri: There is no reason to presume there is only one flavor and
        #       that user's setting of variables should be ignored.
        # Fix: these used to read "elseif(NOT TBB_COMPILER)" /
        # "elseif(NOT TBB_ARCHITECTURE)" - the same condition as the if() -
        # so the user-supplied value branch was dead code and an explicit
        # TBB_COMPILER/TBB_ARCHITECTURE was silently ignored (exactly what
        # the comment above says should NOT happen).
        if(NOT TBB_COMPILER)
            set(_TBB_COMPILER "cc4.0.1_os10.4.9")
        else()
            set(_TBB_COMPILER ${TBB_COMPILER})
        endif()
        if(NOT TBB_ARCHITECTURE)
            set(_TBB_ARCHITECTURE "ia32")
        else()
            set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
        endif()
    else (APPLE)
        # LINUX
        set(_TBB_DEFAULT_INSTALL_DIR "/opt/intel/tbb" "/usr/local/include" "/usr/include")
        set(_TBB_LIB_NAME "tbb")
        set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
        set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
        set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
        # has em64t/cc3.2.3_libc2.3.2_kernel2.4.21 em64t/cc3.3.3_libc2.3.3_kernel2.6.5 em64t/cc3.4.3_libc2.3.4_kernel2.6.9 em64t/cc4.1.0_libc2.4_kernel2.6.16.21
        # has ia32/*
        # has itanium/*
        set(_TBB_COMPILER ${TBB_COMPILER})
        set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
    endif (APPLE)
endif (UNIX)

if (CMAKE_SYSTEM MATCHES "SunOS.*")
# SUN
# not yet supported
# has em64t/cc3.4.3_kernel5.10
# has ia32/*
endif (CMAKE_SYSTEM MATCHES "SunOS.*")


#-- Clear the public variables
set (TBB_FOUND "NO")


#-- Find TBB install dir and set ${_TBB_INSTALL_DIR} and cached ${TBB_INSTALL_DIR}
# first: use CMake variable TBB_INSTALL_DIR
if (TBB_INSTALL_DIR)
    set (_TBB_INSTALL_DIR ${TBB_INSTALL_DIR})
endif (TBB_INSTALL_DIR)
# second: use environment variable
if (NOT _TBB_INSTALL_DIR)
    if (NOT "$ENV{TBBROOT}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBBROOT})
    endif()
    if (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB_INSTALL_DIR})
    endif (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "")
    # Intel recommends setting TBB21_INSTALL_DIR
    if (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB21_INSTALL_DIR})
    endif (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "")
    if (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB22_INSTALL_DIR})
    endif (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "")
    if (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB30_INSTALL_DIR})
    endif (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "")
endif (NOT _TBB_INSTALL_DIR)
# third: try to find path automatically
if (NOT _TBB_INSTALL_DIR)
    if (_TBB_DEFAULT_INSTALL_DIR)
        set (_TBB_INSTALL_DIR ${_TBB_DEFAULT_INSTALL_DIR})
    endif (_TBB_DEFAULT_INSTALL_DIR)
endif (NOT _TBB_INSTALL_DIR)
# sanity check
if (NOT _TBB_INSTALL_DIR)
    message ("ERROR: Unable to find Intel TBB install directory. ${_TBB_INSTALL_DIR}")
else (NOT _TBB_INSTALL_DIR)
# NOTE: everything below (through the matching endif in the epilogue) only
# runs once an install dir candidate exists.
# finally: set the cached CMake variable TBB_INSTALL_DIR
if (NOT TBB_INSTALL_DIR)
    set (TBB_INSTALL_DIR ${_TBB_INSTALL_DIR} CACHE PATH "Intel TBB install directory")
    mark_as_advanced(TBB_INSTALL_DIR)
endif (NOT TBB_INSTALL_DIR)


#-- A macro to rewrite the paths of the library. This is necessary, because
#   find_library() always found the em64t/vc9 version of the TBB libs
# Rewrites the em64t/ia32 architecture and vcN compiler components embedded
# in a found library path to the flavor selected above.  Currently unused:
# the TBB_CORRECT_LIB_DIR call sites below are commented out.
macro(TBB_CORRECT_LIB_DIR var_name)
#    if (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t")
        string(REPLACE em64t "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}})
#    endif (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t")
    string(REPLACE ia32 "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}})
    string(REPLACE vc7.1 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
    string(REPLACE vc8 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
    string(REPLACE vc9 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
    string(REPLACE vc10 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
# NOTE(review): the endmacro argument below names "var_content" rather than
# the macro name; legacy CMake ignores it, kept verbatim.
endmacro(TBB_CORRECT_LIB_DIR var_content)


#-- Look for include directory and set ${TBB_INCLUDE_DIR}
set (TBB_INC_SEARCH_DIR ${_TBB_INSTALL_DIR}/include)
# Jiri: tbbvars now sets the CPATH environment variable to the directory
#       containing the headers.
find_path(TBB_INCLUDE_DIR
    tbb/task_scheduler_init.h
    PATHS ${TBB_INC_SEARCH_DIR} ENV CPATH
)
mark_as_advanced(TBB_INCLUDE_DIR)


#-- Look for libraries
# GvdB: $ENV{TBB_ARCH_PLATFORM} is set by the build script tbbvars[.bat|.sh|.csh]
# Fix: quote the expansions in the STREQUAL tests below.  Unquoted, an unset
# environment/cache variable expands to nothing and if() fails with
# "Unknown arguments specified" instead of evaluating to false.
if (NOT "$ENV{TBB_ARCH_PLATFORM}" STREQUAL "")
    set (_TBB_LIBRARY_DIR
         ${_TBB_INSTALL_DIR}/lib/$ENV{TBB_ARCH_PLATFORM}
         ${_TBB_INSTALL_DIR}/$ENV{TBB_ARCH_PLATFORM}/lib
        )
endif ()

# Jiri: This block isn't mutually exclusive with the previous one
#       (hence no else), instead I test if the user really specified
#       the variables in question.
if ((NOT "${TBB_ARCHITECTURE}" STREQUAL "") AND (NOT "${TBB_COMPILER}" STREQUAL ""))
    # HH: deprecated
    message(STATUS "[Warning] FindTBB.cmake: The use of TBB_ARCHITECTURE and TBB_COMPILER is deprecated and may not be supported in future versions. Please set \$ENV{TBB_ARCH_PLATFORM} (using tbbvars.[bat|csh|sh]).")
    # Jiri: It doesn't hurt to look in more places, so I store the hints from
    #       ENV{TBB_ARCH_PLATFORM} and the TBB_ARCHITECTURE and TBB_COMPILER
    #       variables and search them both.
    set (_TBB_LIBRARY_DIR "${_TBB_INSTALL_DIR}/${_TBB_ARCHITECTURE}/${_TBB_COMPILER}/lib" ${_TBB_LIBRARY_DIR})
endif ()

# GvdB: Mac OS X distribution places libraries directly in lib directory.
list(APPEND _TBB_LIBRARY_DIR ${_TBB_INSTALL_DIR}/lib)

# Jiri: No reason not to check the default paths. From recent versions,
#       tbbvars has started exporting the LIBRARY_PATH and LD_LIBRARY_PATH
#       variables, which now point to the directories of the lib files.
#       It all makes more sense to use the ${_TBB_LIBRARY_DIR} as a HINTS
#       argument instead of the implicit PATHS as it isn't hard-coded
#       but computed by system introspection. Searching the LIBRARY_PATH
#       and LD_LIBRARY_PATH environment variables is now even more important
#       that tbbvars doesn't export TBB_ARCH_PLATFORM and it facilitates
#       the use of TBB built from sources.
# Release libraries: HINTS come from the computed install layout, PATHS from
# the environment exported by tbbvars.
find_library(TBB_LIBRARY ${_TBB_LIB_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)
find_library(TBB_MALLOC_LIBRARY ${_TBB_LIB_MALLOC_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

#Extract path from TBB_LIBRARY name
get_filename_component(TBB_LIBRARY_DIR ${TBB_LIBRARY} PATH)

#TBB_CORRECT_LIB_DIR(TBB_LIBRARY)
#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY)
mark_as_advanced(TBB_LIBRARY TBB_MALLOC_LIBRARY)

#-- Look for debug libraries
# Jiri: Changed the same way as for the release libraries.
find_library(TBB_LIBRARY_DEBUG ${_TBB_LIB_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)
find_library(TBB_MALLOC_LIBRARY_DEBUG ${_TBB_LIB_MALLOC_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

# Jiri: Self-built TBB stores the debug libraries in a separate directory.
#       Extract path from TBB_LIBRARY_DEBUG name
get_filename_component(TBB_LIBRARY_DEBUG_DIR ${TBB_LIBRARY_DEBUG} PATH)

#TBB_CORRECT_LIB_DIR(TBB_LIBRARY_DEBUG)
#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY_DEBUG)
mark_as_advanced(TBB_LIBRARY_DEBUG TBB_MALLOC_LIBRARY_DEBUG)

# Only the release library is required for TBB_FOUND; debug variables may be
# NOTFOUND and are passed through to TBB_DEBUG_LIBRARIES as-is.
if (TBB_INCLUDE_DIR)
    if (TBB_LIBRARY)
        set (TBB_FOUND "YES")
        set (TBB_LIBRARIES ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} ${TBB_LIBRARIES})
        set (TBB_DEBUG_LIBRARIES ${TBB_LIBRARY_DEBUG} ${TBB_MALLOC_LIBRARY_DEBUG} ${TBB_DEBUG_LIBRARIES})
        set (TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIR} CACHE PATH "TBB include directory" FORCE)
        set (TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIR} CACHE PATH "TBB library directory" FORCE)
        # Jiri: Self-built TBB stores the debug libraries in a separate directory.
set (TBB_DEBUG_LIBRARY_DIRS ${TBB_LIBRARY_DEBUG_DIR} CACHE PATH "TBB debug library directory" FORCE) mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARY_DIRS TBB_DEBUG_LIBRARY_DIRS TBB_LIBRARIES TBB_DEBUG_LIBRARIES) message(STATUS "Found Intel TBB") endif (TBB_LIBRARY) endif (TBB_INCLUDE_DIR) if (NOT TBB_FOUND) message("ERROR: Intel TBB NOT found!") message(STATUS "Looked for Threading Building Blocks in ${_TBB_INSTALL_DIR}") # do only throw fatal, if this pkg is REQUIRED if (TBB_FIND_REQUIRED) message(FATAL_ERROR "Could NOT find TBB library.") endif (TBB_FIND_REQUIRED) endif (NOT TBB_FOUND) endif (NOT _TBB_INSTALL_DIR) if (TBB_FOUND) set(TBB_INTERFACE_VERSION 0) # FILE(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _TBB_VERSION_CONTENTS) STRING(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}") set(TBB_INTERFACE_VERSION "${TBB_INTERFACE_VERSION}") endif (TBB_FOUND) ================================================ FILE: cmake/Modules/FindVTune.cmake ================================================ # Find VTune libraries # Once done this will define # VTune_FOUND - System has VTune # VTune_INCLUDE_DIRS - The VTune include directories # VTune_LIBRARIES - The libraries needed to use VTune message(STATUS "${VTune_INCLUDE_DIRS}") if(VTune_INCLUDE_DIRS AND VTune_LIBRARIES) set(VTune_FIND_QUIETLY TRUE) endif() set(VTune_LIBRARY_PATH_CANDIDATES lib lib64 lib32 bin64/k1om bin32/k1om) find_path(VTune_INCLUDE_DIRS ittnotify.h PATHS ${VTune_ROOT} PATH_SUFFIXES include) find_library(VTune_LIBRARY NAMES ittnotify PATHS ${VTune_ROOT} PATH_SUFFIXES ${VTune_LIBRARY_PATH_CANDIDATES}) find_library(VTune_LIBRARIES NAMES dl PATH_SUFFIXES lib lib64 lib32) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(VTune DEFAULT_MSG VTune_LIBRARY VTune_LIBRARIES VTune_INCLUDE_DIRS) if(VTUNE_FOUND) set(VTune_FOUND on) set(VTune_LIBRARIES ${VTune_LIBRARY} ${VTune_LIBRARIES}) endif() mark_as_advanced(VTune_INCLUDE_DIRS 
                 VTune_LIBRARIES)

================================================
FILE: cmake/Modules/GetGitVersion-write.cmake
================================================
### Don't include directly, for use by GetSVNVersion.cmake
find_package(Git)

# Extract svn info into MY_XXX variables
# Writes the short HEAD hash (or "0" when git is unavailable) into
# revision.h.txt, then copies it over revision.h only when it changed so
# dependents are not rebuilt needlessly.
if(GIT_FOUND)
  execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --verify --short HEAD
    WORKING_DIRECTORY ${SOURCE_DIR}
    OUTPUT_VARIABLE GIT_REVISION OUTPUT_STRIP_TRAILING_WHITESPACE)
  file(WRITE include/galois/revision.h.txt "#define GALOIS_REVISION \"${GIT_REVISION}\"\n")
else()
  file(WRITE include/galois/revision.h.txt "#define GALOIS_REVISION \"0\"\n")
endif()
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different include/galois/revision.h.txt include/galois/revision.h)

================================================
FILE: cmake/Modules/GetGitVersion.cmake
================================================
# DUMMY is a non-existent file to force regeneration of svn header every build
add_custom_target(revision ALL DEPENDS DUMMY ${PROJECT_BINARY_DIR}/include/galois/revision.h)
find_file(_MODULE "GetGitVersion-write.cmake" PATHS ${CMAKE_MODULE_PATH})
# Runs GetGitVersion-write.cmake in script mode at build time.
add_custom_command(OUTPUT DUMMY ${PROJECT_BINARY_DIR}/include/galois/revision.h
  COMMAND ${CMAKE_COMMAND}
    -DSOURCE_DIR=${CMAKE_SOURCE_DIR}
    -DCMAKE_MODULE_PATH="${CMAKE_SOURCE_DIR}/cmake/Modules/"
    -P ${_MODULE})
set(_MODULE off)
set_source_files_properties(${PROJECT_BINARY_DIR}/include/galois/revision.h
  PROPERTIES GENERATED TRUE
             HEADER_FILE_ONLY TRUE)

================================================
FILE: cmake/Modules/HandleSanitizer.cmake
================================================
# Galois: taken from:
# https://github.com/llvm/llvm-project/blob/master/llvm/cmake/modules/HandleLLVMOptions.cmake
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)

string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)

# Nothing to do unless a sanitizer was requested.
if(NOT GALOIS_USE_SANITIZER)
  return()
endif()

# Appends ${value} to each named flag variable in the caller's scope.
function(append value)
  foreach(variable ${ARGN})
    set(${variable}
 "${${variable}} ${value}" PARENT_SCOPE)
  endforeach(variable)
endfunction()

# Like append(), but only when ${condition} evaluates true.
function(append_if condition value)
  if (${condition})
    foreach(variable ${ARGN})
      set(${variable} "${${variable}} ${value}" PARENT_SCOPE)
    endforeach(variable)
  endif()
endfunction()

# Probes ${flag} with -Werror for both C and C++ and appends it to the
# respective flag variables when supported.
macro(add_flag_if_supported flag name)
  check_c_compiler_flag("-Werror ${flag}" "C_SUPPORTS_${name}")
  append_if("C_SUPPORTS_${name}" "${flag}" CMAKE_C_FLAGS)
  check_cxx_compiler_flag("-Werror ${flag}" "CXX_SUPPORTS_${name}")
  append_if("CXX_SUPPORTS_${name}" "${flag}" CMAKE_CXX_FLAGS)
endmacro()

macro(append_common_sanitizer_flags)
  # Append -fno-omit-frame-pointer and turn on debug info to get better
  # stack traces.
  add_flag_if_supported("-fno-omit-frame-pointer" FNO_OMIT_FRAME_POINTER)
  if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND
      NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO")
    add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY)
  endif()
  # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large.
  if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
    add_flag_if_supported("-O1" O1)
  endif()
endmacro()

# Translate the GALOIS_USE_SANITIZER value into -fsanitize= flags.  The
# two-element combination is matched as a literal "A;B" string in either order.
if (GALOIS_USE_SANITIZER STREQUAL "Address")
  append_common_sanitizer_flags()
  append("-fsanitize=address" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER STREQUAL "HWAddress")
  append_common_sanitizer_flags()
  append("-fsanitize=hwaddress" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER MATCHES "Memory(WithOrigins)?")
  append_common_sanitizer_flags()
  append("-fsanitize=memory" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
  if(GALOIS_USE_SANITIZER STREQUAL "MemoryWithOrigins")
    append("-fsanitize-memory-track-origins" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
  endif()
elseif (GALOIS_USE_SANITIZER STREQUAL "Undefined")
  append_common_sanitizer_flags()
  append("-fsanitize=undefined -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER STREQUAL "Thread")
  append_common_sanitizer_flags()
  append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER STREQUAL "DataFlow")
  append("-fsanitize=dataflow" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER STREQUAL "Address;Undefined" OR
        GALOIS_USE_SANITIZER STREQUAL "Undefined;Address")
  append_common_sanitizer_flags()
  append("-fsanitize=address,undefined -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (GALOIS_USE_SANITIZER STREQUAL "Leaks")
  append_common_sanitizer_flags()
  append("-fsanitize=leak" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
  message(FATAL_ERROR "Unsupported value of GALOIS_USE_SANITIZER: ${GALOIS_USE_SANITIZER}")
endif()

if (GALOIS_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?")
  add_flag_if_supported("-fsanitize-address-use-after-scope"
                        FSANITIZE_USE_AFTER_SCOPE_FLAG)
endif()

if (GALOIS_USE_SANITIZE_COVERAGE)
  append("-fsanitize=fuzzer-no-link" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()

# UBSan: suppress reports from the bundled bliss sources via a generated
# blacklist file.
if (GALOIS_USE_SANITIZER MATCHES ".*Undefined.*")
  set(BLACKLIST_CONFIGURE_FILE "${PROJECT_SOURCE_DIR}/config/sanitizers/ubsan_blacklist.txt.in")
  if (EXISTS "${BLACKLIST_CONFIGURE_FILE}")
    set(BLACKLIST_FILE "${PROJECT_BINARY_DIR}/config/sanitizers/ubsan_blacklist.txt")
    configure_file("${BLACKLIST_CONFIGURE_FILE}" "${BLACKLIST_FILE}")
    append("-fsanitize-blacklist=${BLACKLIST_FILE}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
  endif()
endif()

================================================
FILE: cmake/Modules/LibFindMacros.cmake
================================================
# Copyright Raimar Sandner 2012–2014. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE.txt)

#! \file
#! \ingroup Helpers
#! \brief Improved versions of %CMake's `find_package`

#! \ingroup Helpers
#! \brief Works the same as `find_package`, but forwards the "REQUIRED" and "QUIET" arguments
#! used for the current package.
#!
#! For this to work, the first parameter must be the prefix of the current package, then the
#! prefix of the new package etc, which are passed to `find_package`.
macro (libfind_package PREFIX) set (LIBFIND_PACKAGE_ARGS ${ARGN}) if (${PREFIX}_FIND_QUIETLY) set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} QUIET) endif (${PREFIX}_FIND_QUIETLY) if (${PREFIX}_FIND_REQUIRED) set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} REQUIRED) endif (${PREFIX}_FIND_REQUIRED) find_package(${LIBFIND_PACKAGE_ARGS}) endmacro (libfind_package) #! \ingroup Helpers #! \brief Do the final processing once the paths have been detected. #! #! If include dirs are needed, `${PREFIX}_PROCESS_INCLUDES` should be set to contain #! all the variables, each of which contain one include directory. #! Ditto for `${PREFIX}_PROCESS_LIBS` and library files. #! Will set `${PREFIX}_FOUND`, `${PREFIX}_INCLUDE_DIRS` and `${PREFIX}_LIBRARIES`. #! Also handles errors in case library detection was required, etc. macro (libfind_process PREFIX) # Skip processing if already processed during this run if (NOT ${PREFIX}_FOUND) # Start with the assumption that the library was found set (${PREFIX}_FOUND TRUE) # Process all includes and set _FOUND to false if any are missing foreach (i ${${PREFIX}_PROCESS_INCLUDES}) if (${i}) set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIRS} ${${i}}) mark_as_advanced(${i}) else (${i}) set (${PREFIX}_FOUND FALSE) endif (${i}) endforeach (i) # Process all libraries and set _FOUND to false if any are missing foreach (i ${${PREFIX}_PROCESS_LIBS}) if (${i}) set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARIES} ${${i}}) mark_as_advanced(${i}) else (${i}) set (${PREFIX}_FOUND FALSE) endif (${i}) endforeach (i) # Print message and/or exit on fatal error if (${PREFIX}_FOUND) if (NOT ${PREFIX}_FIND_QUIETLY) message (STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}") endif (NOT ${PREFIX}_FIND_QUIETLY) else (${PREFIX}_FOUND) if (${PREFIX}_FIND_REQUIRED) foreach (i ${${PREFIX}_PROCESS_INCLUDES} ${${PREFIX}_PROCESS_LIBS}) message("${i}=${${i}}") endforeach (i) message (FATAL_ERROR "Required library ${PREFIX} NOT FOUND.\nInstall the library (dev version) and try 
again. If the library is already installed, use ccmake to set the missing variables manually.") endif (${PREFIX}_FIND_REQUIRED) endif (${PREFIX}_FOUND) endif (NOT ${PREFIX}_FOUND) endmacro (libfind_process) ================================================ FILE: cmake/Modules/UseStdMacro.cmake ================================================ add_definitions(-D__STDC_LIMIT_MACROS) add_definitions(-D__STDC_CONSTANT_MACROS) ================================================ FILE: config/sanitizers/ubsan_blacklist.txt.in ================================================ [undefined] src:@PROJECT_SOURCE_DIR@/external/bliss/* ================================================ FILE: config/version.txt ================================================ 6.0.0 ================================================ FILE: external/bliss/bliss/COPYING ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>.
A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. 
You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. ================================================ FILE: external/bliss/bliss/abgraph.hh ================================================ #ifndef BLISS_AB_GRAPH_HH #define BLISS_AB_GRAPH_HH #include #include #include #include #include #include #include namespace bliss { class AbstractGraph; } #include "kstack.hh" #include "kqueue.hh" #include "heap.hh" #include "orbit.hh" #include "partition.hh" #include "bignum.hh" #include "uintseqhash.hh" namespace bliss { void fatal_error(const char* fmt, ...) 
{
  // Body of fatal_error(const char* fmt, ...), declared just above: print a
  // printf-style message to stderr, prefixed with "Bliss fatal error: ",
  // then terminate the whole process.  This function never returns.
  va_list ap;
  va_start(ap, fmt);
  fprintf(stderr,"Bliss fatal error: ");
  vfprintf(stderr, fmt, ap);
  fprintf(stderr, "\nAborting!\n");
  va_end(ap);
  exit(1);
}

// Report an unrecoverable condition together with the source location at
// which the macro was expanded; both expand to a call to fatal_error(),
// which aborts the process.
#define _INTERNAL_ERROR() fatal_error("%s:%d: internal error",__FILE__,__LINE__)
#define _OUT_OF_MEMORY() fatal_error("%s:%d: out of memory",__FILE__,__LINE__)

// NOTE(review): the template argument list of std::pair appears to have been
// stripped from this text (extraction artifact); presumably this was
// std::pair<unsigned int, unsigned int> -- TODO confirm against the upstream
// bliss sources before building.
typedef std::pair Index;

/**
 * \internal
 * A single node of the bliss search tree.  Records how the parent partition
 * was split to reach this node, plus per-node bookkeeping for the
 * first-path / best-path heuristics, failure recording, component recursion
 * and the "long prune" optimization used during the search.
 */
class TreeNode
{
  //friend class AbstractGraph;
public:
  // First element of the cell that was split at this node, and the element
  // the split was performed on.
  unsigned int split_cell_first;
  int split_element;
  // Sentinel values stored in split_element to mark the start/end of the
  // splitting process at this node.
  static const int SPLIT_START = -1;
  static const int SPLIT_END = -2;
  // Saved state for undoing the partition refinement when backtracking
  // out of this node.
  Partition::BacktrackPoint partition_bt_point;
  unsigned int certificate_index;
  // Three-valued logic constants (used e.g. for fp_extendable below).
  static const char NO = -1;
  static const char MAYBE = 0;
  static const char YES = 1;
  /* First path stuff */
  bool fp_on;
  bool fp_cert_equal;
  char fp_extendable;   // one of NO / MAYBE / YES above
  /* Best path stuff */
  bool in_best_path;
  int cmp_to_best_path;
  // Failure recording state (see set_failure_recording in AbstractGraph).
  unsigned int failure_recording_ival;
  /* Component recursion related data */
  unsigned int cr_cep_stack_size;
  unsigned int cr_cep_index;
  unsigned int cr_level;
  // "Long prune" optimization state for this node.
  bool needs_long_prune;
  unsigned int long_prune_begin;
  // NOTE(review): the inner template argument of std::set appears to have
  // been stripped (extraction artifact) -- verify the element type against
  // the upstream bliss sources.
  std::set > long_prune_redundant;
  // Hash accumulated over this node's refinement; name suggests it is used
  // to compare equitable refinements between search-tree paths.
  UintSeqHash eqref_hash;
  unsigned int subcertificate_length;
};

/**
 * \internal
 * Per-level summary of a path in the search tree: the element the level was
 * split on and the certificate bookkeeping recorded at that level.
 */
typedef struct
{
  unsigned int splitting_element;
  unsigned int certificate_index;
  unsigned int subcertificate_length;
  UintSeqHash eqref_hash;
} PathInfo;

// \brief Statistics returned by the bliss search algorithm.
class Stats
{
  friend class AbstractGraph;
  /** \internal The size of the automorphism group. */
  BigNum group_size;
  /** \internal An approximation (due to possible overflows) of
   * the size of the automorphism group. */
  long double group_size_approx;
  /** \internal The number of nodes in the search tree. */
  long unsigned int nof_nodes;
  /** \internal The number of leaf nodes in the search tree. */
  long unsigned int nof_leaf_nodes;
  /** \internal The number of bad nodes in the search tree. */
  long unsigned int nof_bad_nodes;
  /** \internal The number of canonical representative updates. */
  long unsigned int nof_canupdates;
  /** \internal The number of generator permutations. */
  long unsigned int nof_generators;
  /** \internal The maximal depth of the search tree. */
  unsigned long int max_level;
  /** \internal Reset every statistic to its initial value
   * (trivial group, all counters zero). */
  void reset()
  {
    group_size.assign(1);
    group_size_approx = 1.0;
    nof_nodes = 0;
    nof_leaf_nodes = 0;
    nof_bad_nodes = 0;
    nof_canupdates = 0;
    nof_generators = 0;
    max_level = 0;
  }
public:
  Stats() { reset(); }
  /** Print the statistics to \a fp.
   * Returns the total number of characters written (the sum of the
   * fprintf results and group_size.print). */
  size_t print(FILE* const fp) const
  {
    size_t r = 0;
    r += fprintf(fp, "Nodes: %lu\n", nof_nodes);
    r += fprintf(fp, "Leaf nodes: %lu\n", nof_leaf_nodes);
    r += fprintf(fp, "Bad nodes: %lu\n", nof_bad_nodes);
    r += fprintf(fp, "Canrep updates: %lu\n", nof_canupdates);
    r += fprintf(fp, "Generators: %lu\n", nof_generators);
    r += fprintf(fp, "Max level: %lu\n", max_level);
    r += fprintf(fp, "|Aut|: ")+group_size.print(fp)+fprintf(fp, "\n");
    fflush(fp);
    return r;
  }
  /** An approximation (due to possible overflows/rounding errors) of
   * the size of the automorphism group. */
  long double get_group_size_approx() const {return group_size_approx;}
  /** The number of nodes in the search tree. */
  long unsigned int get_nof_nodes() const {return nof_nodes;}
  /** The number of leaf nodes in the search tree. */
  long unsigned int get_nof_leaf_nodes() const {return nof_leaf_nodes;}
  /** The number of bad nodes in the search tree. */
  long unsigned int get_nof_bad_nodes() const {return nof_bad_nodes;}
  /** The number of canonical representative updates. */
  long unsigned int get_nof_canupdates() const {return nof_canupdates;}
  /** The number of generator permutations. */
  long unsigned int get_nof_generators() const {return nof_generators;}
  /** The maximal depth of the search tree. */
  unsigned long int get_max_level() const {return max_level;}
};

// \brief An abstract base class for different types of graphs.
class AbstractGraph { friend class Partition; public: //AbstractGraph(); // Constructor and destructor routines for the abstract graph class AbstractGraph() { // Initialize stuff first_path_labeling = 0; first_path_labeling_inv = 0; best_path_labeling = 0; best_path_labeling_inv = 0; first_path_automorphism = 0; best_path_automorphism = 0; in_search = false; // Default value for using "long prune" opt_use_long_prune = true; // Default value for using failure recording opt_use_failure_recording = true; // Default value for using component recursion opt_use_comprec = true; verbose_level = 0; verbstr = stdout; report_hook = 0; report_user_param = 0; } //virtual ~AbstractGraph(); virtual ~AbstractGraph() { if(first_path_labeling) { free(first_path_labeling); first_path_labeling = 0; } if(first_path_labeling_inv) { free(first_path_labeling_inv); first_path_labeling_inv = 0; } if(best_path_labeling) { free(best_path_labeling); best_path_labeling = 0; } if(best_path_labeling_inv) { free(best_path_labeling_inv); best_path_labeling_inv = 0; } if(first_path_automorphism) { free(first_path_automorphism); first_path_automorphism = 0; } if(best_path_automorphism) { free(best_path_automorphism); best_path_automorphism = 0; } report_hook = 0; report_user_param = 0; } //Set the verbose output level for the algorithms. // \param level the level of verbose output, 0 means no verbose output //void set_verbose_level(const unsigned int level); void set_verbose_level(const unsigned int level) { verbose_level = level; } /** * Set the file stream for the verbose output. * \param fp the file stream; if null, no verbose output is written */ //void set_verbose_file(FILE * const fp); void set_verbose_file(FILE* const fp) { verbstr = fp; } /** * Add a new vertex with color \a color in the graph and return its index. */ virtual unsigned int add_vertex(const unsigned int color = 0) = 0; /** * Add an edge between vertices \a source and \a target. 
* Duplicate edges between vertices are ignored but try to avoid introducing * them in the first place as they are not ignored immediately but will * consume memory and computation resources for a while. */ virtual void add_edge(const unsigned int source, const unsigned int target, Index index) = 0; /** * Change the color of the vertex \a vertex to \a color. */ virtual void change_color(const unsigned int vertex, const unsigned int color) = 0; /** * Check whether \a perm is an automorphism of this graph. * Unoptimized, mainly for debugging purposes. */ //virtual bool is_automorphism(const std::vector& perm) const; virtual bool is_automorphism(const std::vector& perm) const { _INTERNAL_ERROR(); return false; } /** Activate/deactivate failure recording. * May not be called during the search, i.e. from an automorphism reporting * hook function. * \param active if true, activate failure recording, deactivate otherwise */ void set_failure_recording(const bool active) {assert(!in_search); opt_use_failure_recording = active;} /** Activate/deactivate component recursion. * The choice affects the computed canonical labelings; * therefore, if you want to compare whether two graphs are isomorphic by * computing and comparing (for equality) their canonical versions, * be sure to use the same choice for both graphs. * May not be called during the search, i.e. from an automorphism reporting * hook function. * \param active if true, activate component recursion, deactivate otherwise */ void set_component_recursion(const bool active) {assert(!in_search); opt_use_comprec = active;} /** * Return the number of vertices in the graph. */ virtual unsigned int get_nof_vertices() const = 0; /** * Return a new graph that is the result of applying the permutation \a perm * to this graph. This graph is not modified. * \a perm must contain N=this.get_nof_vertices() elements and be a bijection * on {0,1,...,N-1}, otherwise the result is undefined or a segfault. 
*/ virtual AbstractGraph* permute(const unsigned* const perm) const = 0; virtual AbstractGraph* permute(const std::vector& perm) const = 0; /** * Find a set of generators for the automorphism group of the graph. * The function \a hook (if non-null) is called each time a new generator * for the automorphism group is found. * The first argument \a user_param for the hook is the * \a hook_user_param given below, * the second argument \a n is the length of the automorphism (equal to * get_nof_vertices()) and * the third argument \a aut is the automorphism * (a bijection on {0,...,get_nof_vertices()-1}). * The memory for the automorphism \a aut will be invalidated immediately * after the return from the hook function; * if you want to use the automorphism later, you have to take a copy of it. * Do not call any member functions in the hook. * The search statistics are copied in \a stats. */ //void find_automorphisms(Stats& stats, void (*hook)(void* user_param, unsigned int n, const unsigned int* aut), void* hook_user_param); void find_automorphisms(Stats& stats, void (*hook)(void *user_param, unsigned int n, const unsigned int *aut), void *user_param) { report_hook = hook; report_user_param = user_param; search(false, stats); if(first_path_labeling) { free(first_path_labeling); first_path_labeling = 0; } if(best_path_labeling) { free(best_path_labeling); best_path_labeling = 0; } } /** * Otherwise the same as find_automorphisms() except that * a canonical labeling of the graph (a bijection on * {0,...,get_nof_vertices()-1}) is returned. * The memory allocated for the returned canonical labeling will remain * valid only until the next call to a member function with the exception * that constant member functions (for example, bliss::Graph::permute()) can * be called without invalidating the labeling. * To compute the canonical version of an undirected graph, call this * function and then bliss::Graph::permute() with the returned canonical * labeling. 
* Note that the computed canonical version may depend on the applied version * of bliss as well as on some other options (for instance, the splitting * heuristic selected with bliss::Graph::set_splitting_heuristic()). */ //const unsigned int* canonical_form(Stats& stats, void (*hook)(void* user_param, unsigned int n, const unsigned int* aut), void* hook_user_param); const unsigned * canonical_form(Stats& stats, void (*hook)(void *user_param, unsigned int n, const unsigned int *aut), void *user_param) { report_hook = hook; report_user_param = user_param; search(true, stats); return best_path_labeling; } /** * Write the graph to a file in a variant of the DIMACS format. * See the bliss website * for the definition of the file format. * Note that in the DIMACS file the vertices are numbered from 1 to N while * in this C++ API they are from 0 to N-1. * Thus the vertex n in the file corresponds to the vertex n-1 in the API. * \param fp the file stream where the graph is written */ virtual void write_dimacs(FILE * const fp) = 0; /** * Write the graph to a file in the graphviz dotty format. * \param fp the file stream where the graph is written */ virtual void write_dot(FILE * const fp) = 0; /** * Write the graph in a file in the graphviz dotty format. * Do nothing if the file cannot be written. * \param file_name the name of the file to which the graph is written */ virtual void write_dot(const char * const file_name) = 0; /** * Get a hash value for the graph. * \return the hash value */ virtual unsigned int get_hash() = 0; /** * Disable/enable the "long prune" method. * The choice affects the computed canonical labelings; * therefore, if you want to compare whether two graphs are isomorphic by * computing and comparing (for equality) their canonical versions, * be sure to use the same choice for both graphs. * May not be called during the search, i.e. from an automorphism reporting * hook function. 
* \param active if true, activate "long prune", deactivate otherwise */ void set_long_prune_activity(const bool active) { assert(!in_search); opt_use_long_prune = active; } protected: /** \internal * How much verbose output is produced (0 means none) */ unsigned int verbose_level; /** \internal * The output stream for verbose output. */ FILE *verbstr; protected: /** \internal * The ordered partition used in the search algorithm. */ Partition p; /** \internal * Whether the search for automorphisms and a canonical labeling is * in progress. */ bool in_search; /** \internal * Is failure recording in use? */ bool opt_use_failure_recording; /* The "tree-specific" invariant value for the point when current path * got different from the first path */ unsigned int failure_recording_fp_deviation; /** \internal * Is component recursion in use? */ bool opt_use_comprec; unsigned int refine_current_path_certificate_index; bool refine_compare_certificate; bool refine_equal_to_first; unsigned int refine_first_path_subcertificate_end; int refine_cmp_to_best; unsigned int refine_best_path_subcertificate_end; static const unsigned int CERT_SPLIT = 0; //UINT_MAX; static const unsigned int CERT_EDGE = 1; //UINT_MAX-1; /** \internal * Add a triple (v1,v2,v3) in the certificate. * May modify refine_equal_to_first and refine_cmp_to_best. * May also update eqref_hash and failure_recording_fp_deviation. */ //void cert_add(const unsigned int v1, const unsigned int v2, const unsigned int v3); // Certificate building void cert_add(const unsigned int v1, const unsigned int v2, const unsigned int v3) { if(refine_compare_certificate) { if(refine_equal_to_first) { /* So far equivalent to the first path... 
*/ unsigned int index = certificate_current_path.size(); if(index >= refine_first_path_subcertificate_end) { refine_equal_to_first = false; } else if(certificate_first_path[index] != v1) { refine_equal_to_first = false; } else if(certificate_first_path[++index] != v2) { refine_equal_to_first = false; } else if(certificate_first_path[++index] != v3) { refine_equal_to_first = false; } if(opt_use_failure_recording and !refine_equal_to_first) { /* We just became different from the first path, * remember the deviation point tree-specific invariant * for the use of failure recording */ UintSeqHash h; h.update(v1); h.update(v2); h.update(v3); h.update(index); h.update(eqref_hash.get_value()); failure_recording_fp_deviation = h.get_value(); } } if(refine_cmp_to_best == 0) { /* So far equivalent to the current best path... */ unsigned int index = certificate_current_path.size(); if(index >= refine_best_path_subcertificate_end) { refine_cmp_to_best = 1; } else if(v1 > certificate_best_path[index]) { refine_cmp_to_best = 1; } else if(v1 < certificate_best_path[index]) { refine_cmp_to_best = -1; } else if(v2 > certificate_best_path[++index]) { refine_cmp_to_best = 1; } else if(v2 < certificate_best_path[index]) { refine_cmp_to_best = -1; } else if(v3 > certificate_best_path[++index]) { refine_cmp_to_best = 1; } else if(v3 < certificate_best_path[index]) { refine_cmp_to_best = -1; } } if((refine_equal_to_first == false) and (refine_cmp_to_best < 0)) return; } /* Update the current path certificate */ certificate_current_path.push_back(v1); certificate_current_path.push_back(v2); certificate_current_path.push_back(v3); } /** \internal * Add a redundant triple (v1,v2,v3) in the certificate. * Can also just dicard the triple. * May modify refine_equal_to_first and refine_cmp_to_best. * May also update eqref_hash and failure_recording_fp_deviation. 
*/ //void cert_add_redundant(const unsigned int x, const unsigned int y, const unsigned int z); void cert_add_redundant(const unsigned int v1, const unsigned int v2, const unsigned int v3) { return cert_add(v1, v2, v3); } /**\internal * Is the long prune method in use? */ bool opt_use_long_prune; /**\internal * Maximum amount of memory (in megabytes) available for * the long prune method */ static const unsigned int long_prune_options_max_mem = 50; /**\internal * Maximum amount of automorphisms stored for the long prune method; * less than this is stored if the memory limit above is reached first */ static const unsigned int long_prune_options_max_stored_auts = 100; unsigned int long_prune_max_stored_autss; std::vector *> long_prune_fixed; std::vector *> long_prune_mcrs; std::vector long_prune_temp; unsigned int long_prune_begin; unsigned int long_prune_end; /** \internal * Initialize the "long prune" data structures. */ //void long_prune_init(); /** \internal * Release the memory allocated for "long prune" data structures. */ //void long_prune_deallocate(); //void long_prune_add_automorphism(const unsigned int *aut); //std::vector& long_prune_get_fixed(const unsigned int index); //std::vector& long_prune_allocget_fixed(const unsigned int index); //std::vector& long_prune_get_mcrs(const unsigned int index); //std::vector& long_prune_allocget_mcrs(const unsigned int index); /** \internal * Swap the i:th and j:th stored automorphism information; * i and j must be "in window, i.e. in [long_prune_begin,long_prune_end[ */ //void long_prune_swap(const unsigned int i, const unsigned int j); //Long prune code void long_prune_init() { const unsigned int N = get_nof_vertices(); long_prune_temp.clear(); long_prune_temp.resize(N); /* Of how many automorphisms we can store information in the predefined, fixed amount of memory? 
*/ const unsigned int nof_fitting_in_max_mem = (long_prune_options_max_mem * 1024 * 1024) / (((N * 2) / 8)+1); long_prune_max_stored_autss = long_prune_options_max_stored_auts; /* Had some problems with g++ in using (a* tmp = long_prune_fixed[real_i]; long_prune_fixed[real_i] = long_prune_fixed[real_j]; long_prune_fixed[real_j] = tmp; tmp = long_prune_mcrs[real_i]; long_prune_mcrs[real_i] = long_prune_mcrs[real_j]; long_prune_mcrs[real_j] = tmp; } std::vector& long_prune_allocget_fixed(const unsigned int index) { const unsigned int i = index % long_prune_max_stored_autss; if(!long_prune_fixed[i]) long_prune_fixed[i] = new std::vector(get_nof_vertices()); return *long_prune_fixed[i]; } std::vector& long_prune_get_fixed(const unsigned int index) { return *long_prune_fixed[index % long_prune_max_stored_autss]; } std::vector& long_prune_allocget_mcrs(const unsigned int index) { const unsigned int i = index % long_prune_max_stored_autss; if(!long_prune_mcrs[i]) long_prune_mcrs[i] = new std::vector(get_nof_vertices()); return *long_prune_mcrs[i]; } std::vector& long_prune_get_mcrs(const unsigned int index) { return *long_prune_mcrs[index % long_prune_max_stored_autss]; } void long_prune_add_automorphism(const unsigned int* aut) { if(long_prune_max_stored_autss == 0) return; const unsigned int N = get_nof_vertices(); /* If the buffer of stored auts is full, remove the oldest aut */ if(long_prune_end - long_prune_begin == long_prune_max_stored_autss) { long_prune_begin++; } long_prune_end++; std::vector& fixed = long_prune_allocget_fixed(long_prune_end-1); std::vector& mcrs = long_prune_allocget_mcrs(long_prune_end-1); /* Mark nodes that are (i) fixed or (ii) minimal orbit representatives * under the automorphism 'aut' */ for(unsigned int i = 0; i < N; i++) { fixed[i] = (aut[i] == i); if(long_prune_temp[i] == false) { mcrs[i] = true; unsigned int j = aut[i]; while(j != i) { long_prune_temp[j] = true; j = aut[j]; } } else { mcrs[i] = false; } /* Clear the temp array 
on-the-fly... */ long_prune_temp[i] = false; } } /* * Data structures and routines for refining the partition p into equitable */ Heap neighbour_heap; virtual bool split_neighbourhood_of_unit_cell(Partition::Cell *) = 0; virtual bool split_neighbourhood_of_cell(Partition::Cell * const) = 0; //void refine_to_equitable(); //void refine_to_equitable(Partition::Cell * const unit_cell); //void refine_to_equitable(Partition::Cell * const unit_cell1, Partition::Cell * const unit_cell2); void refine_to_equitable() { /* Start refinement from all cells -> push 'em all in the splitting queue */ for(Partition::Cell* cell = p.first_cell; cell; cell = cell->next) p.splitting_queue_add(cell); do_refine_to_equitable(); } void refine_to_equitable(Partition::Cell* const unit_cell) { p.splitting_queue_add(unit_cell); do_refine_to_equitable(); } void refine_to_equitable(Partition::Cell* const unit_cell1, Partition::Cell* const unit_cell2) { p.splitting_queue_add(unit_cell1); p.splitting_queue_add(unit_cell2); do_refine_to_equitable(); } /** \internal * \return false if it was detected that the current certificate * is different from the first and/or best (whether this is checked * depends on in_search and refine_compare_certificate flags. 
*/ //bool do_refine_to_equitable(); bool do_refine_to_equitable() { eqref_hash.reset(); while(!p.splitting_queue_is_empty()) { Partition::Cell* const cell = p.splitting_queue_pop(); if(cell->is_unit()) { if(in_search) { const unsigned int index = cell->first; if(first_path_automorphism) { /* Build the (potential) automorphism on-the-fly */ first_path_automorphism[first_path_labeling_inv[index]] = p.elements[index]; } if(best_path_automorphism) { /* Build the (potential) automorphism on-the-fly */ best_path_automorphism[best_path_labeling_inv[index]] = p.elements[index]; } } const bool worse = split_neighbourhood_of_unit_cell(cell); if(in_search and worse) goto worse_exit; } else { const bool worse = split_neighbourhood_of_cell(cell); if(in_search and worse) goto worse_exit; } } return true; worse_exit: /* Clear splitting_queue */ p.splitting_queue_clear(); return false; } unsigned int eqref_max_certificate_index; /** \internal * Whether eqref_hash is updated during equitable refinement process. */ bool compute_eqref_hash; UintSeqHash eqref_hash; /** \internal * Check whether the current partition p is equitable. * Performance: very slow, use only for debugging purposes. */ virtual bool is_equitable() const = 0; unsigned int *first_path_labeling; unsigned int *first_path_labeling_inv; Orbit first_path_orbits; unsigned int *first_path_automorphism; unsigned int *best_path_labeling; unsigned int *best_path_labeling_inv; Orbit best_path_orbits; unsigned int *best_path_automorphism; //void update_labeling(unsigned int * const lab); /** \internal * Assign the labeling induced by the current partition 'this.p' to * \a labeling. * That is, if the partition is [[2,0],[1]], * then \a labeling will map 0 to 1, 1 to 2, and 2 to 0. 
*/ void update_labeling(unsigned int* const labeling) { const unsigned int N = get_nof_vertices(); unsigned int* ep = p.elements; for(unsigned int i = 0; i < N; i++, ep++) labeling[*ep] = i; } //void update_labeling_and_its_inverse(unsigned int * const lab, unsigned int * const lab_inv); /** \internal * The same as update_labeling() except that the inverse of the labeling * is also produced and assigned to \a labeling_inv. */ void update_labeling_and_its_inverse(unsigned int* const labeling, unsigned int* const labeling_inv) { const unsigned int N = get_nof_vertices(); unsigned int* ep = p.elements; unsigned int* clip = labeling_inv; for(unsigned int i = 0; i < N; ) { labeling[*ep] = i; i++; *clip = *ep; ep++; clip++; } } void update_orbit_information(Orbit &o, const unsigned int *perm) { const unsigned int N = get_nof_vertices(); for(unsigned int i = 0; i < N; i++) if(perm[i] != i) o.merge_orbits(i, perm[i]); } //void reset_permutation(unsigned int *perm); /* Mainly for debugging purposes */ //virtual bool is_automorphism(unsigned int* const perm); // \internal // Reset the permutation \a perm to the identity permutation. 
void reset_permutation(unsigned int* perm) { const unsigned int N = get_nof_vertices(); for(unsigned int i = 0; i < N; i++, perm++) *perm = i; } virtual bool is_automorphism(unsigned int* const perm) { _INTERNAL_ERROR(); return false; } std::vector certificate_current_path; std::vector certificate_first_path; std::vector certificate_best_path; unsigned int certificate_index; virtual void initialize_certificate() = 0; virtual void remove_duplicate_edges() = 0; virtual void make_initial_equitable_partition() = 0; virtual Partition::Cell* find_next_cell_to_be_splitted(Partition::Cell *cell) = 0; //void search(const bool canonical, Stats &stats); #include "search.h" void (*report_hook)(void *user_param, unsigned int n, const unsigned int *aut); void *report_user_param; /* * * Nonuniform component recursion (NUCR) * */ /** The currently traversed component */ unsigned int cr_level; /** \internal * The "Component End Point" data structure */ class CR_CEP { public: /** At which level in the search was this CEP created */ unsigned int creation_level; /** The current component has been fully traversed when the partition has * this many discrete cells left */ unsigned int discrete_cell_limit; /** The component to be traversed after the current one */ unsigned int next_cr_level; /** The next component end point */ unsigned int next_cep_index; bool first_checked; bool best_checked; }; /** \internal * A stack for storing Component End Points */ std::vector cr_cep_stack; /** \internal * Find the first non-uniformity component at the component recursion * level \a level. * The component is stored in \a cr_component. * If no component is found, \a cr_component is empty. * Returns false if all the cells in the component recursion level \a level * were discrete. * Modifies the max_ival and max_ival_count fields of Partition:Cell * (assumes that they are 0 when called and * quarantees that they are 0 when returned). 
*/ virtual bool nucr_find_first_component(const unsigned int level) = 0; virtual bool nucr_find_first_component(const unsigned int level, std::vector& component, unsigned int& component_elements, Partition::Cell*& sh_return) = 0; /** \internal * The non-uniformity component found by nucr_find_first_component() * is stored here. */ std::vector cr_component; /** \internal * The number of vertices in the component \a cr_component */ unsigned int cr_component_elements; }; // Assumes that the elements in the cell are sorted according to their invariant values. Partition::Cell* Partition::split_cell(Partition::Cell* const original_cell) { Partition::Cell* cell = original_cell; const bool original_cell_was_in_splitting_queue = original_cell->in_splitting_queue; Partition::Cell* largest_new_cell = 0; while(true) { unsigned int* ep = elements + cell->first; const unsigned int* const lp = ep + cell->length; const unsigned int ival = invariant_values[*ep]; invariant_values[*ep] = 0; element_to_cell_map[*ep] = cell; in_pos[*ep] = ep; ep++; while(ep < lp) { const unsigned int e = *ep; if(invariant_values[e] != ival) break; invariant_values[e] = 0; in_pos[e] = ep; ep++; element_to_cell_map[e] = cell; } if(ep == lp) break; Partition::Cell* const new_cell = aux_split_in_two(cell, (ep - elements) - cell->first); if(graph and graph->compute_eqref_hash) { graph->eqref_hash.update(new_cell->first); graph->eqref_hash.update(new_cell->length); graph->eqref_hash.update(ival); } /* Add cells in splitting_queue */ assert(!new_cell->is_in_splitting_queue()); if(original_cell_was_in_splitting_queue) { /* In this case, all new cells are inserted in splitting_queue */ assert(cell->is_in_splitting_queue()); splitting_queue_add(new_cell); } else { /* Otherwise, we can omit one new cell from splitting_queue */ assert(!cell->is_in_splitting_queue()); if(largest_new_cell == 0) { largest_new_cell = cell; } else { assert(!largest_new_cell->is_in_splitting_queue()); if(cell->length > 
largest_new_cell->length) { splitting_queue_add(largest_new_cell); largest_new_cell = cell; } else { splitting_queue_add(cell); } } } /* Process the rest of the cell */ cell = new_cell; } if(original_cell == cell) { /* All the elements in cell had the same invariant value */ return cell; } /* Add cells in splitting_queue */ if(!original_cell_was_in_splitting_queue) { /* Also consider the last new cell */ assert(largest_new_cell); if(cell->length > largest_new_cell->length) { splitting_queue_add(largest_new_cell); largest_new_cell = cell; } else { splitting_queue_add(cell); } if(largest_new_cell->is_unit()) { /* Needed in certificate computation */ splitting_queue_add(largest_new_cell); } } return cell; } } #endif ================================================ FILE: external/bliss/bliss/bignum.hh ================================================ #ifndef BLISS_BIGNUM_HH #define BLISS_BIGNUM_HH /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ #if defined(BLISS_USE_GMP) #include #endif #include #include #include "defs.hh" namespace bliss { /** * \brief A very simple class for big integers (or approximation of them). * * If the compile time flag BLISS_USE_GMP is set, * then the GNU Multiple Precision Arithmetic library (GMP) is used to * obtain arbitrary precision, otherwise "long double" is used to * approximate big integers. 
*/ #if defined(BLISS_USE_GMP) class BigNum { mpz_t v; public: /** * Create a new big number and set it to zero. */ BigNum() {mpz_init(v); } /** * Destroy the number. */ ~BigNum() {mpz_clear(v); } /** * Set the number to \a n. */ void assign(const int n) {mpz_set_si(v, n); } /** * Multiply the number with \a n. */ void multiply(const int n) {mpz_mul_si(v, v, n); } /** * Print the number in the file stream \a fp. */ size_t print(FILE* const fp) const {return mpz_out_str(fp, 10, v); } }; #else class BigNum { long double v; public: /** * Create a new big number and set it to zero. */ BigNum(): v(0.0) {} /** * Set the number to \a n. */ void assign(const int n) {v = (long double)n; } /** * Multiply the number with \a n. */ void multiply(const int n) {v *= (long double)n; } /** * Print the number in the file stream \a fp. */ size_t print(FILE* const fp) const {return fprintf(fp, "%Lg", v); } }; #endif } //namespace bliss #endif ================================================ FILE: external/bliss/bliss/defs.hh ================================================ #ifndef BLISS_DEFS_HH #define BLISS_DEFS_HH #include #include /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ namespace bliss { /** * The version number of bliss. */ static const char * const version = "0.73"; /* * If a fatal error (out of memory, internal error) is encountered, * this function is called. 
* There should not be a return from this function but exit or * a jump to code that deallocates the AbstractGraph instance that called this. */ void fatal_error(const char* fmt, ...); #if defined(BLISS_DEBUG) #define BLISS_CONSISTENCY_CHECKS #define BLISS_EXPENSIVE_CONSISTENCY_CHECKS #endif #if defined(BLISS_CONSISTENCY_CHECKS) /* Force a check that the found automorphisms are valid */ #define BLISS_VERIFY_AUTOMORPHISMS #endif #if defined(BLISS_CONSISTENCY_CHECKS) /* Force a check that the generated partitions are equitable */ #define BLISS_VERIFY_EQUITABLEDNESS #endif } // namespace bliss /*! \mainpage Bliss * * \section intro_sec Introduction * * This is the source code documentation of bliss, * produced by running doxygen in * the source directory. * The algorithms and data structures used in bliss are documented in * the papers found at the * bliss web site. * * * \section compile_sec Compiling * * Compiling bliss in Linux should be easy, just execute * \code * make * \endcode * in the bliss source directory. * This will produce the executable program \c bliss as well as * the library file \c libbliss.a that can be linked in other programs. * If you have the GNU Multiple Precision * Arithmetic Library (GMP) installed in your machine, you can also use * \code * make gmp * \endcode * to enable exact computation of automorphism group sizes. * * When linking the bliss library \c libbliss.a in other programs, * remember to include the standard c++ library * (and the GMP library if you compiled bliss to include it). * For instance, * \code gcc -o test test.c -lstdc++ -lgmp -lbliss\endcode * * \section cppapi_sec The C++ language API * * The C++ language API is the main API to bliss; * all other APIs are just more or less complete variants of it. * The C++ API consists basically of the public methods in * the classes bliss::AbstractGraph, bliss::Graph, and bliss::Digraph. * For an example of its use, * see the \ref executable "source of the bliss executable". 
* * * \section capi_sec The C language API * * The C language API is given in the file bliss_C.h. * It is currently more restricted than the C++ API so * consider using the C++ API whenever possible. */ #endif ================================================ FILE: external/bliss/bliss/graph.hh ================================================ #ifndef BLISS_GRAPH_HH #define BLISS_GRAPH_HH #include "abgraph.hh" namespace bliss { #ifdef USE_DOMAIN typedef std::pair IndexEdge; #else typedef unsigned IndexEdge; #endif #if defined(BLISS_CONSISTENCY_CHECKS) static bool is_permutation(const unsigned int N, const unsigned int* perm) { if(N == 0) return true; std::vector m(N, false); for(unsigned int i = 0; i < N; i++) { if(perm[i] >= N) return false; if(m[perm[i]]) return false; m[perm[i]] = true; } return true; } #endif static bool is_permutation(const std::vector& perm) { const unsigned int N = perm.size(); if(N == 0) return true; std::vector m(N, false); for(unsigned int i = 0; i < N; i++) { if(perm[i] >= N) return false; if(m[perm[i]]) return false; m[perm[i]] = true; } return true; } // \brief The class for undirected, vertex colored graphs. // Multiple edges between vertices are not allowed (i.e., are ignored). class Graph : public AbstractGraph { public: /** * The possible splitting heuristics. * The selected splitting heuristics affects the computed canonical * labelings; therefore, if you want to compare whether two graphs * are isomorphic by computing and comparing (for equality) their * canonical versions, be sure to use the same splitting heuristics * for both graphs. */ typedef enum { /** First non-unit cell. * Very fast but may result in large search spaces on difficult graphs. * Use for large but easy graphs. */ shs_f = 0, /** First smallest non-unit cell. * Fast, should usually produce smaller search spaces than shs_f. */ shs_fs, /** First largest non-unit cell. * Fast, should usually produce smaller search spaces than shs_f. 
*/ shs_fl, /** First maximally non-trivially connected non-unit cell. * Not so fast, should usually produce smaller search spaces than shs_f, * shs_fs, and shs_fl. */ shs_fm, /** First smallest maximally non-trivially connected non-unit cell. * Not so fast, should usually produce smaller search spaces than shs_f, * shs_fs, and shs_fl. */ shs_fsm, /** First largest maximally non-trivially connected non-unit cell. * Not so fast, should usually produce smaller search spaces than shs_f, * shs_fs, and shs_fl. */ shs_flm } SplittingHeuristic; //moved from protected scope by Zhiqiang class Vertex { public: Vertex() { color = 0;} ~Vertex(){ ; } #ifdef USE_DOMAIN void add_edge(const unsigned other_vertex, Index index) { edges.push_back(std::make_pair(other_vertex, index)); #else void add_edge(const unsigned other_vertex) { edges.push_back(other_vertex); #endif } void remove_duplicate_edges(std::vector& tmp) { #if defined(BLISS_CONSISTENCY_CHECKS) /* Pre-conditions */ for(unsigned int i = 0; i < tmp.size(); i++) assert(tmp[i] == false); #endif for(std::vector::iterator iter = edges.begin(); iter != edges.end(); ) { #ifdef USE_DOMAIN const unsigned int dest_vertex = iter->first; //cxh #else const unsigned int dest_vertex = *iter; #endif if(tmp[dest_vertex] == true) { /* A duplicate edge found! 
*/ iter = edges.erase(iter); } else { /* Not seen earlier, mark as seen */ tmp[dest_vertex] = true; iter++; } } /* Clear tmp */ for(std::vector::iterator iter = edges.begin(); iter != edges.end(); iter++) { #ifdef USE_DOMAIN tmp[iter->first] = false;// cxh #else tmp[*iter] = false; #endif } #if defined(BLISS_CONSISTENCY_CHECKS) /* Post-conditions */ for(unsigned int i = 0; i < tmp.size(); i++) assert(tmp[i] == false); #endif } void sort_edges() { std::sort(edges.begin(), edges.end()); } unsigned color; //std::vector edges; std::vector edges; // cxh: add the edge ids from the embedding unsigned nof_edges() const {return edges.size(); } }; //added by Zhiqiang std::vector & get_vertices_rstream() { return vertices; } void sort_edges_rstream() { sort_edges(); } protected: std::vector vertices; void sort_edges() { for(unsigned int i = 0; i < get_nof_vertices(); i++) vertices[i].sort_edges(); } void remove_duplicate_edges() { std::vector tmp(vertices.size(), false); for(std::vector::iterator vi = vertices.begin(); vi != vertices.end(); vi++) { #if defined(BLISS_EXPENSIVE_CONSISTENCY_CHECKS) for(unsigned int i = 0; i < tmp.size(); i++) assert(tmp[i] == false); #endif (*vi).remove_duplicate_edges(tmp); } } // \internal Partition independent invariant. // Return the color of the vertex. Time complexity: O(1) static unsigned int vertex_color_invariant(const Graph* const g, const unsigned int v) { return g->vertices[v].color; } /** \internal * Partition independent invariant. * Returns the degree of the vertex. * DUPLICATE EDGES MUST HAVE BEEN REMOVED BEFORE. * Time complexity: O(1). */ // Return the degree of the vertex. Time complexity: O(1) static unsigned int degree_invariant(const Graph* const g, const unsigned int v) { return g->vertices[v].nof_edges(); } /** \internal * Partition independent invariant. * Returns 1 if there is an edge from the vertex to itself, 0 if not. * Time complexity: O(k), where k is the number of edges leaving the vertex. 
*/ // Return 1 if the vertex v has a self-loop, 0 otherwise // Time complexity: O(E_v), where E_v is the number of edges leaving v static unsigned selfloop_invariant(const Graph* const g, const unsigned int v) { const Vertex& vertex = g->vertices[v]; for(std::vector::const_iterator ei = vertex.edges.begin(); ei != vertex.edges.end(); ei++) { #ifdef USE_DOMAIN if(ei->first == v) return 1; // cxh #else if(*ei == v) return 1; #endif } return 0; } // Refine the partition p according to a partition independent invariant bool refine_according_to_invariant(unsigned int (*inv)(const Graph* const g, const unsigned int v)) { bool refined = false; for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; ) { Partition::Cell* const next_cell = cell->next_nonsingleton; const unsigned int* ep = p.elements + cell->first; for(unsigned int i = cell->length; i > 0; i--, ep++) { const unsigned int ival = inv(this, *ep); p.invariant_values[*ep] = ival; if(ival > cell->max_ival) { cell->max_ival = ival; cell->max_ival_count = 1; } else if(ival == cell->max_ival) { cell->max_ival_count++; } } Partition::Cell* const last_new_cell = p.zplit_cell(cell, true); refined |= (last_new_cell != cell); cell = next_cell; } return refined; } // Routines needed when refining the partition p into equitable // Split the neighbourhood of a cell according to the equitable invariant bool split_neighbourhood_of_cell(Partition::Cell* const cell) { const bool was_equal_to_first = refine_equal_to_first; if(compute_eqref_hash) { eqref_hash.update(cell->first); eqref_hash.update(cell->length); } const unsigned int* ep = p.elements + cell->first; for(unsigned int i = cell->length; i > 0; i--) { const Vertex& v = vertices[*ep++]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j != 0; j--) { #ifdef USE_DOMAIN const unsigned int dest_vertex = (ei++)->first; // cxh #else const unsigned int dest_vertex = *ei++; #endif Partition::Cell * const neighbour_cell = 
p.get_cell(dest_vertex); if(neighbour_cell->is_unit()) continue; const unsigned int ival = ++p.invariant_values[dest_vertex]; if(ival > neighbour_cell->max_ival) { neighbour_cell->max_ival = ival; neighbour_cell->max_ival_count = 1; if(ival == 1) { neighbour_heap.insert(neighbour_cell->first); } } else if(ival == neighbour_cell->max_ival) { neighbour_cell->max_ival_count++; } } } while(!neighbour_heap.is_empty()) { const unsigned int start = neighbour_heap.remove(); Partition::Cell * const neighbour_cell = p.get_cell(p.elements[start]); if(compute_eqref_hash) { eqref_hash.update(neighbour_cell->first); eqref_hash.update(neighbour_cell->length); eqref_hash.update(neighbour_cell->max_ival); eqref_hash.update(neighbour_cell->max_ival_count); } Partition::Cell* const last_new_cell = p.zplit_cell(neighbour_cell, true); // Update certificate and hash if needed const Partition::Cell* c = neighbour_cell; while(1) { if(in_search) { // Build certificate cert_add_redundant(CERT_SPLIT, c->first, c->length); // No need to continue? 
if(refine_compare_certificate and (refine_equal_to_first == false) and (refine_cmp_to_best < 0)) goto worse_exit; } if(compute_eqref_hash) { eqref_hash.update(c->first); eqref_hash.update(c->length); } if(c == last_new_cell) break; c = c->next; } } if(refine_compare_certificate and (refine_equal_to_first == false) and (refine_cmp_to_best < 0)) return true; return false; worse_exit: // Clear neighbour heap UintSeqHash rest; while(!neighbour_heap.is_empty()) { const unsigned int start = neighbour_heap.remove(); Partition::Cell * const neighbour_cell = p.get_cell(p.elements[start]); if(opt_use_failure_recording and was_equal_to_first) { rest.update(neighbour_cell->first); rest.update(neighbour_cell->length); rest.update(neighbour_cell->max_ival); rest.update(neighbour_cell->max_ival_count); } neighbour_cell->max_ival = 0; neighbour_cell->max_ival_count = 0; p.clear_ivs(neighbour_cell); } if(opt_use_failure_recording and was_equal_to_first) { for(unsigned int i = p.splitting_queue.size(); i > 0; i--) { Partition::Cell* const cell = p.splitting_queue.pop_front(); rest.update(cell->first); rest.update(cell->length); p.splitting_queue.push_back(cell); } rest.update(failure_recording_fp_deviation); failure_recording_fp_deviation = rest.get_value(); } return true; } bool split_neighbourhood_of_unit_cell(Partition::Cell* const unit_cell) { const bool was_equal_to_first = refine_equal_to_first; if(compute_eqref_hash) { eqref_hash.update(0x87654321); eqref_hash.update(unit_cell->first); eqref_hash.update(1); } const Vertex& v = vertices[p.elements[unit_cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN const unsigned int dest_vertex = (ei++)->first; // cxh #else const unsigned int dest_vertex = *ei++; #endif Partition::Cell * const neighbour_cell = p.get_cell(dest_vertex); if(neighbour_cell->is_unit()) { if(in_search) { /* Remember neighbour in order to generate certificate */ 
neighbour_heap.insert(neighbour_cell->first); } continue; } if(neighbour_cell->max_ival_count == 0) { neighbour_heap.insert(neighbour_cell->first); } neighbour_cell->max_ival_count++; unsigned int * const swap_position = p.elements + neighbour_cell->first + neighbour_cell->length - neighbour_cell->max_ival_count; *p.in_pos[dest_vertex] = *swap_position; p.in_pos[*swap_position] = p.in_pos[dest_vertex]; *swap_position = dest_vertex; p.in_pos[dest_vertex] = swap_position; } while(!neighbour_heap.is_empty()) { const unsigned int start = neighbour_heap.remove(); Partition::Cell* neighbour_cell = p.get_cell(p.elements[start]); #if defined(BLISS_CONSISTENCY_CHECKS) if(neighbour_cell->is_unit()) { } else { } #endif if(compute_eqref_hash) { eqref_hash.update(neighbour_cell->first); eqref_hash.update(neighbour_cell->length); eqref_hash.update(neighbour_cell->max_ival_count); } if(neighbour_cell->length > 1 and neighbour_cell->max_ival_count != neighbour_cell->length) { Partition::Cell * const new_cell = p.aux_split_in_two(neighbour_cell, neighbour_cell->length - neighbour_cell->max_ival_count); unsigned int *ep = p.elements + new_cell->first; unsigned int * const lp = p.elements+new_cell->first+new_cell->length; while(ep < lp) { p.element_to_cell_map[*ep] = new_cell; ep++; } neighbour_cell->max_ival_count = 0; if(compute_eqref_hash) { /* Update hash */ eqref_hash.update(neighbour_cell->first); eqref_hash.update(neighbour_cell->length); eqref_hash.update(0); eqref_hash.update(new_cell->first); eqref_hash.update(new_cell->length); eqref_hash.update(1); } /* Add cells in splitting_queue */ if(neighbour_cell->is_in_splitting_queue()) { /* Both cells must be included in splitting_queue in order to ensure refinement into equitable partition */ p.splitting_queue_add(new_cell); } else { Partition::Cell *min_cell, *max_cell; if(neighbour_cell->length <= new_cell->length) { min_cell = neighbour_cell; max_cell = new_cell; } else { min_cell = new_cell; max_cell = neighbour_cell; } /* 
Put the smaller cell in splitting_queue */ p.splitting_queue_add(min_cell); if(max_cell->is_unit()) { /* Put the "larger" cell also in splitting_queue */ p.splitting_queue_add(max_cell); } } /* Update pointer for certificate generation */ neighbour_cell = new_cell; } else { /* neighbour_cell->length == 1 || neighbour_cell->max_ival_count == neighbour_cell->length */ neighbour_cell->max_ival_count = 0; } /* * Build certificate if required */ if(in_search) { for(unsigned int i = neighbour_cell->first, j = neighbour_cell->length; j > 0; j--, i++) { /* Build certificate */ cert_add(CERT_EDGE, unit_cell->first, i); /* No need to continue? */ if(refine_compare_certificate and (refine_equal_to_first == false) and (refine_cmp_to_best < 0)) goto worse_exit; } } /* if(in_search) */ } /* while(!neighbour_heap.is_empty()) */ if(refine_compare_certificate and (refine_equal_to_first == false) and (refine_cmp_to_best < 0)) return true; return false; worse_exit: /* Clear neighbour heap */ UintSeqHash rest; while(!neighbour_heap.is_empty()) { const unsigned int start = neighbour_heap.remove(); Partition::Cell * const neighbour_cell = p.get_cell(p.elements[start]); if(opt_use_failure_recording and was_equal_to_first) { rest.update(neighbour_cell->first); rest.update(neighbour_cell->length); rest.update(neighbour_cell->max_ival_count); } neighbour_cell->max_ival_count = 0; } if(opt_use_failure_recording and was_equal_to_first) { rest.update(failure_recording_fp_deviation); failure_recording_fp_deviation = rest.get_value(); } return true; } // Build the initial equitable partition void make_initial_equitable_partition() { refine_according_to_invariant(&vertex_color_invariant); p.splitting_queue_clear(); //p.print_signature(stderr); fprintf(stderr, "\n"); refine_according_to_invariant(&selfloop_invariant); p.splitting_queue_clear(); //p.print_signature(stderr); fprintf(stderr, "\n"); refine_according_to_invariant(°ree_invariant); p.splitting_queue_clear(); //p.print_signature(stderr); 
fprintf(stderr, "\n"); refine_to_equitable(); //p.print_signature(stderr); fprintf(stderr, "\n"); } // \internal // \copydoc AbstractGraph::is_equitable() const //Check whether the current partition p is equitable. //Performance: very slow, use only for debugging purposes. bool is_equitable() const { const unsigned int N = get_nof_vertices(); if(N == 0) return true; std::vector first_count = std::vector(N, 0); std::vector other_count = std::vector(N, 0); for(Partition::Cell *cell = p.first_cell; cell; cell = cell->next) { if(cell->is_unit()) continue; unsigned int *ep = p.elements + cell->first; const Vertex &first_vertex = vertices[*ep++]; /* Count how many edges lead from the first vertex to * the neighbouring cells */ for(std::vector::const_iterator ei = first_vertex.edges.begin(); ei != first_vertex.edges.end(); ei++) { #ifdef USE_DOMAIN first_count[p.get_cell(ei->first)->first]++; // cxh #else first_count[p.get_cell(*ei)->first]++; #endif } /* Count and compare to the edges of the other vertices */ for(unsigned int i = cell->length; i > 1; i--) { const Vertex &vertex = vertices[*ep++]; for(std::vector::const_iterator ei = vertex.edges.begin(); ei != vertex.edges.end(); ei++) { #ifdef USE_DOMAIN other_count[p.get_cell(ei->first)->first]++; // cxh #else other_count[p.get_cell(*ei)->first]++; #endif } for(Partition::Cell *cell2 = p.first_cell; cell2; cell2 = cell2->next) { if(first_count[cell2->first] != other_count[cell2->first]) { /* Not equitable */ return false; } other_count[cell2->first] = 0; } } /* Reset first_count */ for(unsigned int i = 0; i < N; i++) first_count[i] = 0; } return true; } /* Splitting heuristics, documented in more detail in graph.cc */ SplittingHeuristic sh; // Find the next cell to be splitted Partition::Cell* find_next_cell_to_be_splitted(Partition::Cell* cell) { switch(sh) { case shs_f: return sh_first(); case shs_fs: return sh_first_smallest(); case shs_fl: return sh_first_largest(); case shs_fm: return sh_first_max_neighbours(); 
case shs_fsm: return sh_first_smallest_max_neighbours(); case shs_flm: return sh_first_largest_max_neighbours(); default: fatal_error("Internal error - unknown splitting heuristics"); return 0; } } // \internal // A splitting heuristic. // Returns the first nonsingleton cell in the current partition. Partition::Cell* sh_first() { Partition::Cell* best_cell = 0; for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; best_cell = cell; break; } return best_cell; } // \internal A splitting heuristic. // Returns the first smallest nonsingleton cell in the current partition. Partition::Cell* sh_first_smallest() { Partition::Cell* best_cell = 0; unsigned int best_size = UINT_MAX; for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; if(cell->length < best_size) { best_size = cell->length; best_cell = cell; } } return best_cell; } // \internal A splitting heuristic. // Returns the first largest nonsingleton cell in the current partition. Partition::Cell* sh_first_largest() { Partition::Cell* best_cell = 0; unsigned int best_size = 0; for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; if(cell->length > best_size) { best_size = cell->length; best_cell = cell; } } return best_cell; } // \internal // A splitting heuristic. // Returns the first nonsingleton cell with max number of neighbouring nonsingleton cells. // Assumes that the partition p is equitable. // Assumes that the max_ival fields of the cells are all 0. 
Partition::Cell* sh_first_max_neighbours() { Partition::Cell* best_cell = 0; int best_value = -1; KStack neighbour_cells_visited; neighbour_cells_visited.init(get_nof_vertices()); for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; const Vertex& v = vertices[p.elements[cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN Partition::Cell * const neighbour_cell = p.get_cell((ei++)->first); // cxh #else Partition::Cell * const neighbour_cell = p.get_cell(*ei++); #endif if(neighbour_cell->is_unit()) continue; neighbour_cell->max_ival++; if(neighbour_cell->max_ival == 1) neighbour_cells_visited.push(neighbour_cell); } int value = 0; while(!neighbour_cells_visited.is_empty()) { Partition::Cell* const neighbour_cell = neighbour_cells_visited.pop(); if(neighbour_cell->max_ival != neighbour_cell->length) value++; neighbour_cell->max_ival = 0; } if(value > best_value) { best_value = value; best_cell = cell; } } return best_cell; } // \internal A splitting heuristic. // Returns the first smallest nonsingleton cell with max number of neighbouring nonsingleton cells. // Assumes that the partition p is equitable. Assumes that the max_ival fields of the cells are all 0. 
Partition::Cell* sh_first_smallest_max_neighbours() { Partition::Cell* best_cell = 0; int best_value = -1; unsigned int best_size = UINT_MAX; KStack neighbour_cells_visited; neighbour_cells_visited.init(get_nof_vertices()); for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; const Vertex& v = vertices[p.elements[cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN Partition::Cell* const neighbour_cell = p.get_cell((ei++)->first); // cxh #else Partition::Cell* const neighbour_cell = p.get_cell(*ei++); #endif if(neighbour_cell->is_unit()) continue; neighbour_cell->max_ival++; if(neighbour_cell->max_ival == 1) neighbour_cells_visited.push(neighbour_cell); } int value = 0; while(!neighbour_cells_visited.is_empty()) { Partition::Cell* const neighbour_cell = neighbour_cells_visited.pop(); if(neighbour_cell->max_ival != neighbour_cell->length) value++; neighbour_cell->max_ival = 0; } if((value > best_value) or (value == best_value and cell->length < best_size)) { best_value = value; best_size = cell->length; best_cell = cell; } } return best_cell; } // \internal A splitting heuristic. // Returns the first largest nonsingleton cell with max number of neighbouring nonsingleton cells. // Assumes that the partition p is equitable. Assumes that the max_ival fields of the cells are all 0. 
Partition::Cell* sh_first_largest_max_neighbours() { Partition::Cell* best_cell = 0; int best_value = -1; unsigned int best_size = 0; KStack neighbour_cells_visited; neighbour_cells_visited.init(get_nof_vertices()); for(Partition::Cell* cell = p.first_nonsingleton_cell; cell; cell = cell->next_nonsingleton) { if(opt_use_comprec and p.cr_get_level(cell->first) != cr_level) continue; const Vertex& v = vertices[p.elements[cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN Partition::Cell* const neighbour_cell = p.get_cell((ei++)->first); // cxh #else Partition::Cell* const neighbour_cell = p.get_cell(*ei++); #endif if(neighbour_cell->is_unit()) continue; neighbour_cell->max_ival++; if(neighbour_cell->max_ival == 1) neighbour_cells_visited.push(neighbour_cell); } int value = 0; while(!neighbour_cells_visited.is_empty()) { Partition::Cell* const neighbour_cell = neighbour_cells_visited.pop(); if(neighbour_cell->max_ival != neighbour_cell->length) value++; neighbour_cell->max_ival = 0; } if((value > best_value) or (value == best_value and cell->length > best_size)) { best_value = value; best_size = cell->length; best_cell = cell; } } return best_cell; } //Initialize the certificate size and memory void initialize_certificate() { certificate_index = 0; certificate_current_path.clear(); certificate_first_path.clear(); certificate_best_path.clear(); } bool is_automorphism(unsigned* const perm) { std::set > edges1; std::set > edges2; #if defined(BLISS_CONSISTENCY_CHECKS) if(!is_permutation(get_nof_vertices(), perm)) _INTERNAL_ERROR(); #endif for(unsigned int i = 0; i < get_nof_vertices(); i++) { Vertex& v1 = vertices[i]; edges1.clear(); for(std::vector::iterator ei = v1.edges.begin(); ei != v1.edges.end(); ei++) #ifdef USE_DOMAIN edges1.insert(perm[ei->first]); // cxh #else edges1.insert(perm[*ei]); #endif Vertex& v2 = vertices[perm[i]]; edges2.clear(); for(std::vector::iterator ei = 
v2.edges.begin(); ei != v2.edges.end(); ei++) #ifdef USE_DOMAIN edges2.insert(ei->first); // cxh #else edges2.insert(*ei); #endif if(!(edges1 == edges2)) return false; } return true; } bool nucr_find_first_component(const unsigned level) { cr_component.clear(); cr_component_elements = 0; /* Find first non-discrete cell in the component level */ Partition::Cell* first_cell = p.first_nonsingleton_cell; while(first_cell) { if(p.cr_get_level(first_cell->first) == level) break; first_cell = first_cell->next_nonsingleton; } /* The component is discrete, return false */ if(!first_cell) return false; std::vector component; first_cell->max_ival = 1; component.push_back(first_cell); for(unsigned int i = 0; i < component.size(); i++) { Partition::Cell* const cell = component[i]; const Vertex& v = vertices[p.elements[cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN const unsigned int neighbour = (ei++)->first; // cxh #else const unsigned int neighbour = *ei++; #endif Partition::Cell* const neighbour_cell = p.get_cell(neighbour); /* Skip unit neighbours */ if(neighbour_cell->is_unit()) continue; /* Already marked to be in the same component? */ if(neighbour_cell->max_ival == 1) continue; /* Is the neighbour at the same component recursion level? 
*/ if(p.cr_get_level(neighbour_cell->first) != level) continue; if(neighbour_cell->max_ival_count == 0) neighbour_heap.insert(neighbour_cell->first); neighbour_cell->max_ival_count++; } while(!neighbour_heap.is_empty()) { const unsigned int start = neighbour_heap.remove(); Partition::Cell* const neighbour_cell = p.get_cell(p.elements[start]); /* Skip saturated neighbour cells */ if(neighbour_cell->max_ival_count == neighbour_cell->length) { neighbour_cell->max_ival_count = 0; continue; } neighbour_cell->max_ival_count = 0; neighbour_cell->max_ival = 1; component.push_back(neighbour_cell); } } for(unsigned int i = 0; i < component.size(); i++) { Partition::Cell* const cell = component[i]; cell->max_ival = 0; cr_component.push_back(cell->first); cr_component_elements += cell->length; } if(verbstr and verbose_level > 2) { fprintf(verbstr, "NU-component with %lu cells and %u vertices\n", (long unsigned)cr_component.size(), cr_component_elements); fflush(verbstr); } return true; } bool nucr_find_first_component(const unsigned int level, std::vector& component, unsigned int& component_elements, Partition::Cell*& sh_return) { component.clear(); component_elements = 0; sh_return = 0; unsigned int sh_first = 0; unsigned int sh_size = 0; unsigned int sh_nuconn = 0; /* Find first non-discrete cell in the component level */ Partition::Cell* first_cell = p.first_nonsingleton_cell; while(first_cell) { if(p.cr_get_level(first_cell->first) == level) break; first_cell = first_cell->next_nonsingleton; } if(!first_cell) { /* The component is discrete, return false */ return false; } std::vector comp; KStack neighbours; neighbours.init(get_nof_vertices()); first_cell->max_ival = 1; comp.push_back(first_cell); for(unsigned int i = 0; i < comp.size(); i++) { Partition::Cell* const cell = comp[i]; const Vertex& v = vertices[p.elements[cell->first]]; std::vector::const_iterator ei = v.edges.begin(); for(unsigned int j = v.nof_edges(); j > 0; j--) { #ifdef USE_DOMAIN const unsigned int 
neighbour = (ei++)->first; // cxh #else const unsigned int neighbour = *ei++; #endif Partition::Cell* const neighbour_cell = p.get_cell(neighbour); /* Skip unit neighbours */ if(neighbour_cell->is_unit()) continue; /* Is the neighbour at the same component recursion level? */ //if(p.cr_get_level(neighbour_cell->first) != level) // continue; if(neighbour_cell->max_ival_count == 0) neighbours.push(neighbour_cell); neighbour_cell->max_ival_count++; } unsigned int nuconn = 1; while(!neighbours.is_empty()) { Partition::Cell* const neighbour_cell = neighbours.pop(); //neighbours.pop_back(); /* Skip saturated neighbour cells */ if(neighbour_cell->max_ival_count == neighbour_cell->length) { neighbour_cell->max_ival_count = 0; continue; } nuconn++; neighbour_cell->max_ival_count = 0; if(neighbour_cell->max_ival == 0) { comp.push_back(neighbour_cell); neighbour_cell->max_ival = 1; } } switch(sh) { case shs_f: if(sh_return == 0 or cell->first <= sh_first) { sh_return = cell; sh_first = cell->first; } break; case shs_fs: if(sh_return == 0 or cell->length < sh_size or (cell->length == sh_size and cell->first <= sh_first)) { sh_return = cell; sh_first = cell->first; sh_size = cell->length; } break; case shs_fl: if(sh_return == 0 or cell->length > sh_size or (cell->length == sh_size and cell->first <= sh_first)) { sh_return = cell; sh_first = cell->first; sh_size = cell->length; } break; case shs_fm: if(sh_return == 0 or nuconn > sh_nuconn or (nuconn == sh_nuconn and cell->first <= sh_first)) { sh_return = cell; sh_first = cell->first; sh_nuconn = nuconn; } break; case shs_fsm: if(sh_return == 0 or nuconn > sh_nuconn or (nuconn == sh_nuconn and (cell->length < sh_size or (cell->length == sh_size and cell->first <= sh_first)))) { sh_return = cell; sh_first = cell->first; sh_size = cell->length; sh_nuconn = nuconn; } break; case shs_flm: if(sh_return == 0 or nuconn > sh_nuconn or (nuconn == sh_nuconn and (cell->length > sh_size or (cell->length == sh_size and cell->first <= 
sh_first)))) { sh_return = cell; sh_first = cell->first; sh_size = cell->length; sh_nuconn = nuconn; } break; default: fatal_error("Internal error - unknown splitting heuristics"); return 0; } } assert(sh_return); for(unsigned int i = 0; i < comp.size(); i++) { Partition::Cell* const cell = comp[i]; cell->max_ival = 0; component.push_back(cell->first); component_elements += cell->length; } if(verbstr and verbose_level > 2) { fprintf(verbstr, "NU-component with %lu cells and %u vertices\n", (long unsigned)component.size(), component_elements); fflush(verbstr); } return true; } public: // Create a new graph with \a N vertices and no edges. Graph(const unsigned nof_vertices = 0) { vertices.resize(nof_vertices); sh = shs_flm; } /** * Destroy the graph. */ ~Graph() { ; } /** * Read the graph from the file \a fp in a variant of the DIMACS format. * See the bliss website * for the definition of the file format. * Note that in the DIMACS file the vertices are numbered from 1 to N while * in this C++ API they are from 0 to N-1. * Thus the vertex n in the file corresponds to the vertex n-1 in the API. * * \param fp the file stream for the graph file * \param errstr if non-null, the possible error messages are printed * in this file stream * \return a new Graph object or 0 if reading failed for some * reason */ static Graph* read_dimacs(FILE* const fp, FILE* const errstr = stderr) { return NULL; } /** * Write the graph to a file in a variant of the DIMACS format. * See the bliss website * for the definition of the file format. 
*/
// Stub: DIMACS output is not supported in this vendored build.
void write_dimacs(FILE* const fp) {}

// \copydoc AbstractGraph::write_dot(FILE * const fp)
// Stub: dot output is not supported in this vendored build.
void write_dot(FILE* const fp) {}

// \copydoc AbstractGraph::write_dot(const char * const file_name)
void write_dot(const char* const file_name) {}

// \copydoc AbstractGraph::is_automorphism(const std::vector& perm) const
// Returns true iff \a perm is a permutation of the vertices that maps the
// edge relation onto itself: for every vertex i, the image of i's
// neighbourhood under \a perm must equal the neighbourhood of perm[i].
// NOTE(review): the angle-bracket template arguments appear to have been
// stripped from this file by text extraction (e.g. "std::set >",
// "std::vector::const_iterator"); restore them against the upstream
// bliss sources before compiling.
bool is_automorphism(const std::vector& perm) const
{
  if(!(perm.size() == get_nof_vertices() and is_permutation(perm)))
    return false;

  std::set > edges1;
  std::set > edges2;

  for(unsigned i = 0; i < get_nof_vertices(); i++)
    {
      // Collect the permuted neighbourhood of vertex i ...
      const Vertex& v1 = vertices[i];
      edges1.clear();
      for(std::vector::const_iterator ei = v1.edges.begin();
          ei != v1.edges.end();
          ei++)
#ifdef USE_DOMAIN
        edges1.insert(perm[ei->first]); // cxh
#else
        edges1.insert(perm[*ei]);
#endif
      // ... and the neighbourhood of the image vertex perm[i].
      const Vertex& v2 = vertices[perm[i]];
      edges2.clear();
      for(std::vector::const_iterator ei = v2.edges.begin();
          ei != v2.edges.end();
          ei++)
#ifdef USE_DOMAIN
        edges2.insert(ei->first); // cxh
#else
        edges2.insert(*ei);
#endif
      if(!(edges1 == edges2))
        return false;
    }
  return true;
}

// \copydoc AbstractGraph::get_hash()
// Hash is computed over the vertex count, the vertex colors, and each
// undirected edge counted once; duplicate edges are removed and edges are
// sorted first so that equal graphs hash equally.
virtual unsigned get_hash()
{
  remove_duplicate_edges();
  sort_edges();

  UintSeqHash h;

  h.update(get_nof_vertices());

  /* Hash the color of each vertex */
  for(unsigned int i = 0; i < get_nof_vertices(); i++)
    {
      h.update(vertices[i].color);
    }

  /* Hash the edges */
  for(unsigned int i = 0; i < get_nof_vertices(); i++)
    {
      Vertex &v = vertices[i];
      for(std::vector::const_iterator ei = v.edges.begin();
          ei != v.edges.end();
          ei++)
        {
#ifdef USE_DOMAIN
          const unsigned int dest_i = ei->first; // cxh
#else
          const unsigned int dest_i = *ei;
#endif
          // Each undirected edge appears in both endpoint lists; hash it
          // only from the smaller-numbered endpoint.
          if(dest_i < i)
            continue;
          h.update(i);
          h.update(dest_i);
        }
    }
  return h.get_value();
}

// Return the number of vertices in the graph.
unsigned int get_nof_vertices() const {return vertices.size(); }

// \copydoc AbstractGraph::permute(const unsigned int* const perm) const
// Returns a newly allocated graph in which vertex i of this graph has become
// vertex perm[i]; caller owns the returned object.
const Graph* permute(const unsigned* perm) const
{
#if defined(BLISS_CONSISTENCY_CHECKS)
  if(!is_permutation(get_nof_vertices(), perm))
    _INTERNAL_ERROR();
#endif
  Graph* const g = new Graph(get_nof_vertices());
  for(unsigned i = 0; i < get_nof_vertices(); i++)
    {
      const Vertex& v = vertices[i];
      Vertex& permuted_v = g->vertices[perm[i]];
      permuted_v.color = v.color;
      for(std::vector::const_iterator ei = v.edges.begin();
          ei != v.edges.end();
          ei++)
        {
#ifdef USE_DOMAIN
          const unsigned dest_v = ei->first; //cxh
          permuted_v.add_edge(perm[dest_v], ei->second);
#else
          const unsigned dest_v = *ei;
          permuted_v.add_edge(perm[dest_v]);
#endif
        }
      permuted_v.sort_edges();
    }
  return g;
}

// Overload of permute() taking the permutation as a vector; same semantics
// as the pointer overload above. Caller owns the returned object.
Graph* permute(const std::vector& perm) const
{
#if defined(BLISS_CONSISTENCY_CHECKS)
#endif
  Graph* const g = new Graph(get_nof_vertices());
  for(unsigned int i = 0; i < get_nof_vertices(); i++)
    {
      const Vertex& v = vertices[i];
      Vertex& permuted_v = g->vertices[perm[i]];
      permuted_v.color = v.color;
      for(std::vector::const_iterator ei = v.edges.begin();
          ei != v.edges.end();
          ei++)
        {
#ifdef USE_DOMAIN
          const unsigned dest_v = ei->first; // cxh
          permuted_v.add_edge(perm[dest_v], ei->second);
#else
          const unsigned dest_v = *ei;
          permuted_v.add_edge(perm[dest_v]);
#endif
        }
      permuted_v.sort_edges();
    }
  return g;
}

// Add a new vertex with color \a color in the graph and return its index.
unsigned add_vertex(const unsigned color = 0)
{
  const unsigned int vertex_num = vertices.size();
  vertices.resize(vertex_num + 1);
  vertices.back().color = color;
  return vertex_num;
}

/**
 * Add an edge between vertices \a v1 and \a v2.
 * Duplicate edges between vertices are ignored but try to avoid introducing
 * them in the first place as they are not ignored immediately but will
 * consume memory and computation resources for a while.
* The selected splitting heuristics affects the computed canonical * labelings; therefore, if you want to compare whether two graphs * are isomorphic by computing and comparing (for equality) their * canonical versions, be sure to use the same splitting heuristics * for both graphs. */ void set_splitting_heuristic(const SplittingHeuristic shs) {sh = shs; } }; } #endif ================================================ FILE: external/bliss/bliss/heap.hh ================================================ #ifndef BLISS_HEAP_HH #define BLISS_HEAP_HH #include #include #include namespace bliss { /** \internal * \brief A capacity bounded heap data structure. */ class Heap { unsigned int N; unsigned int n; unsigned int *array; //void upheap(unsigned int k); void upheap(unsigned int index) { const unsigned int v = array[index]; array[0] = 0; while(array[index/2] > v) { array[index] = array[index/2]; index = index/2; } array[index] = v; } //void downheap(unsigned int k); void downheap(unsigned int index) { const unsigned int v = array[index]; const unsigned int lim = n/2; while(index <= lim) { unsigned int new_index = index + index; if((new_index < n) and (array[new_index] > array[new_index+1])) new_index++; if(v <= array[new_index]) break; array[index] = array[new_index]; index = new_index; } array[index] = v; } public: /** * Create a new heap. * init() must be called after this. */ Heap() {array = 0; n = 0; N = 0; } ~Heap() { if(array) { free(array); array = 0; n = 0; N = 0; } } /** * Initialize the heap to have the capacity to hold \e size elements. */ //void init(const unsigned int size); void init(const unsigned int size) { if(size > N) { if(array) free(array); array = (unsigned int*)malloc((size + 1) * sizeof(unsigned int)); N = size; } } /** * Is the heap empty? * Time complexity is O(1). */ bool is_empty() const { return (n==0); } /** * Remove all the elements in the heap. * Time complexity is O(1). */ void clear() { n = 0; } /** * Insert the element \a e in the heap. 
* Time complexity is O(log(N)), where N is the number of elements * currently in the heap. */ //void insert(const unsigned int e); void insert(const unsigned int v) { array[++n] = v; upheap(n); } /** * Remove and return the smallest element in the heap. * Time complexity is O(log(N)), where N is the number of elements * currently in the heap. */ //unsigned int remove(); unsigned int remove() { const unsigned int v = array[1]; array[1] = array[n--]; downheap(1); return v; } /** * Get the number of elements in the heap. */ unsigned int size() const {return n; } }; } // namespace bliss #endif ================================================ FILE: external/bliss/bliss/kqueue.hh ================================================ #ifndef BLISS_KQUEUE_HH #define BLISS_KQUEUE_HH /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ #include "defs.hh" namespace bliss { /** \internal * \brief A very simple implementation of queues with fixed capacity. */ template class KQueue { public: /** * Create a new queue with capacity zero. * The function init() should be called next. */ KQueue(); ~KQueue(); /** * Initialize the queue to have the capacity to hold at most \a N elements. */ void init(const unsigned int N); /** Is the queue empty? */ bool is_empty() const; /** Return the number of elements in the queue. 
*/ unsigned int size() const; /** Remove all the elements in the queue. */ void clear(); /** Return (but don't remove) the first element in the queue. */ Type front() const; /** Remove and return the first element of the queue. */ Type pop_front(); /** Push the element \a e in the front of the queue. */ void push_front(Type e); /** Remove and return the last element of the queue. */ Type pop_back(); /** Push the element \a e in the back of the queue. */ void push_back(Type e); private: Type *entries, *end; Type *head, *tail; }; template KQueue::KQueue() { entries = 0; end = 0; head = 0; tail = 0; } template KQueue::~KQueue() { if(entries) free(entries); } template void KQueue::init(const unsigned int k) { assert(k > 0); if(entries) free(entries); entries = (Type*)malloc((k + 1) * sizeof(Type)); end = entries + k + 1; head = entries; tail = head; } template void KQueue::clear() { head = entries; tail = head; } template bool KQueue::is_empty() const { return(head == tail); } template unsigned int KQueue::size() const { if(tail >= head) return(tail - head); return((end - head) + (tail - entries)); } template Type KQueue::front() const { return *head; } template Type KQueue::pop_front() { Type *old_head = head; head++; if(head == end) head = entries; return *old_head; } template void KQueue::push_front(Type e) { if(head == entries) head = end - 1; else head--; *head = e; } template void KQueue::push_back(Type e) { *tail = e; tail++; if(tail == end) tail = entries; } } // namespace bliss #endif ================================================ FILE: external/bliss/bliss/kstack.hh ================================================ #ifndef BLISS_KSTACK_H #define BLISS_KSTACK_H /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. 
bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ #include #include "defs.hh" namespace bliss { /** \internal * \brief A very simple implementation of a stack with fixed capacity. */ template class KStack { public: /** * Create a new stack with zero capacity. * The function init() should be called next. */ KStack(); /** * Create a new stack with the capacity to hold at most \a N elements. */ KStack(int N); ~KStack(); /** * Initialize the stack to have the capacity to hold at most \a N elements. */ void init(int N); /** * Is the stack empty? */ bool is_empty() const {return(cursor == entries); } /** * Return (but don't remove) the top element of the stack. */ Type top() const {BLISS_ASSERT(cursor > entries); return *cursor; } /** * Pop (remove) the top element of the stack. */ Type pop() { return *cursor--; } /** * Push the element \a e in the stack. */ void push(Type e) { *(++cursor) = e; } /** Remove all the elements in the stack. */ void clean() {cursor = entries; } /** * Get the number of elements in the stack. */ unsigned int size() const {return(cursor - entries); } /** * Return the i:th element in the stack, where \a i is in the range * 0,...,this.size()-1; the 0:th element is the bottom element * in the stack. */ Type element_at(unsigned int i) { assert(i < size()); return entries[i+1]; } /** Return the capacity (NOT the number of elements) of the stack. 
*/ int capacity() {return kapacity; } private: int kapacity; Type *entries; Type *cursor; }; template KStack::KStack() { kapacity = 0; entries = 0; cursor = 0; } template KStack::KStack(int k) { assert(k > 0); kapacity = k; entries = (Type*)malloc((k+1) * sizeof(Type)); cursor = entries; } template void KStack::init(int k) { assert(k > 0); if(entries) free(entries); kapacity = k; entries = (Type*)malloc((k+1) * sizeof(Type)); cursor = entries; } template KStack::~KStack() { free(entries); } } // namespace bliss #endif ================================================ FILE: external/bliss/bliss/orbit.hh ================================================ #ifndef BLISS_ORBIT_HH #define BLISS_ORBIT_HH namespace bliss { class Orbit { class OrbitEntry { public: unsigned int element; OrbitEntry *next; unsigned int size; }; OrbitEntry *orbits; OrbitEntry **in_orbit; unsigned int nof_elements; unsigned int _nof_orbits; void merge_orbits(OrbitEntry *orbit1, OrbitEntry *orbit2) { if(orbit1 != orbit2) { _nof_orbits--; // Only update the elements in the smaller orbit if(orbit1->size > orbit2->size) { OrbitEntry * const temp = orbit2; orbit2 = orbit1; orbit1 = temp; } // Link the elements of orbit1 to the almost beginning of orbit2 OrbitEntry *e = orbit1; while(e->next) { in_orbit[e->element] = orbit2; e = e->next; } in_orbit[e->element] = orbit2; e->next = orbit2->next; orbit2->next = orbit1; // Keep the minimal orbit representative in the beginning if(orbit1->element < orbit2->element) { const unsigned int temp = orbit1->element; orbit1->element = orbit2->element; orbit2->element = temp; } orbit2->size += orbit1->size; } } public: // Create a new orbit information object. // The init() function must be called next to actually initialize the object. 
Orbit() { orbits = 0; in_orbit = 0; nof_elements = 0; } ~Orbit() { if(orbits) { free(orbits); orbits = 0; } if(in_orbit) { free(in_orbit); in_orbit = 0; } nof_elements = 0; } // Initialize the orbit information to consider sets of \a N elements. // It is required that \a N > 0. // The orbit information is reset so that each element forms an orbit of its own. // Time complexity is O(N). \sa reset() void init(const unsigned int n) { assert(n > 0); if(orbits) free(orbits); orbits = (OrbitEntry*)malloc(n * sizeof(OrbitEntry)); if(in_orbit) free(in_orbit); in_orbit = (OrbitEntry**)malloc(n * sizeof(OrbitEntry*)); nof_elements = n; reset(); } // Reset the orbits so that each element forms an orbit of its own. // Time complexity is O(N). void reset() { assert(orbits); assert(in_orbit); for(unsigned int i = 0; i < nof_elements; i++) { orbits[i].element = i; orbits[i].next = 0; orbits[i].size = 1; in_orbit[i] = &orbits[i]; } _nof_orbits = nof_elements; } // Merge the orbits of the elements \a e1 and \a e2. // Time complexity is O(k), where k is the number of elements in // the smaller of the merged orbits. void merge_orbits(unsigned int e1, unsigned int e2) { merge_orbits(in_orbit[e1], in_orbit[e2]); } // Is the element \a e the smallest element in its orbit? // Time complexity is O(1). bool is_minimal_representative(unsigned element) const { return(get_minimal_representative(element) == element); } /// Get the smallest element in the orbit of the element \a e. // Time complexity is O(1). unsigned get_minimal_representative(unsigned element) const { OrbitEntry * const orbit = in_orbit[element]; return(orbit->element); } // Get the number of elements in the orbit of the element \a e. // Time complexity is O(1). unsigned orbit_size(unsigned element) const { return(in_orbit[element]->size); } // Get the number of orbits. // Time complexity is O(1). 
unsigned int nof_orbits() const {return _nof_orbits; } }; } // namespace bliss #endif ================================================ FILE: external/bliss/bliss/partition.hh ================================================ #ifndef BLISS_PARTITION_HH #define BLISS_PARTITION_HH /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ namespace bliss { class Partition; } #include #include #include #include #include "kstack.hh" #include "kqueue.hh" #include "heap.hh" #include "orbit.hh" #include "abgraph.hh" #include "graph.hh" namespace bliss { /** \internal * \brief A class for refinable, backtrackable ordered partitions. * * This is rather a data structure with some helper functions than * a proper self-contained class. * That is, for efficiency reasons the fields of this class are directly * manipulated from bliss::AbstractGraph and its subclasses. * Conversely, some methods of this class modify the fields of * bliss::AbstractGraph, too. */ class Partition { public: /** * \brief Data structure for holding information about a cell in a Partition. 
*/ class Cell { friend class Partition; public: unsigned int length; // Index of the first element of the cell in the Partition::elements array unsigned int first; unsigned int max_ival; unsigned int max_ival_count; private: bool in_splitting_queue; public: bool in_neighbour_heap; /* Pointer to the next cell, null if this is the last one. */ Cell* next; Cell* prev; Cell* next_nonsingleton; Cell* prev_nonsingleton; unsigned int split_level; /** Is this a unit cell? */ bool is_unit() const {return(length == 1); } /** Is this cell in splitting queue? */ bool is_in_splitting_queue() const {return(in_splitting_queue); } }; private: /** \internal * Data structure for remembering information about splits in order to * perform efficient backtracking over the splits. */ class RefInfo { public: unsigned int split_cell_first; int prev_nonsingleton_first; int next_nonsingleton_first; }; /** \internal * A stack for remembering the splits, used for backtracking. */ KStack refinement_stack; class BacktrackInfo { public: BacktrackInfo() : refinement_stack_size(0), cr_backtrack_point(0) {} unsigned int refinement_stack_size; unsigned int cr_backtrack_point; }; /** \internal * The main stack for enabling backtracking. */ std::vector bt_stack; public: AbstractGraph* graph; /* Used during equitable partition refinement */ KQueue splitting_queue; //void splitting_queue_add(Cell* const cell); Cell* splitting_queue_pop(); bool splitting_queue_is_empty() const; //void splitting_queue_clear(); void splitting_queue_add(Cell* const cell) { static const unsigned int smallish_cell_threshold = 1; cell->in_splitting_queue = true; if(cell->length <= smallish_cell_threshold) splitting_queue.push_front(cell); else splitting_queue.push_back(cell); } void splitting_queue_clear() { while(!splitting_queue_is_empty()) splitting_queue_pop(); } /** Type for backtracking points. 
*/ typedef unsigned int BacktrackPoint; /** * Get a new backtrack point for the current partition */ //BacktrackPoint set_backtrack_point(); BacktrackPoint set_backtrack_point() { BacktrackInfo info; info.refinement_stack_size = refinement_stack.size(); if(cr_enabled) info.cr_backtrack_point = cr_get_backtrack_point(); BacktrackPoint p = bt_stack.size(); bt_stack.push_back(info); return p; } /** * Backtrack to the point \a p and remove it. */ //void goto_backtrack_point(BacktrackPoint p); void goto_backtrack_point(BacktrackPoint p) { BacktrackInfo info = bt_stack[p]; bt_stack.resize(p); if(cr_enabled) cr_goto_backtrack_point(info.cr_backtrack_point); const unsigned int dest_refinement_stack_size = info.refinement_stack_size; assert(refinement_stack.size() >= dest_refinement_stack_size); while(refinement_stack.size() > dest_refinement_stack_size) { RefInfo i = refinement_stack.pop(); const unsigned int first = i.split_cell_first; Cell* cell = get_cell(elements[first]); if(cell->first != first) { assert(cell->first < first); assert(cell->split_level <= dest_refinement_stack_size); goto done; } assert(cell->split_level > dest_refinement_stack_size); while(cell->split_level > dest_refinement_stack_size) { assert(cell->prev); cell = cell->prev; } while(cell->next and cell->next->split_level > dest_refinement_stack_size) { /* Merge next cell */ Cell* const next_cell = cell->next; if(cell->length == 1) discrete_cell_count--; if(next_cell->length == 1) discrete_cell_count--; /* Update element_to_cell_map values of elements added in cell */ unsigned int* ep = elements + next_cell->first; unsigned int* const lp = ep + next_cell->length; for( ; ep < lp; ep++) element_to_cell_map[*ep] = cell; /* Update cell parameters */ cell->length += next_cell->length; if(next_cell->next) next_cell->next->prev = cell; cell->next = next_cell->next; /* (Pseudo)free next_cell */ next_cell->first = 0; next_cell->length = 0; next_cell->prev = 0; next_cell->next = free_cells; free_cells = 
next_cell; } done: if(i.prev_nonsingleton_first >= 0) { Cell* const prev_cell = get_cell(elements[i.prev_nonsingleton_first]); cell->prev_nonsingleton = prev_cell; prev_cell->next_nonsingleton = cell; } else { //assert(cell->prev_nonsingleton == 0); cell->prev_nonsingleton = 0; first_nonsingleton_cell = cell; } if(i.next_nonsingleton_first >= 0) { Cell* const next_cell = get_cell(elements[i.next_nonsingleton_first]); cell->next_nonsingleton = next_cell; next_cell->prev_nonsingleton = cell; } else { //assert(cell->next_nonsingleton == 0); cell->next_nonsingleton = 0; } } } /** * Split the non-unit Cell \a cell = {\a element,e1,e2,...,en} containing * the element \a element in two: * \a cell = {e1,...,en} and \a newcell = {\a element}. * @param cell a non-unit Cell * @param element an element in \a cell * @return the new unit Cell \a newcell */ //Cell* individualize(Cell* const cell, const unsigned int element); Cell* individualize(Cell * const cell, const unsigned int element) { unsigned int * const pos = in_pos[element]; const unsigned int last = cell->first + cell->length - 1; *pos = elements[last]; in_pos[*pos] = pos; elements[last] = element; in_pos[element] = elements + last; Partition::Cell * const new_cell = aux_split_in_two(cell, cell->length-1); element_to_cell_map[element] = new_cell; return new_cell; } //Cell* aux_split_in_two(Cell* const cell, const unsigned int first_half_size); Cell* aux_split_in_two(Cell* const cell, const unsigned int first_half_size) { RefInfo i; /* (Pseudo)allocate new cell */ Cell * const new_cell = free_cells; free_cells = new_cell->next; /* Update new cell parameters */ new_cell->first = cell->first + first_half_size; new_cell->length = cell->length - first_half_size; new_cell->next = cell->next; if(new_cell->next) new_cell->next->prev = new_cell; new_cell->prev = cell; new_cell->split_level = refinement_stack.size()+1; /* Update old, splitted cell parameters */ cell->length = first_half_size; cell->next = new_cell; /* CR */ 
if(cr_enabled) cr_create_at_level_trailed(new_cell->first, cr_get_level(cell->first)); /* Add cell in refinement_stack for backtracking */ i.split_cell_first = new_cell->first; if(cell->prev_nonsingleton) i.prev_nonsingleton_first = cell->prev_nonsingleton->first; else i.prev_nonsingleton_first = -1; if(cell->next_nonsingleton) i.next_nonsingleton_first = cell->next_nonsingleton->first; else i.next_nonsingleton_first = -1; refinement_stack.push(i); /* Modify nonsingleton cell list */ if(new_cell->length > 1) { new_cell->prev_nonsingleton = cell; new_cell->next_nonsingleton = cell->next_nonsingleton; if(new_cell->next_nonsingleton) new_cell->next_nonsingleton->prev_nonsingleton = new_cell; cell->next_nonsingleton = new_cell; } else { new_cell->next_nonsingleton = 0; new_cell->prev_nonsingleton = 0; discrete_cell_count++; } if(cell->is_unit()) { if(cell->prev_nonsingleton) cell->prev_nonsingleton->next_nonsingleton = cell->next_nonsingleton; else first_nonsingleton_cell = cell->next_nonsingleton; if(cell->next_nonsingleton) cell->next_nonsingleton->prev_nonsingleton = cell->prev_nonsingleton; cell->next_nonsingleton = 0; cell->prev_nonsingleton = 0; discrete_cell_count++; } return new_cell; } private: unsigned int N; Cell* cells; Cell* free_cells; unsigned int discrete_cell_count; public: Cell* first_cell; Cell* first_nonsingleton_cell; unsigned int *elements; /* invariant_values[e] gives the invariant value of the element e */ unsigned int *invariant_values; /* element_to_cell_map[e] gives the cell of the element e */ Cell **element_to_cell_map; /** Get the cell of the element \a e */ Cell* get_cell(const unsigned int e) const { return element_to_cell_map[e]; } /* in_pos[e] points to the elements array s.t. 
*in_pos[e] = e */ unsigned int **in_pos; //Partition(); //~Partition(); Partition() { N = 0; elements = 0; in_pos = 0; invariant_values = 0; cells = 0; free_cells = 0; element_to_cell_map = 0; graph = 0; discrete_cell_count = 0; /* Initialize a distribution count sorting array. */ for(unsigned int i = 0; i < 256; i++) dcs_count[i] = 0; cr_enabled = false; cr_cells = 0; cr_levels = 0; } ~Partition() { if(elements) {free(elements); elements = 0; } if(cells) {free(cells); cells = 0; } if(element_to_cell_map) {free(element_to_cell_map); element_to_cell_map = 0; } if(in_pos) {free(in_pos); in_pos = 0; } if(invariant_values) {free(invariant_values); invariant_values = 0; } N = 0; } /** * Initialize the partition to the unit partition (all elements in one cell) * over the \a N > 0 elements {0,...,\a N-1}. */ //void init(const unsigned int N); void init(const unsigned int M) { assert(M > 0); N = M; if(elements) free(elements); elements = (unsigned int*)malloc(N * sizeof(unsigned int)); for(unsigned int i = 0; i < N; i++) elements[i] = i; if(in_pos) free(in_pos); in_pos = (unsigned int**)malloc(N * sizeof(unsigned int*)); for(unsigned int i = 0; i < N; i++) in_pos[i] = elements + i; if(invariant_values) free(invariant_values); invariant_values = (unsigned int*)malloc(N * sizeof(unsigned int)); for(unsigned int i = 0; i < N; i++) invariant_values[i] = 0; if(cells) free(cells); cells = (Cell*)malloc(N * sizeof(Cell)); cells[0].first = 0; cells[0].length = N; cells[0].max_ival = 0; cells[0].max_ival_count = 0; cells[0].in_splitting_queue = false; cells[0].in_neighbour_heap = false; cells[0].prev = 0; cells[0].next = 0; cells[0].next_nonsingleton = 0; cells[0].prev_nonsingleton = 0; cells[0].split_level = 0; first_cell = &cells[0]; if(N == 1) { first_nonsingleton_cell = 0; discrete_cell_count = 1; } else { first_nonsingleton_cell = &cells[0]; discrete_cell_count = 0; } for(unsigned int i = 1; i < N; i++) { cells[i].first = 0; cells[i].length = 0; cells[i].max_ival = 0; 
cells[i].max_ival_count = 0; cells[i].in_splitting_queue = false; cells[i].in_neighbour_heap = false; cells[i].prev = 0; cells[i].next = (i < N-1)?&cells[i+1]:0; cells[i].next_nonsingleton = 0; cells[i].prev_nonsingleton = 0; } if(N > 1) free_cells = &cells[1]; else free_cells = 0; if(element_to_cell_map) free(element_to_cell_map); element_to_cell_map = (Cell **)malloc(N * sizeof(Cell *)); for(unsigned int i = 0; i < N; i++) element_to_cell_map[i] = first_cell; splitting_queue.init(N); refinement_stack.init(N); /* Reset the main backtracking stack */ bt_stack.clear(); } /** * Returns true iff the partition is discrete, meaning that all * the elements are in their own cells. */ bool is_discrete() const {return(free_cells == 0); } unsigned int nof_discrete_cells() const {return(discrete_cell_count); } /** * Print the partition into the file stream \a fp. */ //size_t print(FILE* const fp, const bool add_newline = true) const; size_t print(FILE* const fp, const bool add_newline = true) const { size_t r = 0; const char* cell_sep = ""; r += fprintf(fp, "["); for(Cell* cell = first_cell; cell; cell = cell->next) { /* Print cell */ r += fprintf(fp, "%s{", cell_sep); cell_sep = ","; const char* elem_sep = ""; for(unsigned int i = 0; i < cell->length; i++) { r += fprintf(fp, "%s%u", elem_sep, elements[cell->first + i]); elem_sep = ","; } r += fprintf(fp, "}"); } r += fprintf(fp, "]"); if(add_newline) r += fprintf(fp, "\n"); return r; } /** * Print the partition cell sizes into the file stream \a fp. 
*/ //size_t print_signature(FILE* const fp, const bool add_newline = true) const; size_t print_signature(FILE* const fp, const bool add_newline = true) const { size_t r = 0; const char* cell_sep = ""; r += fprintf(fp, "["); for(Cell* cell = first_cell; cell; cell = cell->next) { if(cell->is_unit()) continue; //fprintf(fp, "%s%u", cell_sep, cr_cells[cell->first].level); r += fprintf(fp, "%s%u", cell_sep, cell->length); cell_sep = ","; } r += fprintf(fp, "]"); if(add_newline) r += fprintf(fp, "\n"); return r; } /* * Splits the Cell \a cell into [cell_1,...,cell_n] * according to the invariant_values of the elements in \a cell. * After splitting, cell_1 == \a cell. * Returns the pointer to the Cell cell_n; * cell_n != cell iff the Cell \a cell was actually splitted. * The flag \a max_ival_info_ok indicates whether the max_ival and * max_ival_count fields of the Cell \a cell have consistent values * when the method is called. * Clears the invariant values of elements in the Cell \a cell as well as * the max_ival and max_ival_count fields of the Cell \a cell. 
*/ //Cell *zplit_cell(Cell * const cell, const bool max_ival_info_ok); Cell* zplit_cell(Cell* const cell, const bool max_ival_info_ok) { Cell* last_new_cell = cell; if(!max_ival_info_ok) { /* Compute max_ival info */ assert(cell->max_ival == 0); assert(cell->max_ival_count == 0); unsigned int *ep = elements + cell->first; for(unsigned int i = cell->length; i > 0; i--, ep++) { const unsigned int ival = invariant_values[*ep]; if(ival > cell->max_ival) { cell->max_ival = ival; cell->max_ival_count = 1; } else if(ival == cell->max_ival) { cell->max_ival_count++; } } } #ifdef BLISS_CONSISTENCY_CHECKS /* Verify max_ival info */ { unsigned int nof_zeros = 0; unsigned int max_ival = 0; unsigned int max_ival_count = 0; unsigned int *ep = elements + cell->first; for(unsigned int i = cell->length; i > 0; i--, ep++) { const unsigned int ival = invariant_values[*ep]; if(ival == 0) nof_zeros++; if(ival > max_ival) { max_ival = ival; max_ival_count = 1; } else if(ival == max_ival) max_ival_count++; } assert(max_ival == cell->max_ival); assert(max_ival_count == cell->max_ival_count); } #endif /* max_ival info has been computed */ if(cell->max_ival_count == cell->length) { /* All invariant values are the same, clear 'em */ if(cell->max_ival > 0) clear_ivs(cell); } else { /* All invariant values are not the same */ if(cell->max_ival == 1) { /* Specialized splitting for cells with binary invariant values */ last_new_cell = sort_and_split_cell1(cell); } else if(cell->max_ival < 256) { /* Specialized splitting for cells with invariant values < 256 */ last_new_cell = sort_and_split_cell255(cell, cell->max_ival); } else { /* Generic sorting and splitting */ const bool sorted = shellsort_cell(cell); if (!sorted) printf("error sorting\n"); assert(sorted); last_new_cell = split_cell(cell); } } cell->max_ival = 0; cell->max_ival_count = 0; return last_new_cell; } /* * Routines for component recursion */ //void cr_init(); //void cr_free(); unsigned int cr_get_level(const unsigned int 
cell_index) const; //unsigned int cr_split_level(const unsigned int level, const std::vector& cells); /* * * Component recursion specific code * */ void cr_init() { assert(bt_stack.empty()); cr_enabled = true; if(cr_cells) free(cr_cells); cr_cells = (CRCell*)malloc(N * sizeof(CRCell)); if(!cr_cells) {assert(false && "Mem out"); } if(cr_levels) free(cr_levels); cr_levels = (CRCell**)malloc(N * sizeof(CRCell*)); if(!cr_levels) {assert(false && "Mem out"); } for(unsigned int i = 0; i < N; i++) { cr_levels[i] = 0; cr_cells[i].level = UINT_MAX; cr_cells[i].next = 0; cr_cells[i].prev_next_ptr = 0; } for(const Cell *cell = first_cell; cell; cell = cell->next) cr_create_at_level_trailed(cell->first, 0); cr_max_level = 0; } void cr_free() { if(cr_cells) {free(cr_cells); cr_cells = 0; } if(cr_levels) {free(cr_levels); cr_levels = 0; } cr_created_trail.clear(); cr_splitted_level_trail.clear(); cr_bt_info.clear(); cr_max_level = 0; cr_enabled = false; } unsigned int cr_split_level(const unsigned int level, const std::vector& splitted_cells) { assert(cr_enabled); assert(level <= cr_max_level); cr_levels[++cr_max_level] = 0; cr_splitted_level_trail.push_back(level); for(unsigned int i = 0; i < splitted_cells.size(); i++) { const unsigned int cell_index = splitted_cells[i]; assert(cell_index < N); CRCell& cr_cell = cr_cells[cell_index]; assert(cr_cell.level == level); cr_cell.detach(); cr_create_at_level(cell_index, cr_max_level); } return cr_max_level; } /** Clear the invariant_values of the elements in the Cell \a cell. */ //void clear_ivs(Cell* const cell); void clear_ivs(Cell* const cell) { unsigned int* ep = elements + cell->first; for(unsigned int i = cell->length; i > 0; i--, ep++) invariant_values[*ep] = 0; } private: /* * Component recursion data structures */ /* Is component recursion support in use? 
*/ bool cr_enabled; class CRCell { public: unsigned int level; CRCell* next; CRCell** prev_next_ptr; void detach() { if(next) next->prev_next_ptr = prev_next_ptr; *(prev_next_ptr) = next; level = UINT_MAX; next = 0; prev_next_ptr = 0; } }; CRCell* cr_cells; CRCell** cr_levels; class CR_BTInfo { public: unsigned int created_trail_index; unsigned int splitted_level_trail_index; }; std::vector cr_created_trail; std::vector cr_splitted_level_trail; std::vector cr_bt_info; unsigned int cr_max_level; //void cr_create_at_level(const unsigned int cell_index, unsigned int level); //void cr_create_at_level_trailed(const unsigned int cell_index, unsigned int level); //unsigned int cr_get_backtrack_point(); //void cr_goto_backtrack_point(const unsigned int btpoint); void cr_create_at_level(const unsigned int cell_index, const unsigned int level) { assert(cr_enabled); assert(cell_index < N); assert(level < N); CRCell& cr_cell = cr_cells[cell_index]; assert(cr_cell.level == UINT_MAX); assert(cr_cell.next == 0); assert(cr_cell.prev_next_ptr == 0); if(cr_levels[level]) cr_levels[level]->prev_next_ptr = &(cr_cell.next); cr_cell.next = cr_levels[level]; cr_levels[level] = &cr_cell; cr_cell.prev_next_ptr = &cr_levels[level]; cr_cell.level = level; } void cr_create_at_level_trailed(const unsigned int cell_index, const unsigned int level) { assert(cr_enabled); cr_create_at_level(cell_index, level); cr_created_trail.push_back(cell_index); } unsigned int cr_get_backtrack_point() { assert(cr_enabled); CR_BTInfo info; info.created_trail_index = cr_created_trail.size(); info.splitted_level_trail_index = cr_splitted_level_trail.size(); cr_bt_info.push_back(info); return cr_bt_info.size()-1; } void cr_goto_backtrack_point(const unsigned int btpoint) { assert(cr_enabled); assert(btpoint < cr_bt_info.size()); while(cr_created_trail.size() > cr_bt_info[btpoint].created_trail_index) { const unsigned int cell_index = cr_created_trail.back(); cr_created_trail.pop_back(); CRCell& cr_cell = 
cr_cells[cell_index]; assert(cr_cell.level != UINT_MAX); assert(cr_cell.prev_next_ptr); cr_cell.detach(); } while(cr_splitted_level_trail.size() > cr_bt_info[btpoint].splitted_level_trail_index) { const unsigned int dest_level = cr_splitted_level_trail.back(); cr_splitted_level_trail.pop_back(); assert(cr_max_level > 0); assert(dest_level < cr_max_level); while(cr_levels[cr_max_level]) { CRCell *cr_cell = cr_levels[cr_max_level]; cr_cell->detach(); cr_create_at_level(cr_cell - cr_cells, dest_level); } cr_max_level--; } cr_bt_info.resize(btpoint); } // Auxiliary routines for sorting and splitting cells //Cell* sort_and_split_cell1(Cell* cell); //Cell* sort_and_split_cell255(Cell* const cell, const unsigned int max_ival); //bool shellsort_cell(Cell* cell); // Assumes that the invariant values are NOT the same and that the cell contains more than one element Cell* sort_and_split_cell1(Cell* const cell) { #if defined(BLISS_EXPENSIVE_CONSISTENCY_CHECKS) assert(cell->length > 1); assert(cell->first + cell->length <= N); unsigned int nof_0_found = 0; unsigned int nof_1_found = 0; for(unsigned int i = cell->first; i < cell->first + cell->length; i++) { const unsigned int ival = invariant_values[elements[i]]; assert(ival == 0 or ival == 1); if(ival == 0) nof_0_found++; else nof_1_found++; } assert(nof_0_found > 0); assert(nof_1_found > 0); assert(nof_1_found == cell->max_ival_count); assert(nof_0_found + nof_1_found == cell->length); assert(cell->max_ival == 1); #endif /* (Pseudo)allocate new cell */ Cell* const new_cell = free_cells; free_cells = new_cell->next; #define NEW_SORT1 #ifdef NEW_SORT1 unsigned int *ep0 = elements + cell->first; unsigned int *ep1 = ep0 + cell->length - cell->max_ival_count; if(cell->max_ival_count > cell->length / 2) { /* There are more ones than zeros, only move zeros */ unsigned int * const end = ep0 + cell->length; while(ep1 < end) { while(invariant_values[*ep1] == 0) { const unsigned int tmp = *ep1; *ep1 = *ep0; *ep0 = tmp; in_pos[tmp] = 
ep0; in_pos[*ep1] = ep1; ep0++; } element_to_cell_map[*ep1] = new_cell; invariant_values[*ep1] = 0; ep1++; } } else { /* There are more zeros than ones, only move ones */ unsigned int * const end = ep1; while(ep0 < end) { while(invariant_values[*ep0] != 0) { const unsigned int tmp = *ep0; *ep0 = *ep1; *ep1 = tmp; in_pos[tmp] = ep1; in_pos[*ep0] = ep0; ep1++; } ep0++; } ep1 = end; while(ep1 < elements + cell->first + cell->length) { element_to_cell_map[*ep1] = new_cell; invariant_values[*ep1] = 0; ep1++; } } /* Update new cell parameters */ new_cell->first = cell->first + cell->length - cell->max_ival_count; new_cell->length = cell->length - (new_cell->first - cell->first); new_cell->next = cell->next; if(new_cell->next) new_cell->next->prev = new_cell; new_cell->prev = cell; new_cell->split_level = refinement_stack.size()+1; /* Update old, splitted cell parameters */ cell->length = new_cell->first - cell->first; cell->next = new_cell; /* CR */ if(cr_enabled) cr_create_at_level_trailed(new_cell->first, cr_get_level(cell->first)); #else /* Sort vertices in the cell according to the invariant values */ unsigned int *ep0 = elements + cell->first; unsigned int *ep1 = ep0 + cell->length; while(ep1 > ep0) { const unsigned int element = *ep0; const unsigned int ival = invariant_values[element]; invariant_values[element] = 0; if(ival == 0) { ep0++; } else { ep1--; *ep0 = *ep1; *ep1 = element; element_to_cell_map[element] = new_cell; in_pos[element] = ep1; in_pos[*ep0] = ep0; } } /* Update new cell parameters */ new_cell->first = ep1 - elements; new_cell->length = cell->length - (new_cell->first - cell->first); new_cell->next = cell->next; if(new_cell->next) new_cell->next->prev = new_cell; new_cell->prev = cell; new_cell->split_level = cell->split_level; /* Update old, splitted cell parameters */ cell->length = new_cell->first - cell->first; cell->next = new_cell; cell->split_level = refinement_stack.size()+1; /* CR */ if(cr_enabled) 
cr_create_at_level_trailed(new_cell->first, cr_get_level(cell->first)); #endif /* ifdef NEW_SORT1*/ /* Add cell in refinement stack for backtracking */ { RefInfo i; i.split_cell_first = new_cell->first; if(cell->prev_nonsingleton) i.prev_nonsingleton_first = cell->prev_nonsingleton->first; else i.prev_nonsingleton_first = -1; if(cell->next_nonsingleton) i.next_nonsingleton_first = cell->next_nonsingleton->first; else i.next_nonsingleton_first = -1; /* Modify nonsingleton cell list */ if(new_cell->length > 1) { new_cell->prev_nonsingleton = cell; new_cell->next_nonsingleton = cell->next_nonsingleton; if(new_cell->next_nonsingleton) new_cell->next_nonsingleton->prev_nonsingleton = new_cell; cell->next_nonsingleton = new_cell; } else { new_cell->next_nonsingleton = 0; new_cell->prev_nonsingleton = 0; discrete_cell_count++; } if(cell->is_unit()) { if(cell->prev_nonsingleton) cell->prev_nonsingleton->next_nonsingleton = cell->next_nonsingleton; else first_nonsingleton_cell = cell->next_nonsingleton; if(cell->next_nonsingleton) cell->next_nonsingleton->prev_nonsingleton = cell->prev_nonsingleton; cell->next_nonsingleton = 0; cell->prev_nonsingleton = 0; discrete_cell_count++; } refinement_stack.push(i); } /* Add cells in splitting queue */ if(cell->in_splitting_queue) { /* Both cells must be included in splitting_queue in order to have refinement to equitable partition */ splitting_queue_add(new_cell); } else { Cell *min_cell, *max_cell; if(cell->length <= new_cell->length) { min_cell = cell; max_cell = new_cell; } else { min_cell = new_cell; max_cell = cell; } /* Put the smaller cell in splitting_queue */ splitting_queue_add(min_cell); if(max_cell->is_unit()) { /* Put the "larger" cell also in splitting_queue */ splitting_queue_add(max_cell); } } return new_cell; } // Sort the elements in a cell according to their invariant values. // The invariant values are not cleared. // Warning: the in_pos array is left in incorrect state. 
// Shell sort the elements of a non-unit cell into increasing order of
// their invariant values. Returns false (without sorting) when the cell
// is a unit cell or when all its invariant values are already equal.
bool shellsort_cell(Cell* const cell) {
  unsigned int h;
  unsigned int* ep;

  if(cell->is_unit())
    return false;

  /* Check whether all the elements have the same invariant value */
  bool equal_invariant_values = true;
  {
    ep = elements + cell->first;
    const unsigned int ival = invariant_values[*ep];
    ep++;
    for(unsigned int i = cell->length - 1; i > 0; i--) {
      if(invariant_values[*ep] != ival) {
        equal_invariant_values = false;
        break;
      }
      ep++;
    }
  }
  if(equal_invariant_values)
    return false;

  /* Shell sort with the h -> 3h+1 gap sequence */
  ep = elements + cell->first;
  for(h = 1; h <= cell->length/9; h = 3*h + 1)
    ;
  for( ; h > 0; h = h/3) {
    for(unsigned int i = h; i < cell->length; i++) {
      const unsigned int element = ep[i];
      const unsigned int ival = invariant_values[element];
      unsigned int j = i;
      while(j >= h and invariant_values[ep[j-h]] > ival) {
        ep[j] = ep[j-h];
        j -= h;
      }
      ep[j] = element;
    }
  }
  return true;
}

// Distribution count sorting of cells with invariant values less than 256.
// Counts values into dcs_count, builds bucket offsets via
// dcs_cumulate_count(), permutes the cell's elements in place, then
// delegates to split_cell(). dcs_count is returned to all-zeros on exit.
Cell* sort_and_split_cell255(Cell* const cell, const unsigned int max_ival) {
  if(cell->is_unit()) {
    /* Reset invariant value */
    invariant_values[elements[cell->first]] = 0;
    return cell;
  }
#ifdef BLISS_CONSISTENCY_CHECKS
  for(unsigned int i = 0; i < 256; i++)
    assert(dcs_count[i] == 0);
#endif
  /*
   * Compute the distribution of invariant values to the count array
   */
  {
    const unsigned int *ep = elements + cell->first;
    const unsigned int ival = invariant_values[*ep];
    dcs_count[ival]++;
    ep++;
#if defined(BLISS_CONSISTENCY_CHECKS)
    bool equal_invariant_values = true;
#endif
    for(unsigned int i = cell->length - 1; i != 0; i--) {
      const unsigned int ival2 = invariant_values[*ep];
      dcs_count[ival2]++;
#if defined(BLISS_CONSISTENCY_CHECKS)
      if(ival2 != ival) {
        equal_invariant_values = false;
      }
#endif
      ep++;
    }
#if defined(BLISS_CONSISTENCY_CHECKS)
    assert(!equal_invariant_values);
    if(equal_invariant_values) {
      assert(dcs_count[ival] == cell->length);
      dcs_count[ival] = 0;
      clear_ivs(cell);
      return cell;
    }
#endif
  }
  /* Build start array */
  dcs_cumulate_count(max_ival);
  /* Do the sorting */
  /* In-place permutation: repeatedly swap the element at hand into its
   * target bucket until the element belonging to the current bucket i
   * arrives, advancing dcs_start/dcs_count as elements are placed. */
  for(unsigned int i = 0; i <= max_ival; i++) {
    unsigned int *ep = elements + cell->first + dcs_start[i];
    for(unsigned int j = dcs_count[i]; j > 0; j--) {
      while(true) {
        const unsigned int element = *ep;
        const unsigned int ival = invariant_values[element];
        if(ival == i)
          break;
        *ep = elements[cell->first + dcs_start[ival]];
        elements[cell->first + dcs_start[ival]] = element;
        dcs_start[ival]++;
        dcs_count[ival]--;
      }
      ep++;
    }
    dcs_count[i] = 0; /* leave counts zeroed for the next call */
  }
#if defined(BLISS_CONSISTENCY_CHECKS)
  for(unsigned int i = 0; i < 256; i++)
    assert(dcs_count[i] == 0);
#endif
  /* split cell */
  Cell* const new_cell = split_cell(cell);
  return new_cell;
}

Cell* split_cell(Cell* const cell);

/*
 * Some auxiliary stuff needed for distribution count sorting.
 * To make the code thread-safe (modulo the requirement that each graph is
 * only accessed in one thread at a time), the arrays are owned by
 * the partition instance, not statically defined.
 */
unsigned int dcs_count[256];
unsigned int dcs_start[256];
//void dcs_cumulate_count(const unsigned int max);

// An auxiliary function for distribution count sorting.
// Build start array so that
// dcs_start[0] = 0 and dcs_start[i+1] = dcs_start[i] + dcs_count[i].
// Exclusive prefix sum over dcs_count[0..max] into dcs_start[0..max]:
// dcs_start[0] = 0 and dcs_start[i+1] = dcs_start[i] + dcs_count[i].
void dcs_cumulate_count(const unsigned int max) {
  unsigned int* count_p = dcs_count;
  unsigned int* start_p = dcs_start;
  unsigned int sum = 0;
  for(unsigned int i = max+1; i > 0; i--) {
    *start_p = sum;
    start_p++;
    sum += *count_p;
    count_p++;
  }
}
};

// Pop the next cell from the splitting queue and clear its queued flag.
inline Partition::Cell* Partition::splitting_queue_pop() {
  Cell* const cell = splitting_queue.pop_front();
  cell->in_splitting_queue = false;
  return cell;
}

inline bool Partition::splitting_queue_is_empty() const {
  return splitting_queue.is_empty();
}

// Component-recursion level of the cell whose first element index is
// cell_index (UINT_MAX when not on any level).
inline unsigned int Partition::cr_get_level(const unsigned int cell_index) const {
  return(cr_cells[cell_index].level);
}

} // namespace bliss
#endif


================================================ FILE: external/bliss/bliss/search.h ================================================
// NOTE(review): search() continues beyond this chunk; only its prologue
// (statistics reset and freeing/reallocation of path data) is visible here.
void search(const bool canonical, Stats& stats) {
  const unsigned int N = get_nof_vertices();
  unsigned int all_same_level = UINT_MAX;
  p.graph = this;
  /*
   * Must be done!
   */
  remove_duplicate_edges();
  /*
   * Reset search statistics
   */
  stats.reset();
  stats.nof_nodes = 1;
  stats.nof_leaf_nodes = 1;
  /* Free old first path data structures */
  if (first_path_labeling) {
    free(first_path_labeling);
    first_path_labeling = 0;
  }
  if (first_path_labeling_inv) {
    free(first_path_labeling_inv);
    first_path_labeling_inv = 0;
  }
  if (first_path_automorphism) {
    free(first_path_automorphism);
    first_path_automorphism = 0;
  }
  /* Free old best path data structures */
  if (best_path_labeling) {
    free(best_path_labeling);
    best_path_labeling = 0;
  }
  if (best_path_labeling_inv) {
    free(best_path_labeling_inv);
    best_path_labeling_inv = 0;
  }
  if (best_path_automorphism) {
    free(best_path_automorphism);
    best_path_automorphism = 0;
  }
  if (N == 0) {
    /* Nothing to do, return... */
    return;
  }
  /* Initialize the partition ... */
  p.init(N);
  /* ...
and the component recursion data structures in the partition */ if (opt_use_comprec) p.cr_init(); neighbour_heap.init(N); in_search = false; /* Do not compute certificate when building the initial partition */ refine_compare_certificate = false; /* The 'eqref_hash' hash value is not computed when building * the initial partition as it is not used for anything at the moment. * This saves some cycles. */ compute_eqref_hash = false; // Timer timer1; make_initial_equitable_partition(); if (verbstr and verbose_level >= 2) { fprintf(verbstr, "Initial partition computed in %.2f seconds\n", 0.0); // timer1.get_duration()); fflush(verbstr); } /* * Allocate space for the "first path" and "best path" labelings */ if (first_path_labeling) free(first_path_labeling); first_path_labeling = (unsigned int*)calloc(N, sizeof(unsigned int)); if (!first_path_labeling) _OUT_OF_MEMORY(); if (best_path_labeling) free(best_path_labeling); best_path_labeling = (unsigned int*)calloc(N, sizeof(unsigned int)); if (!best_path_labeling) _OUT_OF_MEMORY(); /* * Is the initial partition discrete? */ if (p.is_discrete()) { /* Make the best path labeling i.e. 
the canonical labeling */ update_labeling(best_path_labeling); /* Update statistics */ stats.nof_leaf_nodes = 1; return; } /* * Allocate the inverses of the "first path" and "best path" labelings */ if (first_path_labeling_inv) free(first_path_labeling_inv); first_path_labeling_inv = (unsigned int*)calloc(N, sizeof(unsigned int)); if (!first_path_labeling_inv) _OUT_OF_MEMORY(); if (best_path_labeling_inv) free(best_path_labeling_inv); best_path_labeling_inv = (unsigned int*)calloc(N, sizeof(unsigned int)); if (!best_path_labeling_inv) _OUT_OF_MEMORY(); /* * Allocate space for the automorphisms */ if (first_path_automorphism) free(first_path_automorphism); first_path_automorphism = (unsigned int*)malloc(N * sizeof(unsigned int)); if (!first_path_automorphism) _OUT_OF_MEMORY(); if (best_path_automorphism) free(best_path_automorphism); best_path_automorphism = (unsigned int*)malloc(N * sizeof(unsigned int)); if (!best_path_automorphism) _OUT_OF_MEMORY(); /* * Initialize orbit information so that all vertices are in their own orbits */ first_path_orbits.init(N); best_path_orbits.init(N); /* * Initialize certificate memory */ initialize_certificate(); std::vector search_stack; std::vector first_path_info; std::vector best_path_info; search_stack.clear(); /* Initialize "long prune" data structures */ if (opt_use_long_prune) long_prune_init(); /* * Initialize failure recording data structures */ typedef std::set> FailureRecordingSet; std::vector failure_recording_hashes; /* * Initialize component recursion data structures */ cr_cep_stack.clear(); unsigned int cr_cep_index = 0; { /* Inset a sentinel "component end point" */ CR_CEP sentinel; sentinel.creation_level = 0; sentinel.discrete_cell_limit = get_nof_vertices(); sentinel.next_cr_level = 0; sentinel.next_cep_index = 0; sentinel.first_checked = false; sentinel.best_checked = false; cr_cep_index = 0; cr_cep_stack.push_back(sentinel); } cr_level = 0; if (opt_use_comprec and nucr_find_first_component(cr_level) == true 
and p.nof_discrete_cells() + cr_component_elements < cr_cep_stack[cr_cep_index].discrete_cell_limit) { cr_level = p.cr_split_level(0, cr_component); CR_CEP cep; cep.creation_level = 0; cep.discrete_cell_limit = p.nof_discrete_cells() + cr_component_elements; cep.next_cr_level = 0; cep.next_cep_index = cr_cep_index; cep.first_checked = false; cep.best_checked = false; cr_cep_index = cr_cep_stack.size(); cr_cep_stack.push_back(cep); } /* * Build the root node of the search tree */ { TreeNode root; Partition::Cell* split_cell = find_next_cell_to_be_splitted(p.first_cell); root.split_cell_first = split_cell->first; root.split_element = TreeNode::SPLIT_START; root.partition_bt_point = p.set_backtrack_point(); root.certificate_index = 0; root.fp_on = true; root.fp_cert_equal = true; root.fp_extendable = TreeNode::MAYBE; root.in_best_path = false; root.cmp_to_best_path = 0; root.long_prune_begin = 0; root.failure_recording_ival = 0; /* Save component recursion info for backtracking */ root.cr_level = cr_level; root.cr_cep_stack_size = cr_cep_stack.size(); root.cr_cep_index = cr_cep_index; search_stack.push_back(root); } /* * Set status and global flags for search related procedures */ in_search = true; /* Do not compare certificates during refinement until the first path has been * traversed to the leaf */ refine_compare_certificate = false; /* * The actual backtracking search */ while (!search_stack.empty()) { TreeNode& current_node = search_stack.back(); const unsigned int current_level = (unsigned int)search_stack.size() - 1; if (opt_use_comprec) { CR_CEP& cep = cr_cep_stack[current_node.cr_cep_index]; if (cep.first_checked == true and current_node.fp_extendable == TreeNode::MAYBE and !search_stack[cep.creation_level].fp_on) { current_node.fp_extendable = TreeNode::NO; } } if (current_node.fp_on) { if (current_node.split_element == TreeNode::SPLIT_END) { search_stack.pop_back(); continue; } } else { if (current_node.fp_extendable == TreeNode::YES) { 
search_stack.pop_back(); continue; } if (current_node.split_element == TreeNode::SPLIT_END) { if (opt_use_failure_recording) { TreeNode& parent_node = search_stack[current_level - 1]; if (parent_node.fp_on) failure_recording_hashes[current_level - 1].insert( current_node.failure_recording_ival); } search_stack.pop_back(); continue; } if (current_node.fp_extendable == TreeNode::NO and (!canonical or current_node.cmp_to_best_path < 0)) { if (opt_use_failure_recording) { TreeNode& parent_node = search_stack[current_level - 1]; if (parent_node.fp_on) failure_recording_hashes[current_level - 1].insert( current_node.failure_recording_ival); } search_stack.pop_back(); continue; } } /* Restore partition ... */ p.goto_backtrack_point(current_node.partition_bt_point); /* ... and re-remember backtracking point */ current_node.partition_bt_point = p.set_backtrack_point(); /* Restore current path certificate */ certificate_index = current_node.certificate_index; refine_current_path_certificate_index = current_node.certificate_index; certificate_current_path.resize(certificate_index); /* Fetch split cell information */ Partition::Cell* const cell = p.get_cell(p.elements[current_node.split_cell_first]); /* Restore component recursion information */ cr_level = current_node.cr_level; cr_cep_stack.resize(current_node.cr_cep_stack_size); cr_cep_index = current_node.cr_cep_index; /* * Update long prune redundancy sets */ if (opt_use_long_prune and current_level >= 1 and !current_node.fp_on) { unsigned int begin = (current_node.long_prune_begin > long_prune_begin) ? 
current_node.long_prune_begin : long_prune_begin; for (unsigned int i = begin; i < long_prune_end; i++) { const std::vector& fixed = long_prune_get_fixed(i); #if defined(BLISS_CONSISTENCY_CHECKS) for (unsigned int l = 0; l < search_stack.size() - 2; l++) assert(fixed[search_stack[l].split_element]); #endif if (fixed[search_stack[search_stack.size() - 1 - 1].split_element] == false) { long_prune_swap(begin, i); begin++; current_node.long_prune_begin = begin; continue; } } if (current_node.split_element == TreeNode::SPLIT_START) { current_node.needs_long_prune = true; } else if (current_node.needs_long_prune) { current_node.needs_long_prune = false; unsigned int begin = (current_node.long_prune_begin > long_prune_begin) ? current_node.long_prune_begin : long_prune_begin; for (unsigned int i = begin; i < long_prune_end; i++) { const std::vector& fixed = long_prune_get_fixed(i); #if defined(BLISS_CONSISTENCY_CHECKS) for (unsigned int l = 0; l < search_stack.size() - 2; l++) assert(fixed[search_stack[l].split_element]); #endif assert(fixed[search_stack[current_level - 1].split_element] == true); if (fixed[search_stack[current_level - 1].split_element] == false) { long_prune_swap(begin, i); begin++; current_node.long_prune_begin = begin; continue; } const std::vector& mcrs = long_prune_get_mcrs(i); unsigned int* ep = p.elements + cell->first; for (unsigned int j = cell->length; j > 0; j--, ep++) { if (mcrs[*ep] == false) current_node.long_prune_redundant.insert(*ep); } } } } /* * Find the next smallest, non-isomorphic element in the cell and * store it in current_node.split_element */ { unsigned int next_split_element = UINT_MAX; // unsigned int* next_split_element_pos = 0; unsigned int* ep = p.elements + cell->first; if (current_node.fp_on) { /* Find the next larger splitting element that is * a minimal orbit representative w.r.t. 
first_path_orbits */ for (unsigned int i = cell->length; i > 0; i--, ep++) { if ((int)(*ep) > current_node.split_element and *ep < next_split_element and first_path_orbits.is_minimal_representative(*ep)) { next_split_element = *ep; // next_split_element_pos = ep; } } } else if (current_node.in_best_path) { /* Find the next larger splitting element that is * a minimal orbit representative w.r.t. best_path_orbits */ for (unsigned int i = cell->length; i > 0; i--, ep++) { if ((int)(*ep) > current_node.split_element and *ep < next_split_element and best_path_orbits.is_minimal_representative(*ep) and (!opt_use_long_prune or current_node.long_prune_redundant.find(*ep) == current_node.long_prune_redundant.end())) { next_split_element = *ep; // next_split_element_pos = ep; } } } else { /* Find the next larger splitting element */ for (unsigned int i = cell->length; i > 0; i--, ep++) { if ((int)(*ep) > current_node.split_element and *ep < next_split_element and (!opt_use_long_prune or current_node.long_prune_redundant.find(*ep) == current_node.long_prune_redundant.end())) { next_split_element = *ep; // next_split_element_pos = ep; } } } if (next_split_element == UINT_MAX) { /* No more (unexplored children) in the cell */ current_node.split_element = TreeNode::SPLIT_END; if (current_node.fp_on) { /* Update group size */ const unsigned int index = first_path_orbits.orbit_size( first_path_info[search_stack.size() - 1].splitting_element); stats.group_size.multiply(index); stats.group_size_approx *= (long double)index; /* * Update all_same_level */ if (index == cell->length and all_same_level == current_level + 1) all_same_level = current_level; if (verbstr and verbose_level >= 2) { fprintf(verbstr, "Level %u: orbits=%u, index=%u/%u, all_same_level=%u\n", current_level, first_path_orbits.nof_orbits(), index, cell->length, all_same_level); fflush(verbstr); } } continue; } /* Split on smallest */ current_node.split_element = next_split_element; } const unsigned int child_level = 
current_level + 1; /* Update some statistics */ stats.nof_nodes++; if (search_stack.size() > stats.max_level) stats.max_level = search_stack.size(); /* Set flags and indices for the refiner certificate builder */ refine_equal_to_first = current_node.fp_cert_equal; refine_cmp_to_best = current_node.cmp_to_best_path; if (!first_path_info.empty()) { if (refine_equal_to_first) refine_first_path_subcertificate_end = first_path_info[search_stack.size() - 1].certificate_index + first_path_info[search_stack.size() - 1].subcertificate_length; if (canonical) { if (refine_cmp_to_best == 0) refine_best_path_subcertificate_end = best_path_info[search_stack.size() - 1].certificate_index + best_path_info[search_stack.size() - 1].subcertificate_length; } else refine_cmp_to_best = -1; } const bool was_fp_cert_equal = current_node.fp_cert_equal; /* Individualize, i.e. split the cell in two, the latter new cell * will be a unit one containing info.split_element */ Partition::Cell* const new_cell = p.individualize(cell, current_node.split_element); /* * Refine the new partition to equitable */ if (cell->is_unit()) refine_to_equitable(cell, new_cell); else refine_to_equitable(new_cell); /* Update statistics */ if (p.is_discrete()) stats.nof_leaf_nodes++; if (!first_path_info.empty()) { /* We are no longer on the first path */ const unsigned int subcertificate_length = certificate_current_path.size() - certificate_index; if (refine_equal_to_first) { /* Was equal to the first path so far */ PathInfo& first_pinfo = first_path_info[current_level]; assert(first_pinfo.certificate_index == certificate_index); if (subcertificate_length != first_pinfo.subcertificate_length) { refine_equal_to_first = false; if (opt_use_failure_recording) failure_recording_fp_deviation = subcertificate_length; } else if (first_pinfo.eqref_hash.cmp(eqref_hash) != 0) { refine_equal_to_first = false; if (opt_use_failure_recording) failure_recording_fp_deviation = eqref_hash.get_value(); } } if (canonical and 
(refine_cmp_to_best == 0)) { /* Was equal to the best path so far */ PathInfo& bestp_info = best_path_info[current_level]; assert(bestp_info.certificate_index == certificate_index); if (subcertificate_length < bestp_info.subcertificate_length) { refine_cmp_to_best = -1; } else if (subcertificate_length > bestp_info.subcertificate_length) { refine_cmp_to_best = 1; } else if (bestp_info.eqref_hash.cmp(eqref_hash) > 0) { refine_cmp_to_best = -1; } else if (bestp_info.eqref_hash.cmp(eqref_hash) < 0) { refine_cmp_to_best = 1; } } if (opt_use_failure_recording and was_fp_cert_equal and !refine_equal_to_first) { UintSeqHash k; k.update(failure_recording_fp_deviation); k.update(eqref_hash.get_value()); failure_recording_fp_deviation = k.get_value(); if (current_node.fp_on) failure_recording_hashes[current_level].insert( failure_recording_fp_deviation); else { for (unsigned int i = current_level; i > 0; i--) { if (search_stack[i].fp_on) break; const FailureRecordingSet& s = failure_recording_hashes[i]; if (i == current_level and s.find(failure_recording_fp_deviation) != s.end()) break; if (s.find(0) != s.end()) break; search_stack[i].fp_extendable = TreeNode::NO; } } } /* Check if no longer equal to the first path and, * if canonical labeling is desired, also worse than the * current best path */ if (refine_equal_to_first == false and (!canonical or (refine_cmp_to_best < 0))) { /* Yes, backtrack */ stats.nof_bad_nodes++; if (current_node.fp_cert_equal == true and current_level + 1 > all_same_level) { assert(all_same_level >= 1); for (unsigned int i = all_same_level; i < search_stack.size(); i++) { search_stack[i].fp_extendable = TreeNode::NO; } } continue; } } #if defined(BLISS_VERIFY_EQUITABLEDNESS) /* The new partition should be equitable */ if (!is_equitable()) fatal_error("consistency check failed - partition after refinement is " "not equitable"); #endif /* * Next level search tree node info */ TreeNode child_node; /* No more in the first path */ child_node.fp_on = 
false; /* No more in the best path */ child_node.in_best_path = false; child_node.fp_cert_equal = refine_equal_to_first; if (current_node.fp_extendable == TreeNode::NO or (current_node.fp_extendable == TreeNode::MAYBE and child_node.fp_cert_equal == false)) child_node.fp_extendable = TreeNode::NO; else child_node.fp_extendable = TreeNode::MAYBE; child_node.cmp_to_best_path = refine_cmp_to_best; child_node.failure_recording_ival = 0; child_node.cr_cep_stack_size = current_node.cr_cep_stack_size; child_node.cr_cep_index = current_node.cr_cep_index; child_node.cr_level = current_node.cr_level; certificate_index = certificate_current_path.size(); current_node.eqref_hash = eqref_hash; current_node.subcertificate_length = certificate_index - current_node.certificate_index; /* * The first encountered leaf node at the end of the "first path"? */ if (p.is_discrete() and first_path_info.empty()) { // fprintf(stdout, "Level %u: FIRST\n", child_level); fflush(stdout); stats.nof_canupdates++; /* * Update labelings and their inverses */ update_labeling_and_its_inverse(first_path_labeling, first_path_labeling_inv); update_labeling_and_its_inverse(best_path_labeling, best_path_labeling_inv); /* * Reset automorphism array */ reset_permutation(first_path_automorphism); reset_permutation(best_path_automorphism); /* * Reset orbit information */ first_path_orbits.reset(); best_path_orbits.reset(); /* * Reset group size */ stats.group_size.assign(1); stats.group_size_approx = 1.0; /* * Reset all_same_level */ all_same_level = child_level; /* * Mark the current path to be the first and best one and save it */ const unsigned int base_size = search_stack.size(); best_path_info.clear(); // fprintf(stdout, " New base is: "); for (unsigned int i = 0; i < base_size; i++) { search_stack[i].fp_on = true; search_stack[i].fp_cert_equal = true; search_stack[i].fp_extendable = TreeNode::YES; search_stack[i].in_best_path = true; search_stack[i].cmp_to_best_path = 0; PathInfo path_info; 
path_info.splitting_element = search_stack[i].split_element; path_info.certificate_index = search_stack[i].certificate_index; path_info.eqref_hash = search_stack[i].eqref_hash; path_info.subcertificate_length = search_stack[i].subcertificate_length; first_path_info.push_back(path_info); best_path_info.push_back(path_info); // fprintf(stdout, "%u ", search_stack[i].split_element); } // fprintf(stdout, "\n"); fflush(stdout); /* Copy certificates */ certificate_first_path = certificate_current_path; certificate_best_path = certificate_current_path; /* From now on, compare certificates when refining */ refine_compare_certificate = true; if (opt_use_failure_recording) failure_recording_hashes.resize(base_size); /* for(unsigned int j = 0; j < search_stack.size(); j++) fprintf(stderr, "%u ", search_stack[j].split_element); fprintf(stderr, "\n"); p.print(stderr); fprintf(stderr, "\n"); */ /* * Backtrack to the previous level */ continue; } if (p.is_discrete() and child_node.fp_cert_equal) { /* * A leaf node that is equal to the first one. 
* An automorphism found: aut[i] = elements[first_path_labeling[i]] */ goto handle_first_path_automorphism; } if (!p.is_discrete()) { Partition::Cell* next_split_cell = 0; /* * An internal, non-leaf node */ if (opt_use_comprec) { assert(p.nof_discrete_cells() <= cr_cep_stack[cr_cep_index].discrete_cell_limit); assert(cr_level == child_node.cr_level); if (p.nof_discrete_cells() == cr_cep_stack[cr_cep_index].discrete_cell_limit) { /* We have reached the end of a component */ assert(cr_cep_index != 0); CR_CEP& cep = cr_cep_stack[cr_cep_index]; /* First, compare with respect to the first path */ if (first_path_info.empty() or child_node.fp_cert_equal) { if (cep.first_checked == false) { /* First time, go to the next component */ cep.first_checked = true; } else { assert(!first_path_info.empty()); assert(cep.creation_level < search_stack.size()); TreeNode& old_info = search_stack[cep.creation_level]; /* If the component was found when on the first path, * handle the found automorphism as the other * first path automorphisms */ if (old_info.fp_on) goto handle_first_path_automorphism; } } if (canonical and !first_path_info.empty() and child_node.cmp_to_best_path >= 0) { if (cep.best_checked == false) { /* First time, go to the next component */ cep.best_checked = true; } else { assert(cep.creation_level < search_stack.size()); TreeNode& old_info = search_stack[cep.creation_level]; if (child_node.cmp_to_best_path == 0) { /* If the component was found when on the best path, * handle the found automorphism as the other * best path automorphisms */ if (old_info.in_best_path) goto handle_best_path_automorphism; /* Otherwise, we do not remember the automorhism as * we didn't memorize the path that was invariant * equal to the best one and passed through the * component. 
* Thus we can only backtrack to the previous level */ child_node.cmp_to_best_path = -1; if (!child_node.fp_cert_equal) { continue; } } else { assert(child_node.cmp_to_best_path > 0); if (old_info.in_best_path) { stats.nof_canupdates++; /* * Update canonical labeling and its inverse */ for (unsigned int i = 0; i < N; i++) { if (p.get_cell(p.elements[i])->is_unit()) { best_path_labeling[p.elements[i]] = i; best_path_labeling_inv[i] = p.elements[i]; } } // update_labeling_and_its_inverse(best_path_labeling, // best_path_labeling_inv); /* Reset best path automorphism */ reset_permutation(best_path_automorphism); /* Reset best path orbit structure */ best_path_orbits.reset(); /* Mark to be the best one and save prefix */ unsigned int postfix_start = cep.creation_level; assert(postfix_start < best_path_info.size()); while (p.get_cell( best_path_info[postfix_start].splitting_element) ->is_unit()) { postfix_start++; assert(postfix_start < best_path_info.size()); } unsigned int postfix_start_cert = best_path_info[postfix_start].certificate_index; std::vector best_path_temp = best_path_info; best_path_info.clear(); for (unsigned int i = 0; i < search_stack.size(); i++) { TreeNode& ss_info = search_stack[i]; PathInfo bp_info; ss_info.cmp_to_best_path = 0; ss_info.in_best_path = true; bp_info.splitting_element = ss_info.split_element; bp_info.certificate_index = ss_info.certificate_index; bp_info.subcertificate_length = ss_info.subcertificate_length; bp_info.eqref_hash = ss_info.eqref_hash; best_path_info.push_back(bp_info); } /* Copy the postfix of the previous best path */ for (unsigned int i = postfix_start; i < best_path_temp.size(); i++) { best_path_info.push_back(best_path_temp[i]); best_path_info[best_path_info.size() - 1] .certificate_index = best_path_info[best_path_info.size() - 2] .certificate_index + best_path_info[best_path_info.size() - 2] .subcertificate_length; } std::vector certificate_best_path_old = certificate_best_path; certificate_best_path = 
certificate_current_path; for (unsigned int i = postfix_start_cert; i < certificate_best_path_old.size(); i++) certificate_best_path.push_back( certificate_best_path_old[i]); assert(certificate_best_path.size() == best_path_info.back().certificate_index + best_path_info.back().subcertificate_length); /* Backtrack to the previous level */ continue; } } } } /* No backtracking performed, go to next componenet */ cr_level = cep.next_cr_level; cr_cep_index = cep.next_cep_index; } /* Check if the current component has been split into * new non-uniformity subcomponents */ // if(nucr_find_first_component(cr_level) == true and // p.nof_discrete_cells() + cr_component_elements < // cr_cep_stack[cr_cep_index].discrete_cell_limit) if (nucr_find_first_component(cr_level, cr_component, cr_component_elements, next_split_cell) == true and p.nof_discrete_cells() + cr_component_elements < cr_cep_stack[cr_cep_index].discrete_cell_limit) { const unsigned int next_cr_level = p.cr_split_level(cr_level, cr_component); CR_CEP cep; cep.creation_level = search_stack.size(); cep.discrete_cell_limit = p.nof_discrete_cells() + cr_component_elements; cep.next_cr_level = cr_level; cep.next_cep_index = cr_cep_index; cep.first_checked = false; cep.best_checked = false; cr_cep_index = cr_cep_stack.size(); cr_cep_stack.push_back(cep); cr_level = next_cr_level; } } /* * Build the next node info */ /* Find the next cell to be splitted */ if (!next_split_cell) next_split_cell = find_next_cell_to_be_splitted( p.get_cell(p.elements[current_node.split_cell_first])); // Partition::Cell * const next_split_cell = // find_next_cell_to_be_splitted(p.get_cell(p.elements[current_node.split_cell_first])); child_node.split_cell_first = next_split_cell->first; child_node.split_element = TreeNode::SPLIT_START; child_node.certificate_index = certificate_index; child_node.partition_bt_point = p.set_backtrack_point(); child_node.long_prune_redundant.clear(); child_node.long_prune_begin = current_node.long_prune_begin; 
/* Save component recursion info for backtracking */ child_node.cr_level = cr_level; child_node.cr_cep_stack_size = cr_cep_stack.size(); child_node.cr_cep_index = cr_cep_index; search_stack.push_back(child_node); continue; } /* * A leaf node not in the first path or equivalent to the first path */ if (child_node.cmp_to_best_path > 0) { /* * A new, better representative found */ // fprintf(stdout, "Level %u: NEW BEST\n", child_level); fflush(stdout); stats.nof_canupdates++; /* * Update canonical labeling and its inverse */ update_labeling_and_its_inverse(best_path_labeling, best_path_labeling_inv); /* Reset best path automorphism */ reset_permutation(best_path_automorphism); /* Reset best path orbit structure */ best_path_orbits.reset(); /* * Mark the current path to be the best one and save it */ const unsigned int base_size = search_stack.size(); assert(current_level + 1 == base_size); best_path_info.clear(); for (unsigned int i = 0; i < base_size; i++) { search_stack[i].cmp_to_best_path = 0; search_stack[i].in_best_path = true; PathInfo path_info; path_info.splitting_element = search_stack[i].split_element; path_info.certificate_index = search_stack[i].certificate_index; path_info.subcertificate_length = search_stack[i].subcertificate_length; path_info.eqref_hash = search_stack[i].eqref_hash; best_path_info.push_back(path_info); } certificate_best_path = certificate_current_path; /* * Backtrack to the previous level */ continue; } handle_best_path_automorphism: /* * * Best path automorphism handling * */ { /* * Equal to the previous best path */ if (p.is_discrete()) { #if defined(BLISS_CONSISTENCY_CHECKS) /* Verify that the automorphism is correctly built */ for (unsigned int i = 0; i < N; i++) assert(best_path_automorphism[i] == p.elements[best_path_labeling[i]]); #endif } else { /* An automorphism that was found before the partition was discrete. 
* Set the image of all elements in non-disrete cells accordingly */ for (Partition::Cell* c = p.first_nonsingleton_cell; c; c = c->next_nonsingleton) { for (unsigned int i = c->first; i < c->first + c->length; i++) if (p.get_cell(p.elements[best_path_labeling[p.elements[i]]]) ->is_unit()) best_path_automorphism [p.elements[best_path_labeling[p.elements[i]]]] = p.elements[i]; else best_path_automorphism[p.elements[i]] = p.elements[i]; } } #if defined(BLISS_VERIFY_AUTOMORPHISMS) /* Verify that it really is an automorphism */ if (!is_automorphism(best_path_automorphism)) fatal_error("Best path automorhism validation check failed"); #endif unsigned int gca_level_with_first = 0; for (unsigned int i = search_stack.size(); i > 0; i--) { if ((int)first_path_info[gca_level_with_first].splitting_element != search_stack[gca_level_with_first].split_element) break; gca_level_with_first++; } unsigned int gca_level_with_best = 0; for (unsigned int i = search_stack.size(); i > 0; i--) { if ((int)best_path_info[gca_level_with_best].splitting_element != search_stack[gca_level_with_best].split_element) break; gca_level_with_best++; } if (opt_use_long_prune) { /* Record automorphism */ long_prune_add_automorphism(best_path_automorphism); } /* * Update orbit information */ update_orbit_information(best_path_orbits, best_path_automorphism); /* * Update orbit information */ const unsigned int nof_old_orbits = first_path_orbits.nof_orbits(); update_orbit_information(first_path_orbits, best_path_automorphism); if (nof_old_orbits != first_path_orbits.nof_orbits()) { /* Some orbits were merged */ /* Report automorphism */ if (report_hook) (*report_hook)(report_user_param, get_nof_vertices(), best_path_automorphism); /* Update statistics */ stats.nof_generators++; } /* * Compute backjumping level */ unsigned int backjumping_level = current_level + 1 - 1; if (!first_path_orbits.is_minimal_representative( search_stack[gca_level_with_first].split_element)) { backjumping_level = 
gca_level_with_first; } else { assert(!best_path_orbits.is_minimal_representative( search_stack[gca_level_with_best].split_element)); backjumping_level = gca_level_with_best; } /* Backtrack */ search_stack.resize(backjumping_level + 1); continue; } _INTERNAL_ERROR(); handle_first_path_automorphism: /* * * A first-path automorphism: aut[i] = elements[first_path_labeling[i]] * */ if (p.is_discrete()) { #if defined(BLISS_CONSISTENCY_CHECKS) /* Verify that the complete automorphism is correctly built */ for (unsigned int i = 0; i < N; i++) assert(first_path_automorphism[i] == p.elements[first_path_labeling[i]]); #endif } else { /* An automorphism that was found before the partition was discrete. * Set the image of all elements in non-disrete cells accordingly */ for (Partition::Cell* c = p.first_nonsingleton_cell; c; c = c->next_nonsingleton) { for (unsigned int i = c->first; i < c->first + c->length; i++) if (p.get_cell(p.elements[first_path_labeling[p.elements[i]]]) ->is_unit()) first_path_automorphism [p.elements[first_path_labeling[p.elements[i]]]] = p.elements[i]; else first_path_automorphism[p.elements[i]] = p.elements[i]; } } #if defined(BLISS_VERIFY_AUTOMORPHISMS) /* Verify that it really is an automorphism */ if (!is_automorphism(first_path_automorphism)) fatal_error("First path automorphism validation check failed"); #endif if (opt_use_long_prune) { long_prune_add_automorphism(first_path_automorphism); } /* * Update orbit information */ update_orbit_information(first_path_orbits, first_path_automorphism); /* * Compute backjumping level */ for (unsigned int i = 0; i < search_stack.size(); i++) { TreeNode& n = search_stack[i]; if (n.fp_on) { ; } else { n.fp_extendable = TreeNode::YES; } } /* Report automorphism by calling the user defined hook function */ if (report_hook) (*report_hook)(report_user_param, get_nof_vertices(), first_path_automorphism); /* Update statistics */ stats.nof_generators++; continue; } /* while(!search_stack.empty()) */ /* Free "long 
prune" technique memory */ if (opt_use_long_prune) long_prune_deallocate(); /* Release component recursion data in partition */ if (opt_use_comprec) p.cr_free(); } ================================================ FILE: external/bliss/bliss/uintseqhash.hh ================================================ #ifndef BLISS_UINTSEQHASH_HH #define BLISS_UINTSEQHASH_HH #include namespace bliss { static unsigned int rtab[256] = { 0xAEAA35B8, 0x65632E16, 0x155EDBA9, 0x01349B39, 0x8EB8BD97, 0x8E4C5367, 0x8EA78B35, 0x2B1B4072, 0xC1163893, 0x269A8642, 0xC79D7F6D, 0x6A32DEA0, 0xD4D2DA56, 0xD96D4F47, 0x47B5F48A, 0x2587C6BF, 0x642B71D8, 0x5DBBAF58, 0x5C178169, 0xA16D9279, 0x75CDA063, 0x291BC48B, 0x01AC2F47, 0x5416DF7C, 0x45307514, 0xB3E1317B, 0xE1C7A8DE, 0x3ACDAC96, 0x11B96831, 0x32DE22DD, 0x6A1DA93B, 0x58B62381, 0x283810E2, 0xBC30E6A6, 0x8EE51705, 0xB06E8DFB, 0x729AB12A, 0xA9634922, 0x1A6E8525, 0x49DD4E19, 0xE5DB3D44, 0x8C5B3A02, 0xEBDE2864, 0xA9146D9F, 0x736D2CB4, 0xF5229F42, 0x712BA846, 0x20631593, 0x89C02603, 0xD5A5BF6A, 0x823F4E18, 0x5BE5DEFF, 0x1C4EBBFA, 0x5FAB8490, 0x6E559B0C, 0x1FE528D6, 0xB3198066, 0x4A965EB5, 0xFE8BB3D5, 0x4D2F6234, 0x5F125AA4, 0xBCC640FA, 0x4F8BC191, 0xA447E537, 0xAC474D3C, 0x703BFA2C, 0x617DC0E7, 0xF26299D7, 0xC90FD835, 0x33B71C7B, 0x6D83E138, 0xCBB1BB14, 0x029CF5FF, 0x7CBD093D, 0x4C9825EF, 0x845C4D6D, 0x124349A5, 0x53942D21, 0x800E60DA, 0x2BA6EB7F, 0xCEBF30D3, 0xEB18D449, 0xE281F724, 0x58B1CB09, 0xD469A13D, 0x9C7495C3, 0xE53A7810, 0xA866C08E, 0x832A038B, 0xDDDCA484, 0xD5FE0DDE, 0x0756002B, 0x2FF51342, 0x60FEC9C8, 0x061A53E3, 0x47B1884E, 0xDC17E461, 0xA17A6A37, 0x3158E7E2, 0xA40D873B, 0x45AE2140, 0xC8F36149, 0x63A4EE2D, 0xD7107447, 0x6F90994F, 0x5006770F, 0xC1F3CA9A, 0x91B317B2, 0xF61B4406, 0xA8C9EE8F, 0xC6939B75, 0xB28BBC3B, 0x36BF4AEF, 0x3B12118D, 0x4D536ECF, 0x9CF4B46B, 0xE8AB1E03, 0x8225A360, 0x7AE4A130, 0xC4EE8B50, 0x50651797, 0x5BB4C59F, 0xD120EE47, 0x24F3A386, 0xBE579B45, 0x3A378EFC, 0xC5AB007B, 0x3668942B, 0x2DBDCC3A, 0x6F37F64C, 0xC24F862A, 
0xB6F97FCF, 0x9E4FA23D, 0x551AE769, 0x46A8A5A6, 0xDC1BCFDD, 0x8F684CF9, 0x501D811B, 0x84279F80, 0x2614E0AC, 0x86445276, 0xAEA0CE71, 0x0812250F, 0xB586D18A, 0xC68D721B, 0x44514E1D, 0x37CDB99A, 0x24731F89, 0xFA72E589, 0x81E6EBA2, 0x15452965, 0x55523D9D, 0x2DC47E14, 0x2E7FA107, 0xA7790F23, 0x40EBFDBB, 0x77E7906B, 0x6C1DB960, 0x1A8B9898, 0x65FA0D90, 0xED28B4D8, 0x34C3ED75, 0x768FD2EC, 0xFAB60BCB, 0x962C75F4, 0x304F0498, 0x0A41A36B, 0xF7DE2A4A, 0xF4770FE2, 0x73C93BBB, 0xD21C82C5, 0x6C387447, 0x8CDB4CB9, 0x2CC243E8, 0x41859E3D, 0xB667B9CB, 0x89681E8A, 0x61A0526C, 0x883EDDDC, 0x539DE9A4, 0xC29E1DEC, 0x97C71EC5, 0x4A560A66, 0xBD7ECACF, 0x576AE998, 0x31CE5616, 0x97172A6C, 0x83D047C4, 0x274EA9A8, 0xEB31A9DA, 0x327209B5, 0x14D1F2CB, 0x00FE1D96, 0x817DBE08, 0xD3E55AED, 0xF2D30AFC, 0xFB072660, 0x866687D6, 0x92552EB9, 0xEA8219CD, 0xF7927269, 0xF1948483, 0x694C1DF5, 0xB7D8B7BF, 0xFFBC5D2F, 0x2E88B849, 0x883FD32B, 0xA0331192, 0x8CB244DF, 0x41FAF895, 0x16902220, 0x97FB512A, 0x2BEA3CC4, 0xAF9CAE61, 0x41ACD0D5, 0xFD2F28FF, 0xE780ADFA, 0xB3A3A76E, 0x7112AD87, 0x7C3D6058, 0x69E64FFF, 0xE5F8617C, 0x8580727C, 0x41F54F04, 0xD72BE498, 0x653D1795, 0x1275A327, 0x14B499D4, 0x4E34D553, 0x4687AA39, 0x68B64292, 0x5C18ABC3, 0x41EABFCC, 0x92A85616, 0x82684CF8, 0x5B9F8A4E, 0x35382FFE, 0xFB936318, 0x52C08E15, 0x80918B2E, 0x199EDEE0, 0xA9470163, 0xEC44ACDD, 0x612D6735, 0x8F88EA7D, 0x759F5EA4, 0xE5CC7240, 0x68CFEB8B, 0x04725601, 0x0C22C23E, 0x5BC97174, 0x89965841, 0x5D939479, 0x690F338A, 0x3C2D4380, 0xDAE97F2B }; // A hash for sequences of unsigned ints. class UintSeqHash { protected: unsigned int h; public: UintSeqHash() {h = 0; } UintSeqHash(const UintSeqHash &other) {h = other.h; } UintSeqHash& operator=(const UintSeqHash &other) {h = other.h; return *this; } /** Reset the hash value. */ void reset() {h = 0; } /** Add the unsigned int \a n to the sequence. 
*/ void update(unsigned int i) { i++; while(i > 0) { h ^= rtab[i & 0xff]; const unsigned int b = (h & 0x80000000) >> 31; i = i >> 8; h = (h << 1) | b; } } /** Get the hash value of the sequence seen so far. */ unsigned int get_value() const {return h; } /** Compare the hash values of this and \a other. * Return -1/0/1 if the value of this is smaller/equal/greater than * that of \a other. */ int cmp(const UintSeqHash &other) const { return (h < other.h)?-1:((h == other.h)?0:1); } /** An abbreviation for cmp(other) < 0 */ bool is_lt(const UintSeqHash &other) const {return(cmp(other) < 0); } /** An abbreviation for cmp(other) <= 0 */ bool is_le(const UintSeqHash &other) const {return(cmp(other) <= 0); } /** An abbreviation for cmp(other) == 0 */ bool is_equal(const UintSeqHash &other) const {return(cmp(other) == 0); } }; } // namespace bliss #endif ================================================ FILE: external/bliss/bliss/utils.hh ================================================ #ifndef BLISS_UTILS_HH #define BLISS_UTILS_HH /* Copyright (c) 2003-2015 Tommi Junttila Released under the GNU Lesser General Public License version 3. This file is part of bliss. bliss is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, version 3 of the License. bliss is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with bliss. If not, see . */ /** * \file * \brief Some small utilities. * */ #include namespace bliss { /** * Print the permutation \a perm of {0,...,N-1} in the cycle format * in the file stream \a fp. * The amount \a offset is added to each element before printing, * e.g. 
the permutation (2 4) is printed as (3 5) when \a offset is 1. */ void print_permutation(FILE* fp, const unsigned int N, const unsigned int* perm, const unsigned int offset = 0); /** * Print the permutation \a perm of {0,...,N-1} in the cycle format * in the file stream \a fp. * The amount \a offset is added to each element before printing, * e.g. the permutation (2 4) is printed as (3 5) when \a offset is 1. */ void print_permutation(FILE* fp, const std::vector& perm, const unsigned int offset = 0); /** * Check whether \a perm is a valid permutation on {0,...,N-1}. * Slow, mainly for debugging and validation purposes. */ bool is_permutation(const unsigned int N, const unsigned int* perm); /** * Check whether \a perm is a valid permutation on {0,...,N-1}. * Slow, mainly for debugging and validation purposes. */ bool is_permutation(const std::vector& perm); } // namespace bliss #endif ================================================ FILE: inputs/CMakeLists.txt ================================================ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/download.cmake "file(DOWNLOAD http://iss.oden.utexas.edu/projects/galois/downloads/small_inputs_for_lonestar_test.tar.gz ${CMAKE_CURRENT_BINARY_DIR}/lonestar-cpu-inputs.tar.gz SHOW_PROGRESS)") add_custom_command( OUTPUT lonestar-cpu-inputs.tar.gz COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/download.cmake ) add_custom_command( OUTPUT small_inputs COMMAND ${CMAKE_COMMAND} -E tar xJf lonestar-cpu-inputs.tar.gz DEPENDS lonestar-cpu-inputs.tar.gz WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/inputs COMMENT "Unpacking lonestar-cpu-inputs.tar.gz" VERBATIM ) add_custom_target(input DEPENDS small_inputs) ================================================ FILE: inputs/cholesky/matrix1.txt ================================================ 4 0 0 2 0 1 0 7 2 0 0 3 0 2 3 0 0 0 2 0 0 6 3 0 0 0 0 3 7 3 1 3 0 0 3 11 ================================================ FILE: inputs/cholesky/matrix1.txt.choleskyedges 
================================================ 0 0 2.000000 0 3 1.000000 0 5 0.500000 1 1 2.645751 1 2 0.755929 1 5 1.133893 2 2 1.558387 2 5 -0.550019 3 3 2.236068 3 4 1.341641 3 5 -0.223607 4 4 2.280351 4 5 1.447146 5 5 2.649063 ================================================ FILE: inputs/cholesky/matrix1.txt.dep ================================================ 0 1 2 3 4 5 ================================================ FILE: inputs/cholesky/matrix1.txt.filled ================================================ 0 0 4.000000 0 3 2.000000 0 5 1.000000 1 1 7.000000 1 2 2.000000 1 5 3.000000 2 2 3.000000 2 5 0.000000 3 3 6.000000 3 4 3.000000 3 5 0.000000 4 4 7.000000 4 5 3.000000 5 5 11.000000 ================================================ FILE: inputs/cholesky/very-sparse.txt ================================================ 576.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 256.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 256.0 0.0 0.0 1369.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 144.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 144.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2304.0 0.0 1200.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 324.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1200.0 0.0 641.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.0 0.0 0.0 256.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1040.0 ================================================ FILE: inputs/cholesky/very-sparse.txt.choleskyedges ================================================ 0 0 24.000000 1 1 16.000000 1 9 16.000000 2 2 37.000000 3 3 12.000000 4 4 12.000000 5 5 48.000000 5 7 25.000000 6 6 18.000000 7 7 4.000000 8 8 3.000000 9 9 28.000000 ================================================ FILE: inputs/cholesky/very-sparse.txt.dep ================================================ 0 1 2 3 4 5 6 7 8 9 ================================================ FILE: inputs/cholesky/very-sparse.txt.filled ================================================ 0 0 576.000000 1 1 256.000000 1 9 256.000000 2 2 1369.000000 3 3 144.000000 4 4 144.000000 5 5 2304.000000 5 7 
1200.000000
6 6 324.000000
7 7 641.000000
8 8 9.000000
9 9 1040.000000


================================================
FILE: libcusp/CMakeLists.txt
================================================
add_library(galois_cusp INTERFACE)
add_library(Galois::cusp ALIAS galois_cusp)
set_target_properties(galois_cusp PROPERTIES EXPORT_NAME cusp)
add_dependencies(lib galois_cusp)

# NOTE(review): the extraction stripped the generator expressions here
# (only bare `$` tokens remained); restored to the conventional
# build/install interface pair -- confirm against the repository.
target_include_directories(galois_cusp INTERFACE
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:include>
)

target_link_libraries(galois_cusp INTERFACE galois_dist_async)

install(
  DIRECTORY include/
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
  COMPONENT dev
  FILES_MATCHING PATTERN "*.h"
)

install(TARGETS galois_cusp
  EXPORT GaloisTargets
  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    COMPONENT shlib
  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    COMPONENT lib
  INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
)


================================================
FILE: libcusp/include/galois/graphs/BasePolicies.h
================================================
/*
 * This file belongs to the Galois project, a C++ library for exploiting
 * parallelism. The code is being released under the terms of the 3-Clause BSD
 * License (a copy is located in LICENSE.txt at the top-level directory).
 *
 * Copyright (C) 2019, The University of Texas at Austin. All rights reserved.
 * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
 * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
 * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
 * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
 * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION.
Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file BasePolicies.h * * Header file that includes the base classes for defining CuSP partitioning * policies. */ #ifndef _GALOIS_CUSP_PSCAFFOLD_H_ #define _GALOIS_CUSP_PSCAFFOLD_H_ namespace galois { namespace graphs { /** * Default fields and functions all CuSP partitioners use; this is a class to * inherit from. */ class PartitioningScaffold { protected: uint32_t _hostID; //!< host ID of owner of this object uint32_t _numHosts; //!< total number of hosts uint64_t _numNodes; //!< number of nodes in graph uint64_t _numEdges; //!< number of edges in graph //! maps from host id to nodes that host as read from disk std::vector> _gid2host; public: /** * Constructor for Scaffold. * * @param hostID Host ID of caller * @param numHosts Total num hosts in execution * @param numNodes Total number of nodes in graph * @param numEdges Total number of edges in graph */ PartitioningScaffold(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : _hostID(hostID), _numHosts(numHosts), _numNodes(numNodes), _numEdges(numEdges) {} /** * Save a provided map from host to nodes a host has read into this object * * @param gid2host Map of hosts to read nodes to save */ void saveGIDToHost(std::vector>& gid2host) { _gid2host = gid2host; } }; /** * Policies that use the read assignment of nodes as the masters. Does not * need to go through a master assignment phase, saving overhead. */ class ReadMasterAssignment : public PartitioningScaffold { public: /** * Constructor simply calls parent constructor. 
*/ ReadMasterAssignment(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : PartitioningScaffold(hostID, numHosts, numNodes, numEdges) {} /** * Returns the host ID of the host that read a particular node and its edges * from disk. * * @param gid GID of node to get master of * @returns Host ID of host that read the node specified by the GID. */ uint32_t retrieveMaster(uint32_t gid) const { for (auto h = 0U; h < _numHosts; ++h) { uint64_t start, end; std::tie(start, end) = _gid2host[h]; if (gid >= start && gid < end) { return h; } } assert(false); return _numHosts; } // below all unused if not assigning masters in default manner, but must be // defined or compiler complains /** * Returns false as this partitioning policy doesn't have a master assignment * phase. */ bool masterAssignPhase() const { return false; } /** * Does nothing as this policy doesn't have a master assignment phase */ void enterStage2() {} /** * Does nothing because this policy doesn't have a master assignment phase. * (uses read assignment) */ template uint32_t getMaster(uint32_t, galois::graphs::BufferedGraph&, const std::vector&, std::unordered_map&, const std::vector&, std::vector>&, const std::vector&, std::vector>&) { return 0; } /** * No-op because no master assignment phase. */ void saveGID2HostInfo(std::unordered_map&, std::vector&, uint64_t) {} /** * Technically doesn't nothing and should never be called because no master * assignment phase. */ bool addMasterMapping(uint32_t, uint32_t) { return false; } }; /** * Policies that use a custom assignment of masters (from the user). * Needs to go through a master assignment phase, which adds overhead * to partitioning, but may get better quality partitions. */ class CustomMasterAssignment : public PartitioningScaffold { protected: char _status; //!< Specifies what phase of master assignment partitioner is on //! Metadata for determining where a node's master is std::vector _localNodeToMaster; //! 
Map GID to its master std::unordered_map _gid2masters; //! This host's node offset (each host reads a distinct contiguous portion //! of graph uint64_t _nodeOffset; /** * Return the reader of a particular node. * @param gid GID of node to get reader of * @return Host reader of node passed in as param */ unsigned getHostReader(uint64_t gid) const { for (auto i = 0U; i < _numHosts; ++i) { uint64_t start, end; std::tie(start, end) = _gid2host[i]; if (gid >= start && gid < end) { return i; } } return -1; } public: //! Calls parent constructor to initialize common data CustomMasterAssignment(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : PartitioningScaffold(hostID, numHosts, numNodes, numEdges), _status(0) { } /** * Retrieves a saved master mapping: does not fail if a GID * mapping is not found but instead returns -1 if in stage 1, else * fails. * * @param gid GID to get master of * @returns Master of specified GID, -1, unsigned, if not found */ uint32_t retrieveMaster(uint32_t gid) const { if (_status != 0) { // use map if not a locally read node, else use vector if (getHostReader(gid) != _hostID) { auto gidMasterIter = _gid2masters.find(gid); // found in map if (gidMasterIter != _gid2masters.end()) { uint32_t mappedMaster = gidMasterIter->second; // galois::gDebug("[", _hostID, "] ", gid, " found with master ", // mappedMaster, "!"); // make sure host is in bounds assert(mappedMaster < _numHosts); return mappedMaster; } else { // NOT FOUND (not necessarily a bad thing, and required for // some cases) galois::gDebug("[", _hostID, "] ", gid, " not found!"); if (_status == 2) { // die if we expect all gids to be mapped already (stage 2) GALOIS_DIE("should not fail to find a GID after stage 2 " "of master assignment phase"); } return (uint32_t)-1; } } else { // determine offset uint32_t offsetIntoMap = gid - _nodeOffset; assert(offsetIntoMap != (uint32_t)-1); assert(offsetIntoMap < _localNodeToMaster.size()); return 
_localNodeToMaster[offsetIntoMap]; } } else { // stage 0 = this function shouldn't be called GALOIS_DIE("master setup incomplete"); return (uint32_t)-1; } } /** * Given gid to master mapping info, save it into a local map. * * @param gid2offsets Map a GID to an offset into a vector containing master * mapping information * @param localNodeToMaster Vector that represents the master mapping of * local nodes * @param nodeOffset First GID of nodes read by this host */ void saveGID2HostInfo(std::unordered_map& gid2offsets, std::vector& localNodeToMaster, uint64_t nodeOffset) { #ifndef NDEBUG size_t originalSize = _gid2masters.size(); #endif for (auto i = gid2offsets.begin(); i != gid2offsets.end(); i++) { assert(i->second < localNodeToMaster.size()); galois::gDebug("Map ", i->first, " to ", localNodeToMaster[i->second]); _gid2masters[i->first] = localNodeToMaster[i->second]; } assert(_gid2masters.size() == (originalSize + gid2offsets.size())); // get memory back gid2offsets.clear(); size_t myLocalNodes = _gid2host[_hostID].second - _gid2host[_hostID].first; assert((myLocalNodes + _gid2masters.size() - originalSize) == localNodeToMaster.size()); // copy over to this structure _localNodeToMaster = std::move(localNodeToMaster); assert(myLocalNodes <= _localNodeToMaster.size()); // resize to fit only this host's read nodes _localNodeToMaster.resize(myLocalNodes); _nodeOffset = nodeOffset; // stage 1 setup complete _status = 1; } //! Returns true as policies that inherit from this should define master //! assignment function bool masterAssignPhase() const { return true; } //! Shifts master assignment phase to stage 2. void enterStage2() { _status = 2; } /** * CuSP's "getMaster" function. * This function should be defined by user in child class to assign a node to * a host. * * @todo Consolidate metadata into single struct to clean up function. 
* @returns Host id in which to assing a node */ template uint32_t getMaster(uint32_t, galois::graphs::BufferedGraph&, const std::vector&, std::unordered_map&, const std::vector&, std::vector>&, const std::vector&, std::vector>&) { return (uint32_t)-1; } /** * Add a new master mapping to the local map: needs to be in stage 1 * * @param gid GID to map; should not be a GID read by this host (won't * cause problems, but would just be a waste of compute resouces) * @param mappedMaster master to map a GID to * @returns true if new mapping added; false if already existed in map */ bool addMasterMapping(uint32_t gid, uint32_t mappedMaster) { assert(mappedMaster < _numHosts); if (_status <= 1) { auto offsetIntoMapIter = _gid2masters.find(gid); if (offsetIntoMapIter == _gid2masters.end()) { // NOT FOUND galois::gDebug("[", _hostID, "] ", gid, " not found; mapping!"); _gid2masters[gid] = mappedMaster; return true; } else { // already mapped galois::gDebug("[", _hostID, "] ", gid, " already mapped with master ", offsetIntoMapIter->second, "!"); assert(offsetIntoMapIter->second == mappedMaster); return false; } } else { GALOIS_DIE("unexpected status in add master mapping: ", _status); return false; } } }; } // end namespace graphs } // end namespace galois #endif ================================================ FILE: libcusp/include/galois/graphs/CuSPPartitioner.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2019, The University of Texas at Austin. All rights reserved. 
* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file CuSPPartitioner.h * * Contains the main CuSP partitioning function. */ #ifndef _GALOIS_CUSP_ #define _GALOIS_CUSP_ #include "galois/DistGalois.h" #include "galois/graphs/DistributedGraph.h" #include "galois/graphs/NewGeneric.h" #include "galois/graphs/GenericPartitioners.h" namespace galois { //! Enum for the input/output format of the partitioner. enum CUSP_GRAPH_TYPE { CUSP_CSR, //!< Compressed sparse row graph format, i.e. outgoing edges CUSP_CSC //!< Compressed sparse column graph format, i.e. incoming edges }; template using DistGraphPtr = std::unique_ptr>; /** * Main CuSP function: partitions a graph on disk, one partition per host. * * @param graphFile Graph file to read in the Galois binary CSR format * @param inputType Specifies which input format (CSR or CSC) should be given * to the partitioner * @param outputType Specifies the output format (CSR or CSC) that each * partition will be created in * @param symmetricGraph This should be "true" if the passed in graphFile * is a symmetric graph * @param transposeGraphFile Transpose graph of graphFile in Galois binary * CSC format (i.e. give it the transpose version of graphFile). 
Ignore * this argument if the graph is symmetric. * @param masterBlockFile * @param cuspAsync Toggles asynchronous master assignment phase during * partitioning * @param cuspStateRounds Toggles number of rounds used to synchronize * partitioning state during master assignment phase * @param readPolicy Determines how each host should divide the reading * load of the graph on disk * @param nodeWeight When using a read policy that involves nodes and edges, * this argument assigns a weight to give each node. * @param edgeWeight When using a read policy that involves nodes and edges, * this argument assigns a weight to give each edge. * * @tparam PartitionPolicy Partitioning policy object that specifies the * placement of nodes/edges during partitioning. * @tparam NodeData Data structure to be created for each node in the graph * @tparam EdgeData Type of data to be stored on each edge. Currently * only guarantee support for void or uint32_t; all other types may cause * undefined behavior. * * @returns A local partition of the passed in graph as a DistributedGraph * * @todo Look into making void node data work in LargeArray for D-Galois; * void specialization. For now, use char as default type */ template DistGraphPtr cuspPartitionGraph(std::string graphFile, CUSP_GRAPH_TYPE inputType, CUSP_GRAPH_TYPE outputType, bool symmetricGraph = false, std::string transposeGraphFile = "", std::string masterBlockFile = "", bool cuspAsync = true, uint32_t cuspStateRounds = 100, galois::graphs::MASTERS_DISTRIBUTION readPolicy = galois::graphs::BALANCED_EDGES_OF_MASTERS, uint32_t nodeWeight = 0, uint32_t edgeWeight = 0) { auto& net = galois::runtime::getSystemNetworkInterface(); using DistGraphConstructor = galois::graphs::NewDistGraphGeneric; // TODO @todo bring back graph saving/reading functionality? 
if (!symmetricGraph) { // out edges or in edges std::string inputToUse; // depending on output type may need to transpose edges bool useTranspose; // see what input is specified if (inputType == CUSP_CSR) { inputToUse = graphFile; if (outputType == CUSP_CSR) { useTranspose = false; } else if (outputType == CUSP_CSC) { useTranspose = true; } else { GALOIS_DIE("CuSP output graph type is invalid"); } } else if (inputType == CUSP_CSC) { inputToUse = transposeGraphFile; if (outputType == CUSP_CSR) { useTranspose = true; } else if (outputType == CUSP_CSC) { useTranspose = false; } else { GALOIS_DIE("CuSP output graph type is invalid"); } } else { GALOIS_DIE("Invalid input graph type specified in CuSP partitioner"); } return std::make_unique( inputToUse, net.ID, net.Num, cuspAsync, cuspStateRounds, useTranspose, readPolicy, nodeWeight, edgeWeight, masterBlockFile); } else { // symmetric graph path: assume the passed in graphFile is a symmetric // graph; output is also symmetric return std::make_unique( graphFile, net.ID, net.Num, cuspAsync, cuspStateRounds, false, readPolicy, nodeWeight, edgeWeight, masterBlockFile); } } } // end namespace galois #endif ================================================ FILE: libcusp/include/galois/graphs/DistributedGraph.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. 
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DistributedGraph.h * * Contains the implementation for DistGraph. Command line argument definitions * are found in DistributedGraph.cpp. */ #ifndef _GALOIS_DIST_HGRAPH_H_ #define _GALOIS_DIST_HGRAPH_H_ #include #include #include "galois/graphs/LC_CSR_Graph.h" #include "galois/graphs/BufferedGraph.h" #include "galois/runtime/DistStats.h" #include "galois/graphs/OfflineGraph.h" #include "galois/DynamicBitset.h" /* * Headers for boost serialization */ namespace galois { namespace graphs { /** * Enums specifying how masters are to be distributed among hosts. */ enum MASTERS_DISTRIBUTION { //! balance nodes BALANCED_MASTERS, //! balance edges BALANCED_EDGES_OF_MASTERS, //! balance nodes and edges BALANCED_MASTERS_AND_EDGES }; /** * Base DistGraph class that all distributed graphs extend from. * * @tparam NodeTy type of node data for the graph * @tparam EdgeTy type of edge data for the graph */ template class DistGraph { private: //! Graph name used for printing things constexpr static const char* const GRNAME = "dGraph"; using GraphTy = galois::graphs::LC_CSR_Graph; // vector for determining range objects for master nodes + nodes // with edges (which includes masters) //! represents split of all nodes among threads to balance edges std::vector allNodesRanges; //! represents split of master nodes among threads to balance edges std::vector masterRanges; //! represents split of nodes with edges (includes masters) among threads to //! balance edges std::vector withEdgeRanges; //! 
represents split of all nodes among threads to balance in-edges std::vector allNodesRangesIn; //! represents split of master nodes among threads to balance in-edges std::vector masterRangesIn; using NodeRangeType = galois::runtime::SpecificRange>; //! Vector of ranges that stores the 3 different range objects that a user is //! able to access std::vector specificRanges; //! Like specificRanges, but for in edges std::vector specificRangesIn; protected: //! The internal graph used by DistGraph to represent the graph GraphTy graph; //! Marks if the graph is transposed or not. bool transposed; // global graph variables uint64_t numGlobalNodes; //!< Total nodes in the global unpartitioned graph. uint64_t numGlobalEdges; //!< Total edges in the global unpartitioned graph. uint32_t numNodes; //!< Num nodes in this graph in total uint64_t numEdges; //!< Num edges in this graph in total const unsigned id; //!< ID of the machine. const uint32_t numHosts; //!< Total number of machines // local graph // size() = Number of nodes created on this host (masters + mirrors) uint32_t numOwned; //!< Number of nodes owned (masters) by this host. //!< size() - numOwned = mirrors on this host uint32_t beginMaster; //!< Local id of the beginning of master nodes. //!< beginMaster + numOwned = local id of the end of //!< master nodes uint32_t numNodesWithEdges; //!< Number of nodes (masters + mirrors) that have //!< outgoing edges //! Information that converts host to range of nodes that host reads std::vector> gid2host; //! Mirror nodes from different hosts. For reduce std::vector> mirrorNodes; //! GID = localToGlobalVector[LID] std::vector localToGlobalVector; //! LID = globalToLocalMap[GID] std::unordered_map globalToLocalMap; //! Increments evilPhase, a phase counter used by communication. 
void inline increment_evilPhase() { ++galois::runtime::evilPhase; if (galois::runtime::evilPhase >= static_cast( std::numeric_limits::max())) { // limit defined by MPI or // LCI galois::runtime::evilPhase = 1; } } //! Returns evilPhase + 1, handling loop around as necessary unsigned inline evilPhasePlus1() { unsigned result = galois::runtime::evilPhase + 1; // limit defined by MPI or LCI if (result >= uint32_t{std::numeric_limits::max()}) { return 1; } return result; } //! used to sort edges in the sort edges function template struct IdLess { bool operator()(const galois::graphs::EdgeSortValue& e1, const galois::graphs::EdgeSortValue& e2) const { return e1.dst < e2.dst; } }; private: /** * Given an OfflineGraph, compute the masters for each node by * evenly (or unevenly as specified by scale factor) * blocking the nodes off to assign to each host. Considers * ONLY nodes and not edges. * * @param g The offline graph which has loaded the graph you want * to get the masters for * @param scalefactor A vector that specifies if a particular host * should have more or less than other hosts * @param DecomposeFactor Specifies how decomposed the blocking * of nodes should be. For example, a factor of 2 will make 2 blocks * out of 1 block had the decompose factor been set to 1. */ void computeMastersBlockedNodes(galois::graphs::OfflineGraph& g, const std::vector& scalefactor, unsigned DecomposeFactor = 1) { uint64_t numNodes_to_divide = g.size(); if (scalefactor.empty() || (numHosts * DecomposeFactor == 1)) { for (unsigned i = 0; i < numHosts * DecomposeFactor; ++i) gid2host.push_back(galois::block_range(uint64_t{0}, numNodes_to_divide, i, numHosts * DecomposeFactor)); return; } // TODO: not compatible with DecomposeFactor. 
assert(scalefactor.size() == numHosts); unsigned numBlocks = 0; for (unsigned i = 0; i < numHosts; ++i) { numBlocks += scalefactor[i]; } std::vector> blocks; for (unsigned i = 0; i < numBlocks; ++i) { blocks.push_back( galois::block_range(uint64_t{0}, numNodes_to_divide, i, numBlocks)); } std::vector prefixSums; prefixSums.push_back(0); for (unsigned i = 1; i < numHosts; ++i) { prefixSums.push_back(prefixSums[i - 1] + scalefactor[i - 1]); } for (unsigned i = 0; i < numHosts; ++i) { unsigned firstBlock = prefixSums[i]; unsigned lastBlock = prefixSums[i] + scalefactor[i] - 1; gid2host.push_back( std::make_pair(blocks[firstBlock].first, blocks[lastBlock].second)); } } /** * Given an OfflineGraph, compute the masters for each node by * evenly (or unevenly as specified by scale factor) * blocking the nodes off to assign to each host while taking * into consideration the only edges of the node to get * even blocks. * * @param g The offline graph which has loaded the graph you want * to get the masters for * @param scalefactor A vector that specifies if a particular host * should have more or less than other hosts * @param DecomposeFactor Specifies how decomposed the blocking * of nodes should be. For example, a factor of 2 will make 2 blocks * out of 1 block had the decompose factor been set to 1. 
*/ void computeMastersBalancedEdges(galois::graphs::OfflineGraph& g, const std::vector& scalefactor, uint32_t edgeWeight, unsigned DecomposeFactor = 1) { if (edgeWeight == 0) { edgeWeight = 1; } auto& net = galois::runtime::getSystemNetworkInterface(); gid2host.resize(numHosts * DecomposeFactor); for (unsigned d = 0; d < DecomposeFactor; ++d) { auto r = g.divideByNode(0, edgeWeight, (id + d * numHosts), numHosts * DecomposeFactor, scalefactor); gid2host[id + d * numHosts].first = *(r.first.first); gid2host[id + d * numHosts].second = *(r.first.second); } for (unsigned h = 0; h < numHosts; ++h) { if (h == id) { continue; } galois::runtime::SendBuffer b; for (unsigned d = 0; d < DecomposeFactor; ++d) { galois::runtime::gSerialize(b, gid2host[id + d * numHosts]); } net.sendTagged(h, galois::runtime::evilPhase, b); } net.flush(); unsigned received = 1; while (received < numHosts) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); assert(p->first != id); auto& b = p->second; for (unsigned d = 0; d < DecomposeFactor; ++d) { galois::runtime::gDeserialize(b, gid2host[p->first + d * numHosts]); } ++received; } increment_evilPhase(); #ifndef NDEBUG for (unsigned h = 0; h < numHosts; h++) { if (h == 0) { assert(gid2host[h].first == 0); } else if (h == numHosts - 1) { assert(gid2host[h].first == gid2host[h - 1].second); assert(gid2host[h].second == g.size()); } else { assert(gid2host[h].first == gid2host[h - 1].second); assert(gid2host[h].second == gid2host[h + 1].first); } } #endif } /** * Given an OfflineGraph, compute the masters for each node by * evenly (or unevenly as specified by scale factor) * blocking the nodes off to assign to each host while taking * into consideration the edges of the node AND the node itself. 
* * @param g The offline graph which has loaded the graph you want * to get the masters for * @param scalefactor A vector that specifies if a particular host * should have more or less than other hosts * @param DecomposeFactor Specifies how decomposed the blocking * of nodes should be. For example, a factor of 2 will make 2 blocks * out of 1 block had the decompose factor been set to 1. Ignored * in this function currently. * * @todo make this function work with decompose factor */ void computeMastersBalancedNodesAndEdges( galois::graphs::OfflineGraph& g, const std::vector& scalefactor, uint32_t nodeWeight, uint32_t edgeWeight, unsigned) { if (nodeWeight == 0) { nodeWeight = g.sizeEdges() / g.size(); // average degree } if (edgeWeight == 0) { edgeWeight = 1; } auto& net = galois::runtime::getSystemNetworkInterface(); gid2host.resize(numHosts); auto r = g.divideByNode(nodeWeight, edgeWeight, id, numHosts, scalefactor); gid2host[id].first = *r.first.first; gid2host[id].second = *r.first.second; for (unsigned h = 0; h < numHosts; ++h) { if (h == id) continue; galois::runtime::SendBuffer b; galois::runtime::gSerialize(b, gid2host[id]); net.sendTagged(h, galois::runtime::evilPhase, b); } net.flush(); unsigned received = 1; while (received < numHosts) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); assert(p->first != id); auto& b = p->second; galois::runtime::gDeserialize(b, gid2host[p->first]); ++received; } increment_evilPhase(); } protected: /** * Wrapper call that will call into more specific compute masters * functions that compute masters based on nodes, edges, or both. 
* * @param masters_distribution method of masters distribution to use * @param g The offline graph which has loaded the graph you want * to get the masters for * @param scalefactor A vector that specifies if a particular host * should have more or less than other hosts * @param nodeWeight weight to give nodes when computing balance * @param edgeWeight weight to give edges when computing balance * @param DecomposeFactor Specifies how decomposed the blocking * of nodes should be. For example, a factor of 2 will make 2 blocks * out of 1 block had the decompose factor been set to 1. */ uint64_t computeMasters(MASTERS_DISTRIBUTION masters_distribution, galois::graphs::OfflineGraph& g, const std::vector& scalefactor, uint32_t nodeWeight = 0, uint32_t edgeWeight = 0, unsigned DecomposeFactor = 1) { galois::Timer timer; timer.start(); g.reset_seek_counters(); uint64_t numNodes_to_divide = g.size(); // compute masters for all nodes switch (masters_distribution) { case BALANCED_MASTERS: computeMastersBlockedNodes(g, scalefactor, DecomposeFactor); break; case BALANCED_MASTERS_AND_EDGES: computeMastersBalancedNodesAndEdges(g, scalefactor, nodeWeight, edgeWeight, DecomposeFactor); break; case BALANCED_EDGES_OF_MASTERS: default: computeMastersBalancedEdges(g, scalefactor, edgeWeight, DecomposeFactor); break; } timer.stop(); galois::runtime::reportStatCond_Tmax( GRNAME, "MasterDistTime", timer.get()); galois::gPrint( "[", id, "] Master distribution time : ", timer.get_usec() / 1000000.0f, " seconds to read ", g.num_bytes_read(), " bytes in ", g.num_seeks(), " seeks (", g.num_bytes_read() / (float)timer.get_usec(), " MBPS)\n"); return numNodes_to_divide; } //! reader assignment from a file //! 
corresponds to master assignment if using an edge cut void readersFromFile(galois::graphs::OfflineGraph& g, std::string filename) { // read file lines std::ifstream mappings(filename); std::string curLine; unsigned timesToRead = id + 1; for (unsigned i = 0; i < timesToRead; i++) { std::getline(mappings, curLine); } std::vector modifyLine(curLine.begin(), curLine.end()); char* tokenizedString = modifyLine.data(); char* token; token = strtok(tokenizedString, " "); // loop 6 more times for (unsigned i = 0; i < 6; i++) { token = strtok(NULL, " "); } std::string left(token); // 3 more times for right for (unsigned i = 0; i < 3; i++) { token = strtok(NULL, " "); } std::string right(token); gid2host.resize(numHosts); gid2host[id].first = std::stoul(left); gid2host[id].second = std::stoul(right) + 1; galois::gPrint("[", id, "] Left: ", gid2host[id].first, ", Right: ", gid2host[id].second, "\n"); ///////////////////////// // send/recv from other hosts ///////////////////////// auto& net = galois::runtime::getSystemNetworkInterface(); for (unsigned h = 0; h < numHosts; ++h) { if (h == id) continue; galois::runtime::SendBuffer b; galois::runtime::gSerialize(b, gid2host[id]); net.sendTagged(h, galois::runtime::evilPhase, b); } net.flush(); unsigned received = 1; while (received < numHosts) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); assert(p->first != id); auto& b = p->second; galois::runtime::gDeserialize(b, gid2host[p->first]); ++received; } increment_evilPhase(); // sanity checking assignment for (unsigned h = 0; h < numHosts; h++) { if (h == 0) { GALOIS_ASSERT(gid2host[h].first == 0); } else if (h == numHosts - 1) { GALOIS_ASSERT(gid2host[h].first == gid2host[h - 1].second, gid2host[h].first, " ", gid2host[h - 1].second); GALOIS_ASSERT(gid2host[h].second == g.size(), gid2host[h].second, " ", g.size()); } else { GALOIS_ASSERT(gid2host[h].first == gid2host[h - 1].second, 
gid2host[h].first, " ", gid2host[h - 1].second); GALOIS_ASSERT(gid2host[h].second == gid2host[h + 1].first, gid2host[h].second, " ", gid2host[h + 1].first); } } } uint32_t G2L(uint64_t gid) const { assert(isLocal(gid)); return globalToLocalMap.at(gid); } uint64_t L2G(uint32_t lid) const { return localToGlobalVector[lid]; } public: //! Type representing a node in this graph using GraphNode = typename GraphTy::GraphNode; //! Expose EdgeTy to other classes using EdgeType = EdgeTy; //! iterator type over nodes using iterator = typename GraphTy::iterator; //! constant iterator type over nodes using const_iterator = typename GraphTy::const_iterator; //! iterator type over edges using edge_iterator = typename GraphTy::edge_iterator; /** * Constructor for DistGraph. Initializes metadata fields. * * @param host host number that this graph resides on * @param numHosts total number of hosts in the currently executing program */ DistGraph(unsigned host, unsigned numHosts) : transposed(false), id(host), numHosts(numHosts) { mirrorNodes.resize(numHosts); numGlobalNodes = 0; numGlobalEdges = 0; } /** * Return a vector of pairs denoting mirror node ranges. * * Assumes all mirror nodes occur after the masters: this invariant should be * held by CuSP. */ std::vector> getMirrorRanges() const { std::vector> mirrorRangesVector; // order of nodes locally is masters, outgoing mirrors, incoming mirrors, // so just get from numOwned to end if (numOwned != numNodes) { assert(numOwned < numNodes); mirrorRangesVector.push_back(std::make_pair(numOwned, numNodes)); } return mirrorRangesVector; } std::vector>& getMirrorNodes() { return mirrorNodes; } private: virtual unsigned getHostIDImpl(uint64_t) const = 0; virtual bool isOwnedImpl(uint64_t) const = 0; virtual bool isLocalImpl(uint64_t) const = 0; virtual bool isVertexCutImpl() const = 0; virtual std::pair cartesianGridImpl() const { return std::make_pair(0u, 0u); } public: virtual ~DistGraph() {} //! 
Determines which host has the master for a particular node //! @returns Host id of node in question inline unsigned getHostID(uint64_t gid) const { return getHostIDImpl(gid); } //! Determine if a node has a master on this host. //! @returns True if passed in global id has a master on this host inline bool isOwned(uint64_t gid) const { return isOwnedImpl(gid); } //! Determine if a node has a proxy on this host //! @returns True if passed in global id has a proxy on this host inline bool isLocal(uint64_t gid) const { return isLocalImpl(gid); } /** * Returns true if current partition is a vertex cut * @returns true if partition being stored in this graph is a vertex cut */ inline bool is_vertex_cut() const { return isVertexCutImpl(); } /** * Returns Cartesian split (if it exists, else returns pair of 0s */ inline std::pair cartesianGrid() const { return cartesianGridImpl(); } bool isTransposed() { return transposed; } /** * Converts a local node id into a global node id * * @param nodeID local node id * @returns global node id corresponding to the local one */ inline uint64_t getGID(const uint32_t nodeID) const { return L2G(nodeID); } /** * Converts a global node id into a local node id * * @param nodeID global node id * @returns local node id corresponding to the global one */ inline uint32_t getLID(const uint64_t nodeID) const { return G2L(nodeID); } /** * Get data of a node. * * @param N node to get the data of * @param mflag access flag for node data * @returns A node data object */ inline typename GraphTy::node_data_reference getData(GraphNode N, galois::MethodFlag mflag = galois::MethodFlag::UNPROTECTED) { auto& r = graph.getData(N, mflag); return r; } /** * Get the edge data for a particular edge in the graph. 
* * @param ni edge to get the data of * @param mflag access flag for edge data * @returns The edge data for the requested edge */ inline typename GraphTy::edge_data_reference getEdgeData(edge_iterator ni, galois::MethodFlag mflag = galois::MethodFlag::UNPROTECTED) { auto& r = graph.getEdgeData(ni, mflag); return r; } /** * Gets edge destination of edge ni. * * @param ni edge id to get destination of * @returns Local ID of destination of edge ni */ GraphNode getEdgeDst(edge_iterator ni) { return graph.getEdgeDst(ni); } /** * Gets the first edge of some node. * * @param N node to get the edge of * @returns iterator to first edge of N */ inline edge_iterator edge_begin(GraphNode N) { return graph.edge_begin(N, galois::MethodFlag::UNPROTECTED); } /** * Gets the end edge boundary of some node. * * @param N node to get the edge of * @returns iterator to the end of the edges of node N, i.e. the first edge * of the next node (or an "end" iterator if there is no next node) */ inline edge_iterator edge_end(GraphNode N) { return graph.edge_end(N, galois::MethodFlag::UNPROTECTED); } /** * Returns an iterable object over the edges of a particular node in the * graph. * * @param N node to get edges iterator over */ inline galois::runtime::iterable> edges(GraphNode N) { return galois::graphs::internal::make_no_deref_range(edge_begin(N), edge_end(N)); } /** * Gets number of nodes on this (local) graph. * * @returns number of nodes present in this (local) graph */ inline size_t size() const { return graph.size(); } /** * Gets number of edges on this (local) graph. * * @returns number of edges present in this (local) graph */ inline size_t sizeEdges() const { return graph.sizeEdges(); } /** * Gets number of nodes on this (local) graph. * * @returns number of nodes present in this (local) graph */ inline size_t numMasters() const { return numOwned; } /** * Gets number of nodes with edges (may include nodes without edges) * on this (local) graph. 
* * @returns number of nodes with edges (may include nodes without edges * as it measures a contiguous range) */ inline size_t getNumNodesWithEdges() const { return numNodesWithEdges; } /** * Gets number of nodes on the global unpartitioned graph. * * @returns number of nodes present in the global unpartitioned graph */ inline size_t globalSize() const { return numGlobalNodes; } /** * Gets number of edges on the global unpartitioned graph. * * @returns number of edges present in the global unpartitioned graph */ inline size_t globalSizeEdges() const { return numGlobalEdges; } /** * Returns a range object that encapsulates all nodes of the graph. * * @returns A range object that contains all the nodes in this graph */ inline const NodeRangeType& allNodesRange() const { assert(specificRanges.size() == 3); return specificRanges[0]; } /** * Returns a range object that encapsulates only master nodes in this * graph. * * @returns A range object that contains the master nodes in this graph */ inline const NodeRangeType& masterNodesRange() const { assert(specificRanges.size() == 3); return specificRanges[1]; } /** * Returns a range object that encapsulates master nodes and nodes * with edges in this graph. * * @returns A range object that contains the master nodes and the nodes * with outgoing edges in this graph */ inline const NodeRangeType& allNodesWithEdgesRange() const { assert(specificRanges.size() == 3); return specificRanges[2]; } /** * Returns a vector object that contains the global IDs (in order) of * the master nodes in this graph. * * @returns A vector object that contains the global IDs (in order) of * the master nodes in this graph */ std::vector getMasterGlobalIDs() { std::vector IDs; IDs.reserve(numMasters()); for (auto node : masterNodesRange()) { IDs.push_back(getGID(node)); } return IDs; } protected: /** * Uses a pre-computed prefix sum to determine division of nodes among * threads. * * The call uses binary search to determine the ranges. 
*/ inline void determineThreadRanges() { allNodesRanges = galois::graphs::determineUnitRangesFromPrefixSum( galois::runtime::activeThreads, graph.getEdgePrefixSum()); } /** * Determines the thread ranges for master nodes only and saves them to * the object. * * Only call after graph is constructed + only call once */ inline void determineThreadRangesMaster() { // make sure this hasn't been called before assert(masterRanges.size() == 0); // first check if we even need to do any work; if already calculated, // use already calculated vector if (beginMaster == 0 && (beginMaster + numOwned) == size()) { masterRanges = allNodesRanges; } else if (beginMaster == 0 && (beginMaster + numOwned) == numNodesWithEdges && withEdgeRanges.size() != 0) { masterRanges = withEdgeRanges; } else { galois::gDebug("Manually det. master thread ranges"); masterRanges = galois::graphs::determineUnitRangesFromGraph( graph, galois::runtime::activeThreads, beginMaster, beginMaster + numOwned, 0); } } /** * Determines the thread ranges for nodes with edges only and saves them to * the object. * * Only call after graph is constructed + only call once */ inline void determineThreadRangesWithEdges() { // make sure not called before assert(withEdgeRanges.size() == 0); // first check if we even need to do any work; if already calculated, // use already calculated vector if (numNodesWithEdges == size()) { withEdgeRanges = allNodesRanges; } else if (beginMaster == 0 && (beginMaster + numOwned) == numNodesWithEdges && masterRanges.size() != 0) { withEdgeRanges = masterRanges; } else { galois::gDebug("Manually det. with edges thread ranges"); withEdgeRanges = galois::graphs::determineUnitRangesFromGraph( graph, galois::runtime::activeThreads, 0, numNodesWithEdges, 0); } } /** * Initializes the 3 range objects that a user can access to iterate * over the graph in different ways. 
*/ void initializeSpecificRanges() { assert(specificRanges.size() == 0); // TODO/FIXME assertion likely not safe if a host gets no nodes // make sure the thread ranges have already been calculated // for the 3 ranges assert(allNodesRanges.size() != 0); assert(masterRanges.size() != 0); assert(withEdgeRanges.size() != 0); // 0 is all nodes specificRanges.push_back(galois::runtime::makeSpecificRange( boost::counting_iterator(0), boost::counting_iterator(size()), allNodesRanges.data())); // 1 is master nodes specificRanges.push_back(galois::runtime::makeSpecificRange( boost::counting_iterator(beginMaster), boost::counting_iterator(beginMaster + numOwned), masterRanges.data())); // 2 is with edge nodes specificRanges.push_back(galois::runtime::makeSpecificRange( boost::counting_iterator(0), boost::counting_iterator(numNodesWithEdges), withEdgeRanges.data())); assert(specificRanges.size() == 3); } /** * Specific range editor: makes the range for edges equivalent to the range * for masters. */ void edgesEqualMasters() { specificRanges[2] = specificRanges[1]; } public: /** * Write the local LC_CSR graph to the file on a disk. * * @todo revive this */ void save_local_graph_to_file(std::string) { GALOIS_DIE("not implemented"); } /** * Read the local LC_CSR graph from the file on a disk. * * @todo revive this */ void read_local_graph_from_file(std::string) { GALOIS_DIE("not implemented"); } /** * Deallocates underlying LC CSR Graph */ void deallocate() { galois::gDebug("Deallocating CSR in DistGraph"); graph.deallocate(); } /** * Sort the underlying LC_CSR_Graph by ID (destinations) * It sorts edges of the nodes by destination. 
*/ void sortEdgesByDestination() { using GN = typename GraphTy::GraphNode; galois::do_all( galois::iterate(graph), [&](GN n) { graph.sortEdges(n, IdLess()); }, galois::no_stats(), galois::loopname("CSREdgeSort"), galois::steal()); } }; template constexpr const char* const galois::graphs::DistGraph::GRNAME; } // end namespace graphs } // end namespace galois #endif //_GALOIS_DIST_HGRAPH_H ================================================ FILE: libcusp/include/galois/graphs/GenericPartitioners.h ================================================ #ifndef _GALOIS_DIST_GENERICPARTS_H #define _GALOIS_DIST_GENERICPARTS_H #include "DistributedGraph.h" #include "BasePolicies.h" #include #include #include class NoCommunication : public galois::graphs::ReadMasterAssignment { public: NoCommunication(uint32_t, uint32_t numHosts, uint64_t, uint64_t) : galois::graphs::ReadMasterAssignment(0, numHosts, 0, 0) {} uint32_t getEdgeOwner(uint32_t src, uint32_t, uint64_t) const { return retrieveMaster(src); } bool noCommunication() { return true; } bool isVertexCut() const { return false; } void serializePartition(boost::archive::binary_oarchive&) {} void deserializePartition(boost::archive::binary_iarchive&) {} std::pair cartesianGrid() { return std::make_pair(0u, 0u); } }; /** */ class MiningPolicyNaive : public galois::graphs::ReadMasterAssignment { public: MiningPolicyNaive(uint32_t, uint32_t numHosts, uint64_t, uint64_t, std::vector&) : galois::graphs::ReadMasterAssignment(0, numHosts, 0, 0) {} static bool needNodeDegrees() { return false; } bool keepEdge(uint32_t src, uint32_t dst) const { return src < dst; } }; class MiningPolicyDegrees : public galois::graphs::ReadMasterAssignment { std::vector& ndegrees; public: MiningPolicyDegrees(uint32_t, uint32_t numHosts, uint64_t, uint64_t, std::vector& _ndeg) : galois::graphs::ReadMasterAssignment(0, numHosts, 0, 0), ndegrees(_ndeg) {} static bool needNodeDegrees() { return true; } bool keepEdge(uint32_t src, uint32_t dst) const { uint64_t 
sourceDegree = ndegrees[src]; uint64_t destDegree = ndegrees[dst]; if ((destDegree > sourceDegree) || ((destDegree == sourceDegree) && (src < dst))) { return true; } else { return false; } } }; //////////////////////////////////////////////////////////////////////////////// class GenericCVC : public galois::graphs::ReadMasterAssignment { unsigned numRowHosts; unsigned numColumnHosts; unsigned _h_offset; void factorizeHosts() { numColumnHosts = sqrt(_numHosts); while ((_numHosts % numColumnHosts) != 0) numColumnHosts--; numRowHosts = _numHosts / numColumnHosts; assert(numRowHosts >= numColumnHosts); // if (moreColumnHosts) { // std::swap(numRowHosts, numColumnHosts); //} if (_hostID == 0) { galois::gPrint("Cartesian grid: ", numRowHosts, " x ", numColumnHosts, "\n"); } } //! Returns the grid row ID of this host unsigned gridRowID() const { return (_hostID / numColumnHosts); } //! Returns the grid row ID of the specified host unsigned gridRowID(unsigned id) const { return (id / numColumnHosts); } //! Returns the grid column ID of this host unsigned gridColumnID() const { return (_hostID % numColumnHosts); } //! Returns the grid column ID of the specified host unsigned gridColumnID(unsigned id) const { return (id % numColumnHosts); } //! 
Find the column of a particular node unsigned getColumnOfNode(uint64_t gid) const { return gridColumnID(retrieveMaster(gid)); } public: GenericCVC(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::ReadMasterAssignment(hostID, numHosts, numNodes, numEdges) { factorizeHosts(); _h_offset = gridRowID() * numColumnHosts; } uint32_t getEdgeOwner(uint32_t, uint32_t dst, uint64_t) const { int i = getColumnOfNode(dst); return _h_offset + i; } bool noCommunication() { return false; } bool isVertexCut() const { if ((numRowHosts == 1) || (numColumnHosts == 1)) return false; return true; } void serializePartition(boost::archive::binary_oarchive& ar) { ar << numRowHosts; ar << numColumnHosts; } void deserializePartition(boost::archive::binary_iarchive& ar) { ar >> numRowHosts; ar >> numColumnHosts; } std::pair cartesianGrid() { return std::make_pair(numRowHosts, numColumnHosts); } }; //////////////////////////////////////////////////////////////////////////////// // same as above, except columns are flipped (changes behavior of vertex cut // call as well) class GenericCVCColumnFlip : public galois::graphs::ReadMasterAssignment { unsigned numRowHosts; unsigned numColumnHosts; unsigned _h_offset; void factorizeHosts() { numColumnHosts = sqrt(_numHosts); while ((_numHosts % numColumnHosts) != 0) numColumnHosts--; numRowHosts = _numHosts / numColumnHosts; assert(numRowHosts >= numColumnHosts); // column flip std::swap(numRowHosts, numColumnHosts); if (_hostID == 0) { galois::gPrint("Cartesian grid: ", numRowHosts, " x ", numColumnHosts, "\n"); } } //! Returns the grid row ID of this host unsigned gridRowID() const { return (_hostID / numColumnHosts); } //! Returns the grid row ID of the specified host unsigned gridRowID(unsigned id) const { return (id / numColumnHosts); } //! Returns the grid column ID of this host unsigned gridColumnID() const { return (_hostID % numColumnHosts); } //! 
Returns the grid column ID of the specified host unsigned gridColumnID(unsigned id) const { return (id % numColumnHosts); } //! Find the column of a particular node unsigned getColumnOfNode(uint64_t gid) const { return gridColumnID(retrieveMaster(gid)); } public: GenericCVCColumnFlip(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::ReadMasterAssignment(hostID, numHosts, numNodes, numEdges) { factorizeHosts(); _h_offset = gridRowID() * numColumnHosts; } uint32_t getEdgeOwner(uint32_t, uint32_t dst, uint64_t) const { int i = getColumnOfNode(dst); return _h_offset + i; } bool noCommunication() { return false; } bool isVertexCut() const { if ((numRowHosts == 1) && (numColumnHosts == 1)) return false; return true; } void serializePartition(boost::archive::binary_oarchive& ar) { ar << numRowHosts; ar << numColumnHosts; } void deserializePartition(boost::archive::binary_iarchive& ar) { ar >> numRowHosts; ar >> numColumnHosts; } std::pair cartesianGrid() { return std::make_pair(numRowHosts, numColumnHosts); } }; //////////////////////////////////////////////////////////////////////////////// class GenericHVC : public galois::graphs::ReadMasterAssignment { uint32_t _vCutThreshold; public: GenericHVC(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::ReadMasterAssignment(hostID, numHosts, numNodes, numEdges) { _vCutThreshold = 1000; // can be changed, but default seems to be 1000 } uint32_t getEdgeOwner(uint32_t src, uint32_t dst, uint64_t numEdges) const { if (numEdges > _vCutThreshold) { return retrieveMaster(dst); } else { return retrieveMaster(src); } } bool noCommunication() { return false; } // TODO I should be able to make this runtime detectable bool isVertexCut() const { return true; } void serializePartition(boost::archive::binary_oarchive&) {} void deserializePartition(boost::archive::binary_iarchive&) {} std::pair cartesianGrid() { return std::make_pair(0u, 0u); } }; 
//////////////////////////////////////////////////////////////////////////////// class GingerP : public galois::graphs::CustomMasterAssignment { // used in hybrid cut uint32_t _vCutThreshold; // ginger scoring constants double _gamma; double _alpha; // ginger node/edge ratio double _neRatio; /** * Returns Ginger's composite balance parameter for a given host */ double getCompositeBalanceParam( unsigned host, const std::vector& nodeLoads, const std::vector>& nodeAccum, const std::vector& edgeLoads, const std::vector>& edgeAccum) { // get node/edge loads uint64_t hostNodeLoad = nodeLoads[host] + nodeAccum[host].load(); uint64_t hostEdgeLoad = edgeLoads[host] + edgeAccum[host].load(); return (hostNodeLoad + (_neRatio * hostEdgeLoad)) / 2; } /** * Use FENNEL balance equation to get a score value for partition * scoring */ double getFennelBalanceScore(double param) { return _alpha * _gamma * pow(param, _gamma - 1); } public: GingerP(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::CustomMasterAssignment(hostID, numHosts, numNodes, numEdges) { _vCutThreshold = 1000; _gamma = 1.5; _alpha = numEdges * pow(numHosts, _gamma - 1.0) / pow(numNodes, _gamma); _neRatio = (double)numNodes / (double)numEdges; } template uint32_t getMaster(uint32_t src, galois::graphs::BufferedGraph& bufGraph, const std::vector& localNodeToMaster, std::unordered_map& gid2offsets, const std::vector& nodeLoads, std::vector>& nodeAccum, const std::vector& edgeLoads, std::vector>& edgeAccum) { auto ii = bufGraph.edgeBegin(src); auto ee = bufGraph.edgeEnd(src); // number of edges uint64_t ne = std::distance(ii, ee); // high in-degree nodes masters stay the same if (ne > _vCutThreshold) { return _hostID; } else { // low in degree masters move based on augmented FENNEL scoring metric // initialize array to hold scores galois::PODResizeableArray scores; scores.resize(_numHosts); for (unsigned i = 0; i < _numHosts; i++) { scores[i] = 0.0; } for (; ii < ee; ++ii) { 
uint64_t dst = bufGraph.edgeDestination(*ii); size_t offsetIntoMap = (unsigned)-1; auto it = gid2offsets.find(dst); if (it != gid2offsets.end()) { offsetIntoMap = it->second; } else { // determine offset offsetIntoMap = dst - bufGraph.getNodeOffset(); } assert(offsetIntoMap != (unsigned)-1); assert(offsetIntoMap < localNodeToMaster.size()); unsigned currentAssignment = localNodeToMaster[offsetIntoMap]; if (currentAssignment != (unsigned)-1) { scores[currentAssignment] += 1.0; } else { galois::gDebug("[", _hostID, "] ", dst, " unassigned"); } } // subtraction of the composite balance term for (unsigned i = 0; i < _numHosts; i++) { scores[i] -= getFennelBalanceScore(getCompositeBalanceParam( i, nodeLoads, nodeAccum, edgeLoads, edgeAccum)); } unsigned bestHost = -1; double bestScore = std::numeric_limits::lowest(); // find max score for (unsigned i = 0; i < _numHosts; i++) { if (scores[i] >= bestScore) { // galois::gDebug("best score ", bestScore, " beaten by ", scores[i]); bestScore = scores[i]; bestHost = i; } } galois::gDebug("[", _hostID, "] ", src, " assigned to ", bestHost, " with num edge ", ne); // update metadata; TODO make this a nicer interface galois::atomicAdd(nodeAccum[bestHost], (uint64_t)1); galois::atomicAdd(edgeAccum[bestHost], ne); return bestHost; } } uint32_t getEdgeOwner(uint32_t src, uint32_t dst, uint64_t numEdges) const { // if high indegree, then move to source (which is dst), else stay on // dst (which is src) // note "dst" here is actually the source on the actual graph // since we're reading transpose if (numEdges > _vCutThreshold) { return retrieveMaster(dst); } else { return retrieveMaster(src); } } bool noCommunication() { return false; } // TODO I should be able to make this runtime detectable bool isVertexCut() const { return true; } void serializePartition(boost::archive::binary_oarchive&) {} void deserializePartition(boost::archive::binary_iarchive&) {} std::pair cartesianGrid() { return std::make_pair(0u, 0u); } }; class FennelP : 
public galois::graphs::CustomMasterAssignment { // used in hybrid cut uint32_t _vCutThreshold; // ginger scoring constants double _gamma; double _alpha; // ginger node/edge ratio double _neRatio; /** * Returns Ginger's composite balance parameter for a given host */ double getCompositeBalanceParam( unsigned host, const std::vector& nodeLoads, const std::vector>& nodeAccum, const std::vector& edgeLoads, const std::vector>& edgeAccum) { // get node/edge loads uint64_t hostNodeLoad = nodeLoads[host] + nodeAccum[host].load(); uint64_t hostEdgeLoad = edgeLoads[host] + edgeAccum[host].load(); return (hostNodeLoad + (_neRatio * hostEdgeLoad)) / 2; } /** * Use FENNEL balance equation to get a score value for partition * scoring */ double getFennelBalanceScore(double param) { return _alpha * _gamma * pow(param, _gamma - 1); } public: FennelP(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::CustomMasterAssignment(hostID, numHosts, numNodes, numEdges) { _vCutThreshold = 1000; _gamma = 1.5; _alpha = numEdges * pow(numHosts, _gamma - 1.0) / pow(numNodes, _gamma); _neRatio = (double)numNodes / (double)numEdges; } template uint32_t getMaster(uint32_t src, galois::graphs::BufferedGraph& bufGraph, const std::vector& localNodeToMaster, std::unordered_map& gid2offsets, const std::vector& nodeLoads, std::vector>& nodeAccum, const std::vector& edgeLoads, std::vector>& edgeAccum) { auto ii = bufGraph.edgeBegin(src); auto ee = bufGraph.edgeEnd(src); // number of edges uint64_t ne = std::distance(ii, ee); // high degree nodes masters stay the same if (ne > _vCutThreshold) { return _hostID; } else { // low degree masters move based on augmented FENNEL scoring metric // initialize array to hold scores galois::PODResizeableArray scores; scores.resize(_numHosts); for (unsigned i = 0; i < _numHosts; i++) { scores[i] = 0.0; } for (; ii < ee; ++ii) { uint64_t dst = bufGraph.edgeDestination(*ii); size_t offsetIntoMap = (unsigned)-1; auto it = 
gid2offsets.find(dst); if (it != gid2offsets.end()) { offsetIntoMap = it->second; } else { // determine offset offsetIntoMap = dst - bufGraph.getNodeOffset(); } assert(offsetIntoMap != (unsigned)-1); assert(offsetIntoMap < localNodeToMaster.size()); unsigned currentAssignment = localNodeToMaster[offsetIntoMap]; if (currentAssignment != (unsigned)-1) { scores[currentAssignment] += 1.0; } else { galois::gDebug("[", _hostID, "] ", dst, " unassigned"); } } // subtraction of the composite balance term for (unsigned i = 0; i < _numHosts; i++) { scores[i] -= getFennelBalanceScore(getCompositeBalanceParam( i, nodeLoads, nodeAccum, edgeLoads, edgeAccum)); } unsigned bestHost = -1; double bestScore = std::numeric_limits::lowest(); // find max score for (unsigned i = 0; i < _numHosts; i++) { if (scores[i] >= bestScore) { // galois::gDebug("best score ", bestScore, " beaten by ", scores[i]); bestScore = scores[i]; bestHost = i; } } galois::gDebug("[", _hostID, "] ", src, " assigned to ", bestHost, " with num edge ", ne); // update metadata; TODO make this a nicer interface galois::atomicAdd(nodeAccum[bestHost], (uint64_t)1); galois::atomicAdd(edgeAccum[bestHost], ne); return bestHost; } } // Fennel is an edge cut: all edges on source uint32_t getEdgeOwner(uint32_t src, uint32_t, uint64_t) const { return retrieveMaster(src); } bool noCommunication() { return false; } // TODO I should be able to make this runtime detectable bool isVertexCut() const { return false; } void serializePartition(boost::archive::binary_oarchive&) {} void deserializePartition(boost::archive::binary_iarchive&) {} std::pair cartesianGrid() { return std::make_pair(0u, 0u); } }; class SugarP : public galois::graphs::CustomMasterAssignment { // used in hybrid cut uint32_t _vCutThreshold; // ginger scoring constants double _gamma; double _alpha; // ginger node/edge ratio double _neRatio; unsigned numRowHosts; unsigned numColumnHosts; void factorizeHosts() { numColumnHosts = sqrt(_numHosts); while ((_numHosts 
% numColumnHosts) != 0) numColumnHosts--; numRowHosts = _numHosts / numColumnHosts; assert(numRowHosts >= numColumnHosts); if (_hostID == 0) { galois::gPrint("Cartesian grid: ", numRowHosts, " x ", numColumnHosts, "\n"); } } //! Returns the grid row ID of this host unsigned gridRowID() const { return (_hostID / numColumnHosts); } //! Returns the grid row ID of the specified host unsigned gridRowID(unsigned id) const { return (id / numColumnHosts); } //! Returns the grid column ID of this host unsigned gridColumnID() const { return (_hostID % numColumnHosts); } //! Returns the grid column ID of the specified host unsigned gridColumnID(unsigned id) const { return (id % numColumnHosts); } //! Find the row of a particular node unsigned getRowOfNode(uint64_t gid) const { return gridRowID(retrieveMaster(gid)); } //! Find the column of a particular node unsigned getColumnOfNode(uint64_t gid) const { return gridColumnID(retrieveMaster(gid)); } /** * Returns Ginger's composite balance parameter for a given host */ double getCompositeBalanceParam( unsigned host, const std::vector& nodeLoads, const std::vector>& nodeAccum, const std::vector& edgeLoads, const std::vector>& edgeAccum) { // get node/edge loads uint64_t hostNodeLoad = nodeLoads[host] + nodeAccum[host].load(); uint64_t hostEdgeLoad = edgeLoads[host] + edgeAccum[host].load(); return (hostNodeLoad + (_neRatio * hostEdgeLoad)) / 2; } /** * Use FENNEL balance equation to get a score value for partition * scoring */ double getFennelBalanceScore(double param) { return _alpha * _gamma * pow(param, _gamma - 1); } public: SugarP(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::CustomMasterAssignment(hostID, numHosts, numNodes, numEdges) { _vCutThreshold = 1000; _gamma = 1.5; _alpha = numEdges * pow(numHosts, _gamma - 1.0) / pow(numNodes, _gamma); _neRatio = (double)numNodes / (double)numEdges; // CVC things factorizeHosts(); } template uint32_t getMaster(uint32_t src, 
galois::graphs::BufferedGraph& bufGraph, const std::vector& localNodeToMaster, std::unordered_map& gid2offsets, const std::vector& nodeLoads, std::vector>& nodeAccum, const std::vector& edgeLoads, std::vector>& edgeAccum) { auto ii = bufGraph.edgeBegin(src); auto ee = bufGraph.edgeEnd(src); // number of edges uint64_t ne = std::distance(ii, ee); // high degree nodes masters stay the same if (ne > _vCutThreshold) { return _hostID; } else { // low degree masters move based on augmented FENNEL scoring metric // initialize array to hold scores galois::PODResizeableArray scores; scores.resize(_numHosts); for (unsigned i = 0; i < _numHosts; i++) { scores[i] = 0.0; } for (; ii < ee; ++ii) { uint64_t dst = bufGraph.edgeDestination(*ii); size_t offsetIntoMap = (unsigned)-1; auto it = gid2offsets.find(dst); if (it != gid2offsets.end()) { offsetIntoMap = it->second; } else { // determine offset offsetIntoMap = dst - bufGraph.getNodeOffset(); } assert(offsetIntoMap != (unsigned)-1); assert(offsetIntoMap < localNodeToMaster.size()); unsigned currentAssignment = localNodeToMaster[offsetIntoMap]; if (currentAssignment != (unsigned)-1) { scores[currentAssignment] += 1.0; } else { // galois::gDebug("[", _hostID, "] ", dst, " unassigned"); } } // subtraction of the composite balance term for (unsigned i = 0; i < _numHosts; i++) { scores[i] -= getFennelBalanceScore(getCompositeBalanceParam( i, nodeLoads, nodeAccum, edgeLoads, edgeAccum)); } unsigned bestHost = -1; double bestScore = std::numeric_limits::lowest(); // find max score for (unsigned i = 0; i < _numHosts; i++) { if (scores[i] >= bestScore) { // galois::gDebug("best score ", bestScore, " beaten by ", scores[i]); bestScore = scores[i]; bestHost = i; } } galois::gDebug("[", _hostID, "] ", src, " assigned to ", bestHost, " with num edge ", ne); // update metadata; TODO make this a nicer interface galois::atomicAdd(nodeAccum[bestHost], (uint64_t)1); galois::atomicAdd(edgeAccum[bestHost], ne); return bestHost; } } /** * return 
owner of edge using cartesian edge owner determination */ uint32_t getEdgeOwner(uint32_t src, uint32_t dst, uint64_t) const { unsigned blockedRowOffset = getRowOfNode(src) * numColumnHosts; unsigned cyclicColumnOffset = getColumnOfNode(dst); return blockedRowOffset + cyclicColumnOffset; } bool noCommunication() { return false; } bool isVertexCut() const { if ((numRowHosts == 1) || (numColumnHosts == 1)) return false; return true; } void serializePartition(boost::archive::binary_oarchive& ar) { ar << numRowHosts; ar << numColumnHosts; } void deserializePartition(boost::archive::binary_iarchive& ar) { ar >> numRowHosts; ar >> numColumnHosts; } std::pair cartesianGrid() { return std::make_pair(numRowHosts, numColumnHosts); } }; class SugarColumnFlipP : public galois::graphs::CustomMasterAssignment { // used in hybrid cut uint32_t _vCutThreshold; // ginger scoring constants double _gamma; double _alpha; // ginger node/edge ratio double _neRatio; unsigned numRowHosts; unsigned numColumnHosts; void factorizeHosts() { numColumnHosts = sqrt(_numHosts); while ((_numHosts % numColumnHosts) != 0) numColumnHosts--; numRowHosts = _numHosts / numColumnHosts; assert(numRowHosts >= numColumnHosts); // column flip std::swap(numRowHosts, numColumnHosts); if (_hostID == 0) { galois::gPrint("Cartesian grid: ", numRowHosts, " x ", numColumnHosts, "\n"); } } //! Returns the grid row ID of this host unsigned gridRowID() const { return (_hostID / numColumnHosts); } //! Returns the grid row ID of the specified host unsigned gridRowID(unsigned id) const { return (id / numColumnHosts); } //! Returns the grid column ID of this host unsigned gridColumnID() const { return (_hostID % numColumnHosts); } //! Returns the grid column ID of the specified host unsigned gridColumnID(unsigned id) const { return (id % numColumnHosts); } //! Find the row of a particular node unsigned getRowOfNode(uint64_t gid) const { return gridRowID(retrieveMaster(gid)); } //! 
Find the column of a particular node unsigned getColumnOfNode(uint64_t gid) const { return gridColumnID(retrieveMaster(gid)); } /** * Returns Ginger's composite balance parameter for a given host */ double getCompositeBalanceParam( unsigned host, const std::vector& nodeLoads, const std::vector>& nodeAccum, const std::vector& edgeLoads, const std::vector>& edgeAccum) { // get node/edge loads uint64_t hostNodeLoad = nodeLoads[host] + nodeAccum[host].load(); uint64_t hostEdgeLoad = edgeLoads[host] + edgeAccum[host].load(); return (hostNodeLoad + (_neRatio * hostEdgeLoad)) / 2; } /** * Use FENNEL balance equation to get a score value for partition * scoring */ double getFennelBalanceScore(double param) { return _alpha * _gamma * pow(param, _gamma - 1); } public: SugarColumnFlipP(uint32_t hostID, uint32_t numHosts, uint64_t numNodes, uint64_t numEdges) : galois::graphs::CustomMasterAssignment(hostID, numHosts, numNodes, numEdges) { _vCutThreshold = 1000; _gamma = 1.5; _alpha = numEdges * pow(numHosts, _gamma - 1.0) / pow(numNodes, _gamma); _neRatio = (double)numNodes / (double)numEdges; // CVC things factorizeHosts(); } template uint32_t getMaster(uint32_t src, galois::graphs::BufferedGraph& bufGraph, const std::vector& localNodeToMaster, std::unordered_map& gid2offsets, const std::vector& nodeLoads, std::vector>& nodeAccum, const std::vector& edgeLoads, std::vector>& edgeAccum) { auto ii = bufGraph.edgeBegin(src); auto ee = bufGraph.edgeEnd(src); // number of edges uint64_t ne = std::distance(ii, ee); // high degree nodes masters stay the same if (ne > _vCutThreshold) { return _hostID; } else { // low degree masters move based on augmented FENNEL scoring metric // initialize array to hold scores galois::PODResizeableArray scores; scores.resize(_numHosts); for (unsigned i = 0; i < _numHosts; i++) { scores[i] = 0.0; } for (; ii < ee; ++ii) { uint64_t dst = bufGraph.edgeDestination(*ii); size_t offsetIntoMap = (unsigned)-1; auto it = gid2offsets.find(dst); if (it != 
gid2offsets.end()) { offsetIntoMap = it->second; } else { // determine offset offsetIntoMap = dst - bufGraph.getNodeOffset(); } assert(offsetIntoMap != (unsigned)-1); assert(offsetIntoMap < localNodeToMaster.size()); unsigned currentAssignment = localNodeToMaster[offsetIntoMap]; if (currentAssignment != (unsigned)-1) { scores[currentAssignment] += 1.0; } else { galois::gDebug("[", _hostID, "] ", dst, " unassigned"); } } // subtraction of the composite balance term for (unsigned i = 0; i < _numHosts; i++) { scores[i] -= getFennelBalanceScore(getCompositeBalanceParam( i, nodeLoads, nodeAccum, edgeLoads, edgeAccum)); } unsigned bestHost = -1; double bestScore = std::numeric_limits::lowest(); // find max score for (unsigned i = 0; i < _numHosts; i++) { if (scores[i] >= bestScore) { // galois::gDebug("best score ", bestScore, " beaten by ", scores[i]); bestScore = scores[i]; bestHost = i; } } galois::gDebug("[", _hostID, "] ", src, " assigned to ", bestHost, " with num edge ", ne); // update metadata; TODO make this a nicer interface galois::atomicAdd(nodeAccum[bestHost], (uint64_t)1); galois::atomicAdd(edgeAccum[bestHost], ne); return bestHost; } } /** * return owner of edge using cartesian edge owner determination */ uint32_t getEdgeOwner(uint32_t src, uint32_t dst, uint64_t) const { unsigned blockedRowOffset = getRowOfNode(src) * numColumnHosts; unsigned cyclicColumnOffset = getColumnOfNode(dst); return blockedRowOffset + cyclicColumnOffset; } bool noCommunication() { return false; } bool isVertexCut() const { if ((numRowHosts == 1) && (numColumnHosts == 1)) return false; return true; } void serializePartition(boost::archive::binary_oarchive& ar) { ar << numRowHosts; ar << numColumnHosts; } void deserializePartition(boost::archive::binary_iarchive& ar) { ar >> numRowHosts; ar >> numColumnHosts; } std::pair cartesianGrid() { return std::make_pair(numRowHosts, numColumnHosts); } }; #endif ================================================ FILE: 
libcusp/include/galois/graphs/MiningPartitioner.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2019, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file MiningPartitioner.h * * Graph mining partitioning that duplicates edges. Currently only supports an * outgoing edge cut. * * TODO lots of code dpulication here with regular cusp partitioner; need to * merge */ #ifndef _GALOIS_DIST_MINING_H #define _GALOIS_DIST_MINING_H #include "galois/graphs/DistributedGraph.h" #include "galois/DReducible.h" namespace galois { namespace graphs { /** * @tparam NodeTy type of node data for the graph * @tparam EdgeTy type of edge data for the graph * * @todo fully document and clean up code * @warning not meant for public use + not fully documented yet */ template class MiningGraph : public DistGraph { //! 
size used to buffer edge sends during partitioning constexpr static unsigned edgePartitionSendBufSize = 8388608; constexpr static const char* const GRNAME = "dGraph_Mining"; std::unique_ptr graphPartitioner; uint32_t G2LEdgeCut(uint64_t gid, uint32_t globalOffset) const { assert(base_DistGraph::isLocal(gid)); // optimized for edge cuts if (gid >= globalOffset && gid < globalOffset + base_DistGraph::numOwned) return gid - globalOffset; return base_DistGraph::globalToLocalMap.at(gid); } /** * Free memory of a vector by swapping an empty vector with it */ template void freeVector(V& vectorToKill) { V dummyVector; vectorToKill.swap(dummyVector); } uint32_t nodesToReceive; uint64_t myKeptEdges; uint64_t myReadEdges; uint64_t globalKeptEdges; uint64_t totalEdgeProxies; std::vector> mirrorEdges; std::unordered_map localEdgeGIDToLID; std::vector getNodeDegrees(const std::string filename, uint32_t numNodes) { std::vector nodeDegrees; nodeDegrees.resize(numNodes); // read in prefix sum from GR on disk std::ifstream graphFile(filename.c_str()); graphFile.seekg(sizeof(uint64_t) * 4); uint64_t* outIndexBuffer = (uint64_t*)malloc(sizeof(uint64_t) * numNodes); if (outIndexBuffer == nullptr) { GALOIS_DIE("out of memory"); } uint64_t numBytesToLoad = numNodes * sizeof(uint64_t); uint64_t bytesRead = 0; while (numBytesToLoad > 0) { graphFile.read(((char*)outIndexBuffer) + bytesRead, numBytesToLoad); size_t numRead = graphFile.gcount(); numBytesToLoad -= numRead; bytesRead += numRead; } assert(numBytesToLoad == 0); galois::do_all( galois::iterate(0u, numNodes), [&](unsigned n) { if (n != 0) { nodeDegrees[n] = outIndexBuffer[n] - outIndexBuffer[n - 1]; } else { nodeDegrees[n] = outIndexBuffer[0]; } // galois::gDebug(n, " degree ", nodeDegrees[n]); }, galois::loopname("GetNodeDegrees"), galois::no_stats()); free(outIndexBuffer); #ifndef NDEBUG if (base_DistGraph::id == 0) { galois::gDebug("Sanity checking node degrees"); } galois::GAccumulator edgeCount; galois::do_all( 
galois::iterate(0u, numNodes), [&](unsigned n) { edgeCount += nodeDegrees[n]; }, galois::loopname("SanityCheckDegrees"), galois::no_stats()); GALOIS_ASSERT(edgeCount.reduce() == base_DistGraph::numGlobalEdges); #endif return nodeDegrees; } virtual unsigned getHostIDImpl(uint64_t gid) const { assert(gid < base_DistGraph::numGlobalNodes); return graphPartitioner->retrieveMaster(gid); } virtual bool isOwnedImpl(uint64_t gid) const { assert(gid < base_DistGraph::numGlobalNodes); return (graphPartitioner->retrieveMaster(gid) == base_DistGraph::id); } virtual bool isLocalImpl(uint64_t gid) const { assert(gid < base_DistGraph::numGlobalNodes); return (base_DistGraph::globalToLocalMap.find(gid) != base_DistGraph::globalToLocalMap.end()); } virtual bool isVertexCutImpl() const { return false; } public: //! typedef for base DistGraph class using base_DistGraph = DistGraph; /** * Returns edges owned by this graph (i.e. read). */ uint64_t numOwnedEdges() const { return myKeptEdges; } /** * Returns # edges kept in all graphs. */ uint64_t globalEdges() const { return globalKeptEdges; } std::vector>& getMirrorEdges() { return mirrorEdges; } /** * Return the reader of a particular node. 
* @param gid GID of node to get reader of * @return Host reader of node passed in as param */ unsigned getHostReader(uint64_t gid) const { for (auto i = 0U; i < base_DistGraph::numHosts; ++i) { uint64_t start, end; std::tie(start, end) = base_DistGraph::gid2host[i]; if (gid >= start && gid < end) { return i; } } return -1; } /** * Constructor */ MiningGraph( const std::string& filename, unsigned host, unsigned _numHosts, bool setupGluon = true, bool doSort = false, galois::graphs::MASTERS_DISTRIBUTION md = BALANCED_EDGES_OF_MASTERS, uint32_t nodeWeight = 0, uint32_t edgeWeight = 0) : base_DistGraph(host, _numHosts) { galois::runtime::reportParam(GRNAME, "MiningGraph", "0"); galois::CondStatTimer Tgraph_construct( "GraphPartitioningTime", GRNAME); Tgraph_construct.start(); //////////////////////////////////////////////////////////////////////////// galois::graphs::OfflineGraph g(filename); base_DistGraph::numGlobalNodes = g.size(); base_DistGraph::numGlobalEdges = g.sizeEdges(); std::vector dummy; // not actually getting masters, but getting assigned readers for nodes base_DistGraph::computeMasters(md, g, dummy, nodeWeight, edgeWeight); std::vector ndegrees; if (Partitioner::needNodeDegrees()) { if (base_DistGraph::id == 0) { galois::gInfo("Calculating node degrees for partitioner"); } galois::runtime::reportParam(GRNAME, "UsingDegreeOrdering", "1"); ndegrees = getNodeDegrees(filename, base_DistGraph::numGlobalNodes); } graphPartitioner = std::make_unique( host, _numHosts, base_DistGraph::numGlobalNodes, base_DistGraph::numGlobalEdges, ndegrees); graphPartitioner->saveGIDToHost(base_DistGraph::gid2host); //////////////////////////////////////////////////////////////////////////// uint64_t nodeBegin = base_DistGraph::gid2host[base_DistGraph::id].first; typename galois::graphs::OfflineGraph::edge_iterator edgeBegin = g.edge_begin(nodeBegin); uint64_t nodeEnd = base_DistGraph::gid2host[base_DistGraph::id].second; typename galois::graphs::OfflineGraph::edge_iterator 
edgeEnd = g.edge_begin(nodeEnd); galois::gPrint("[", base_DistGraph::id, "] Starting graph reading.\n"); // never read edge data from disk galois::graphs::BufferedGraph bufGraph; bufGraph.resetReadCounters(); galois::StatTimer graphReadTimer("GraphReading", GRNAME); graphReadTimer.start(); bufGraph.loadPartialGraph(filename, nodeBegin, nodeEnd, *edgeBegin, *edgeEnd, base_DistGraph::numGlobalNodes, base_DistGraph::numGlobalEdges); graphReadTimer.stop(); galois::gPrint("[", base_DistGraph::id, "] Reading graph complete.\n"); //////////////////////////////////////////////////////////////////////////// galois::StatTimer inspectionTimer("EdgeInspection", GRNAME); inspectionTimer.start(); bufGraph.resetReadCounters(); galois::gstl::Vector prefixSumOfEdges; base_DistGraph::numOwned = nodeEnd - nodeBegin; prefixSumOfEdges.resize(base_DistGraph::numOwned); // initial pass; set up lid-gid mappings, determine which proxies exist on // this host; prefix sum of edges cna be set up up to the last master // node galois::DynamicBitSet presentProxies = edgeInspectionRound1(bufGraph, prefixSumOfEdges); // set my read nodes on present proxies // TODO parallel? 
for (uint64_t i = nodeBegin; i < nodeEnd; i++) { presentProxies.set(i); } // vector to store bitsets received from other hosts std::vector proxiesOnOtherHosts; proxiesOnOtherHosts.resize(_numHosts); // send off mirror proxies that exist on this host to other hosts communicateProxyInfo(presentProxies, proxiesOnOtherHosts); // signifies how many outgoing edges a particular host should expect from // this host std::vector> numOutgoingEdges; numOutgoingEdges.resize(base_DistGraph::numHosts); // edge inspection phase 2: determine how many edges to send to each host // don't actually send yet edgeInspectionRound2(bufGraph, numOutgoingEdges, proxiesOnOtherHosts); // prefix sum finalization finalizePrefixSum(numOutgoingEdges, prefixSumOfEdges); // doubly make sure the data is cleared freeVector(numOutgoingEdges); // should no longer use this variable inspectionTimer.stop(); //////////////////////////////////////////////////////////////////////////// galois::StatTimer allocationTimer("GraphAllocation", GRNAME); allocationTimer.start(); // Graph construction related calls base_DistGraph::beginMaster = 0; // Allocate and construct the graph base_DistGraph::graph.allocateFrom(base_DistGraph::numNodes, base_DistGraph::numEdges); base_DistGraph::graph.constructNodes(); // edge end fixing auto& base_graph = base_DistGraph::graph; galois::do_all( galois::iterate((uint32_t)0, base_DistGraph::numNodes), [&](uint64_t n) { base_graph.fixEndEdge(n, prefixSumOfEdges[n]); }, #if MORE_DIST_STATS galois::loopname("FixEndEdgeLoop"), #endif galois::no_stats()); // get memory from prefix sum back prefixSumOfEdges.clear(); freeVector(prefixSumOfEdges); // should no longer use this variable allocationTimer.stop(); //////////////////////////////////////////////////////////////////////////// if (setupGluon) { galois::CondStatTimer TfillMirrors("FillMirrors", GRNAME); TfillMirrors.start(); fillMirrors(); TfillMirrors.stop(); } 
//////////////////////////////////////////////////////////////////////////// loadEdges(base_DistGraph::graph, bufGraph, proxiesOnOtherHosts); // TODO this might be useful to keep around proxiesOnOtherHosts.clear(); ndegrees.clear(); // SORT EDGES if (doSort) { base_DistGraph::sortEdgesByDestination(); } if (setupGluon) { galois::CondStatTimer TfillMirrorsEdges( "FillMirrorsEdges", GRNAME); TfillMirrorsEdges.start(); // edges mirrorEdges.resize(base_DistGraph::numHosts); galois::gPrint("[", base_DistGraph::id, "] Filling mirrors and creating " "mirror map\n"); fillMirrorsEdgesAndCreateMirrorMap(); TfillMirrorsEdges.stop(); } //////////////////////////////////////////////////////////////////////////// galois::CondStatTimer Tthread_ranges("ThreadRangesTime", GRNAME); galois::gPrint("[", base_DistGraph::id, "] Determining thread ranges\n"); Tthread_ranges.start(); base_DistGraph::determineThreadRanges(); base_DistGraph::determineThreadRangesMaster(); base_DistGraph::determineThreadRangesWithEdges(); base_DistGraph::initializeSpecificRanges(); Tthread_ranges.stop(); Tgraph_construct.stop(); galois::gPrint("[", base_DistGraph::id, "] Graph construction complete.\n"); galois::DGAccumulator accumer; accumer.reset(); accumer += base_DistGraph::sizeEdges(); totalEdgeProxies = accumer.reduce(); uint64_t totalNodeProxies; accumer.reset(); accumer += base_DistGraph::size(); totalNodeProxies = accumer.reduce(); // report some statistics if (base_DistGraph::id == 0) { galois::runtime::reportStat_Single( GRNAME, std::string("TotalNodeProxies"), totalNodeProxies); galois::runtime::reportStat_Single( GRNAME, std::string("TotalEdgeProxies"), totalEdgeProxies); galois::runtime::reportStat_Single(GRNAME, std::string("OriginalNumberEdges"), base_DistGraph::globalSizeEdges()); galois::runtime::reportStat_Single(GRNAME, std::string("TotalKeptEdges"), globalKeptEdges); GALOIS_ASSERT(globalKeptEdges * 2 == base_DistGraph::globalSizeEdges()); galois::runtime::reportStat_Single( GRNAME, 
std::string("ReplicationFactorNodes"),
          (totalNodeProxies) / (double)base_DistGraph::globalSize());
      // NOTE(review): stat name "ReplicatonFactorEdges" is missing an 'i';
      // kept as-is since external tooling may key on the exact string
      galois::runtime::reportStat_Single(
          GRNAME, std::string("ReplicatonFactorEdges"),
          (totalEdgeProxies) / (double)globalKeptEdges);
    }
  }

private:
  /**
   * First inspection pass over this host's read range of the on-disk edges.
   * For every read node, counts edges kept by the partitioning policy
   * (un-finalized prefix sum) and marks non-master destinations as incoming
   * mirrors; also sets up the local-to-global and global-to-local id maps.
   *
   * @param bufGraph buffered graph holding this host's portion of the edges
   * @param prefixSumOfEdges per-local-node kept-edge counts (finalized later)
   * @returns bitset over global ids marking proxies present on this host
   */
  galois::DynamicBitSet
  edgeInspectionRound1(galois::graphs::BufferedGraph& bufGraph,
                       galois::gstl::Vector& prefixSumOfEdges) {
    galois::DynamicBitSet incomingMirrors;
    incomingMirrors.resize(base_DistGraph::numGlobalNodes);
    incomingMirrors.reset();
    uint32_t myID = base_DistGraph::id;
    uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first;

    // already set before this is called
    base_DistGraph::localToGlobalVector.resize(base_DistGraph::numOwned);

    // kept edges are reduced across hosts; read-edge count is host-local
    galois::DGAccumulator keptEdges;
    keptEdges.reset();
    galois::GAccumulator allEdges;
    allEdges.reset();

    auto& ltgv = base_DistGraph::localToGlobalVector;
    galois::do_all(
        galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first,
                        base_DistGraph::gid2host[base_DistGraph::id].second),
        [&](size_t n) {
          uint64_t edgeCount = 0;
          auto ii = bufGraph.edgeBegin(n);
          auto ee = bufGraph.edgeEnd(n);
          allEdges += std::distance(ii, ee);
          for (; ii < ee; ++ii) {
            uint32_t dst = bufGraph.edgeDestination(*ii);
            if (graphPartitioner->keepEdge(n, dst)) {
              edgeCount++;
              keptEdges += 1;
              // which mirrors do I have
              if (graphPartitioner->retrieveMaster(dst) != myID) {
                incomingMirrors.set(dst);
              }
            }
          }
          // raw per-node count; prefix-summed later in finalizeInspection
          prefixSumOfEdges[n - globalOffset] = edgeCount;
          ltgv[n - globalOffset] = n;
        },
#if MORE_DIST_STATS
        galois::loopname("EdgeInspectionLoop"),
#endif
        galois::steal(), galois::no_stats());
    myKeptEdges = keptEdges.read_local();
    myReadEdges = allEdges.reduce();
    globalKeptEdges = keptEdges.reduce();

    // get incoming mirrors ready for creation
    uint32_t additionalMirrorCount = incomingMirrors.count();
    base_DistGraph::localToGlobalVector.resize(
        base_DistGraph::localToGlobalVector.size() + additionalMirrorCount);
    // note prefix sum will get finalized in a later step
    if (base_DistGraph::numOwned > 0) {
      prefixSumOfEdges.resize(prefixSumOfEdges.size() + additionalMirrorCount,
                              0);
    } else {
      prefixSumOfEdges.resize(additionalMirrorCount, 0);
    }

    // map creation: lid to gid; mirrors are appended after the owned nodes
    if (additionalMirrorCount > 0) {
      uint32_t totalNumNodes = base_DistGraph::numGlobalNodes;
      uint32_t activeThreads = galois::getActiveThreads();
      // first on_each: count mirrors in each thread's block of global ids
      std::vector threadPrefixSums(activeThreads);
      galois::on_each([&](unsigned tid, unsigned nthreads) {
        size_t beginNode;
        size_t endNode;
        std::tie(beginNode, endNode) =
            galois::block_range(0u, totalNumNodes, tid, nthreads);
        uint64_t count = 0;
        for (size_t i = beginNode; i < endNode; i++) {
          if (incomingMirrors.test(i))
            ++count;
        }
        threadPrefixSums[tid] = count;
      });
      // get prefix sums
      for (unsigned int i = 1; i < threadPrefixSums.size(); i++) {
        threadPrefixSums[i] += threadPrefixSums[i - 1];
      }
      assert(threadPrefixSums.back() == additionalMirrorCount);

      uint32_t startingNodeIndex = base_DistGraph::numOwned;
      // do actual work, second on_each: each thread writes its mirrors' gids
      // into the disjoint slice computed above
      galois::on_each([&](unsigned tid, unsigned nthreads) {
        size_t beginNode;
        size_t endNode;
        std::tie(beginNode, endNode) =
            galois::block_range(0u, totalNumNodes, tid, nthreads);
        // start location to start adding things into prefix sums/vectors
        uint32_t threadStartLocation = 0;
        if (tid != 0) {
          threadStartLocation = threadPrefixSums[tid - 1];
        }
        uint32_t handledNodes = 0;
        for (size_t i = beginNode; i < endNode; i++) {
          if (incomingMirrors.test(i)) {
            base_DistGraph::localToGlobalVector[startingNodeIndex +
                                                threadStartLocation +
                                                handledNodes] = i;
            handledNodes++;
          }
        }
      });
    }
    base_DistGraph::numNodes = base_DistGraph::numOwned + additionalMirrorCount;
    base_DistGraph::numNodesWithEdges = base_DistGraph::numNodes;
    assert(base_DistGraph::localToGlobalVector.size() ==
           base_DistGraph::numNodes);

    // g2l mapping
    base_DistGraph::globalToLocalMap.reserve(base_DistGraph::numNodes);
    for (unsigned i = 0; i < base_DistGraph::numNodes; i++) {
      // global to local map construction
      base_DistGraph::globalToLocalMap[base_DistGraph::localToGlobalVector[i]] =
          i;
    }
    assert(base_DistGraph::globalToLocalMap.size() == base_DistGraph::numNodes);
    return incomingMirrors;
  }

  /**
   * Communicate to other hosts which proxies exist on this host.
   *
   * @param presentProxies Bitset marking which proxies are present on this host
   * @param proxiesOnOtherHosts Vector to deserialize received bitsets into
   */
  void communicateProxyInfo(
      galois::DynamicBitSet& presentProxies,
      std::vector& proxiesOnOtherHosts) {
    auto& net = galois::runtime::getSystemNetworkInterface();
    // Send proxies on this host to other hosts
    for (unsigned h = 0; h < base_DistGraph::numHosts; ++h) {
      if (h != base_DistGraph::id) {
        galois::runtime::SendBuffer bitsetBuffer;
        galois::runtime::gSerialize(bitsetBuffer, presentProxies);
        net.sendTagged(h, galois::runtime::evilPhase, bitsetBuffer);
      }
    }
    // receive loop: exactly one message expected from every other host
    for (unsigned h = 0; h < net.Num - 1; h++) {
      decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p;
      do {
        p = net.recieveTagged(galois::runtime::evilPhase, nullptr);
      } while (!p);
      uint32_t sendingHost = p->first;
      // deserialize proxiesOnOtherHosts
      galois::runtime::gDeserialize(p->second,
                                    proxiesOnOtherHosts[sendingHost]);
    }
    base_DistGraph::increment_evilPhase();
  }

  /**
   * Second inspection pass: count, per read node, how many kept edges must be
   * duplicated on each other host, then exchange those counts with all hosts.
   *
   * @param bufGraph buffered graph holding this host's portion of the edges
   * @param numOutgoingEdges per-host, per-read-node counts to be sent out
   * @param proxiesOnOtherHosts which global ids exist on each other host
   */
  void edgeInspectionRound2(
      galois::graphs::BufferedGraph& bufGraph,
      std::vector>& numOutgoingEdges,
      std::vector& proxiesOnOtherHosts) {
    auto& net = galois::runtime::getSystemNetworkInterface();

    // allocate vectors for counting edges that must be sent
    // number of nodes that this host has read from disk
    uint32_t numRead = base_DistGraph::gid2host[base_DistGraph::id].second -
                       base_DistGraph::gid2host[base_DistGraph::id].first;
    // allocate space for outgoing edges
    for (uint32_t i = 0; i < base_DistGraph::numHosts; ++i) {
      numOutgoingEdges[i].assign(numRead, 0);
    }

    uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first;
    galois::DynamicBitSet hostHasOutgoing;
    hostHasOutgoing.resize(base_DistGraph::numHosts);
    hostHasOutgoing.reset();

    // flip loop order, this can be optimized
    // for each host,
// loop over my local nodes
    galois::do_all(
        galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first,
                        base_DistGraph::gid2host[base_DistGraph::id].second),
        [&](size_t n) {
          auto ii = bufGraph.edgeBegin(n);
          auto ee = bufGraph.edgeEnd(n);
          for (; ii < ee; ++ii) {
            uint32_t dst = bufGraph.edgeDestination(*ii);
            // make sure this edge is going to be kept and not dropped
            if (graphPartitioner->keepEdge(n, dst)) {
              for (unsigned h = 0; h < net.Num; h++) {
                if (h != net.ID) {
                  if (proxiesOnOtherHosts[h].test(n)) {
                    // if kept, make sure destination exists on that host
                    if (proxiesOnOtherHosts[h].test(dst)) {
                      // if it does, this edge must be duplicated on that host;
                      // increment count
                      numOutgoingEdges[h][n - globalOffset] += 1;
                      hostHasOutgoing.set(h);
                    }
                  }
                }
              }
            }
          }
        },
#if MORE_DIST_STATS
        galois::loopname("EdgeInspectionRound2Loop"),
#endif
        galois::steal(), galois::no_stats());

    // send data off, then receive it
    sendInspectionData(numOutgoingEdges, hostHasOutgoing);
    recvInspectionData(numOutgoingEdges);
    base_DistGraph::increment_evilPhase();
  }

  /**
   * Send data out from inspection to other hosts.
   *
   * @param[in,out] numOutgoingEdges specifies which nodes on a host will have
   * outgoing edges
   * @param[in] hostHasOutgoing bitset tracking which hosts have outgoing
   * edges from this host
   */
  void sendInspectionData(std::vector>& numOutgoingEdges,
                          galois::DynamicBitSet& hostHasOutgoing) {
    auto& net = galois::runtime::getSystemNetworkInterface();
    galois::GAccumulator bytesSent;
    bytesSent.reset();

    for (unsigned h = 0; h < net.Num; h++) {
      if (h == net.ID) {
        continue;
      }
      // send outgoing edges data off to comm partner
      galois::runtime::SendBuffer b;
      // only send if non-zeros exist
      if (hostHasOutgoing.test(h)) {
        galois::runtime::gSerialize(b, 1); // token saying data exists
        galois::runtime::gSerialize(b, numOutgoingEdges[h]);
      } else {
        galois::runtime::gSerialize(b, 0); // token saying no data exists
      }
      numOutgoingEdges[h].clear();
      bytesSent.update(b.size());
      // send buffer and free memory
      net.sendTagged(h, galois::runtime::evilPhase, b);
      b.getVec().clear();
    }

    galois::runtime::reportStat_Tsum(
        GRNAME, std::string("EdgeInspectionBytesSent"), bytesSent.reduce());
    galois::gPrint("[", base_DistGraph::id, "] Inspection sends complete.\n");
  }

  /**
   * Receive data from inspection from other hosts. Processes the incoming
   * edge bitsets/offsets.
*
   * @param[in,out] numOutgoingEdges specifies which nodes on a host will have
   * outgoing edges
   */
  void recvInspectionData(std::vector>& numOutgoingEdges) {
    auto& net = galois::runtime::getSystemNetworkInterface();

    // one message is expected from every other host
    for (unsigned h = 0; h < net.Num - 1; h++) {
      // expect data from comm partner back
      decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p;
      do {
        p = net.recieveTagged(galois::runtime::evilPhase, nullptr);
      } while (!p);
      uint32_t sendingHost = p->first;

      // get outgoing edges; first get status var
      uint32_t outgoingExists = 2;
      galois::runtime::gDeserialize(p->second, outgoingExists);
      if (outgoingExists == 1) {
        // actual data sent
        galois::runtime::gDeserialize(p->second, numOutgoingEdges[sendingHost]);
      } else if (outgoingExists == 0) {
        // no data sent; just clear again
        numOutgoingEdges[sendingHost].clear();
      } else {
        GALOIS_DIE("unreachable: ", outgoingExists);
      }
    }
    galois::gPrint("[", base_DistGraph::id,
                   "] Inspection receives complete.\n");
  }

  /**
   * Take inspection metadata and begin mapping nodes/creating prefix sums,
   * return the prefix sum.
   */
  galois::gstl::Vector
  finalizePrefixSum(std::vector>& numOutgoingEdges,
                    galois::gstl::Vector& prefixSumOfEdges) {
    base_DistGraph::numEdges = 0;
    inspectOutgoingNodes(numOutgoingEdges, prefixSumOfEdges);
    finalizeInspection(prefixSumOfEdges);
    galois::gDebug("[", base_DistGraph::id,
                   "] To receive this many nodes: ", nodesToReceive);
    galois::gPrint("[", base_DistGraph::id,
                   "] Inspection allocation complete.\n");
    return prefixSumOfEdges;
  }

  /**
   * Outgoing inspection: loop over proxy nodes, determine if need to receive
   * edges.
   */
  void inspectOutgoingNodes(std::vector>& numOutgoingEdges,
                            galois::gstl::Vector& prefixSumOfEdges) {
    galois::GAccumulator toReceive;
    toReceive.reset();
    // proxies (mirrors) occupy the local ids after the owned nodes
    uint32_t proxyStart = base_DistGraph::numOwned;
    uint32_t proxyEnd = base_DistGraph::numNodes;
    assert(proxyEnd == prefixSumOfEdges.size());
    galois::GAccumulator edgesToReceive;
    edgesToReceive.reset();

    // loop over proxy nodes, see if edges need to be sent from another host
    // by looking at results of edge inspection
    galois::do_all(
        galois::iterate(proxyStart, proxyEnd),
        [&](uint32_t lid) {
          uint64_t gid = base_DistGraph::localToGlobalVector[lid];
          assert(gid < base_DistGraph::numGlobalNodes);
          unsigned hostReader = getHostReader(gid);
          assert(hostReader < base_DistGraph::numHosts);
          assert(hostReader != base_DistGraph::id); // self shouldn't be proxy
          uint64_t nodeOffset = base_DistGraph::gid2host[hostReader].first;
          if (numOutgoingEdges[hostReader].size()) {
            if (numOutgoingEdges[hostReader][gid - nodeOffset]) {
              // if this host is going to send me edges, note it for future use
              prefixSumOfEdges[lid] =
                  numOutgoingEdges[hostReader][gid - nodeOffset];
              edgesToReceive += numOutgoingEdges[hostReader][gid - nodeOffset];
              toReceive += 1;
            }
          }
        },
        galois::loopname("OutgoingNodeInspection"), galois::steal(),
        galois::no_stats());

    galois::gPrint("[", base_DistGraph::id, "] Need receive ",
                   edgesToReceive.reduce(), " edges; self is ", myKeptEdges,
                   "\n");
    // get memory back
    numOutgoingEdges.clear();
    nodesToReceive = toReceive.reduce();
  }

  /**
   * finalize metadata maps: turn the per-node kept-edge counts into a prefix
   * sum and derive this host's local edge count from its last entry
   */
  void finalizeInspection(galois::gstl::Vector& prefixSumOfEdges) {
    for (unsigned i = 1; i < base_DistGraph::numNodes; i++) {
      // finalize prefix sum
      prefixSumOfEdges[i] += prefixSumOfEdges[i - 1];
    }
    if (prefixSumOfEdges.size() != 0) {
      base_DistGraph::numEdges = prefixSumOfEdges.back();
    } else {
      base_DistGraph::numEdges = 0;
    }
  }

  //////////////////////////////////////////////////////////////////////////////

public:
  //! counts accesses to the edge GID -> LID map (for statistics reporting)
  galois::GAccumulator lgMapAccesses;

  /**
   * Construct a map from
local edge GIDs to LID
   */
  void constructLocalEdgeGIDMap() {
    lgMapAccesses.reset();
    galois::StatTimer mapConstructTimer("GID2LIDMapConstructTimer", GRNAME);
    mapConstructTimer.start();

    localEdgeGIDToLID.reserve(base_DistGraph::sizeEdges());

    // walk edges in storage order; "count" is the edge's local id
    uint64_t count = 0;
    for (unsigned src = 0; src < base_DistGraph::size(); src++) {
      for (auto edge = base_DistGraph::edge_begin(src);
           edge != base_DistGraph::edge_end(src); edge++) {
        assert((*edge) == count);
        unsigned dst = base_DistGraph::getEdgeDst(edge);
        uint64_t localGID = getEdgeGIDFromSD(src, dst);
        // insert into map
        localEdgeGIDToLID.insert(std::make_pair(localGID, count));
        count++;
      }
    }

    GALOIS_ASSERT(localEdgeGIDToLID.size() == base_DistGraph::sizeEdges());
    GALOIS_ASSERT(count == base_DistGraph::sizeEdges());
    mapConstructTimer.stop();
  }

  //! report map accesses recorded so far (snapshot before main computation)
  void reportAccessBefore() {
    galois::runtime::reportStat_Single(GRNAME, std::string("MapAccessesBefore"),
                                       lgMapAccesses.reduce());
  }

  //! report total map accesses recorded
  void reportAccess() {
    galois::runtime::reportStat_Single(GRNAME, std::string("MapAccesses"),
                                       lgMapAccesses.reduce());
  }

  /**
   * checks map constructed above to see which local id corresponds
   * to a node/edge (if it exists)
   *
   * assumes map is generated
   */
  std::pair getLIDFromMap(unsigned src, unsigned dst) {
    lgMapAccesses += 1;
    // try to find gid in map
    uint64_t localGID = getEdgeGIDFromSD(src, dst);
    auto findResult = localEdgeGIDToLID.find(localGID);
    // return if found, else return a false
    if (findResult != localEdgeGIDToLID.end()) {
      return std::make_pair(findResult->second, true);
    } else {
      // not found
      return std::make_pair((uint64_t)-1, false);
    }
  }

  //! look up an edge's local id from its global id by linearly scanning the
  //! source node's out-edges; dies if the edge does not exist locally
  uint64_t getEdgeLID(uint64_t gid) {
    uint64_t sourceNodeGID = edgeGIDToSource(gid);
    uint64_t sourceNodeLID = base_DistGraph::getLID(sourceNodeGID);
    uint64_t destNodeLID = base_DistGraph::getLID(edgeGIDToDest(gid));

    for (auto edge : base_DistGraph::edges(sourceNodeLID)) {
      uint64_t edgeDst = base_DistGraph::getEdgeDst(edge);
      if (edgeDst == destNodeLID) {
        return *edge;
      }
    }
    GALOIS_DIE("unreachable");
    return (uint64_t)-1;
  }

  //! find the source node of an edge given the edge's local id by scanning
  //! per-node edge ranges (linear scan; see TODO about binary search)
  uint32_t findSourceFromEdge(uint64_t lid) {
    // TODO binary search
    // uint32_t left = 0;
    // uint32_t right = base_DistGraph::numNodes;
    // uint32_t mid = (left + right) / 2;

    for (uint32_t mid = 0; mid < base_DistGraph::numNodes; mid++) {
      uint64_t edge_left = *(base_DistGraph::edge_begin(mid));
      uint64_t edge_right = *(base_DistGraph::edge_begin(mid + 1));

      if (edge_left <= lid && lid < edge_right) {
        return mid;
      }
    }
    GALOIS_DIE("unreachable");
    return (uint32_t)-1;
  }

  //! translate an edge's local id into its global id
  uint64_t getEdgeGID(uint64_t lid) {
    uint32_t src = base_DistGraph::getGID(findSourceFromEdge(lid));
    uint32_t dst = base_DistGraph::getGID(base_DistGraph::getEdgeDst(lid));
    return getEdgeGIDFromSD(src, dst);
  }

private:
  // pairing function: encodes (source, dest) into a single edge global id
  // https://www.quora.com/
  // Is-there-a-mathematical-function-that-converts-two-numbers-into-one-so-
  // that-the-two-numbers-can-always-be-extracted-again
  // GLOBAL IDS ONLY
  uint64_t getEdgeGIDFromSD(uint32_t source, uint32_t dest) {
    return source + (dest % base_DistGraph::numGlobalNodes) *
                        base_DistGraph::numGlobalNodes;
  }

  //! inverse of getEdgeGIDFromSD: recover the source global id
  uint64_t edgeGIDToSource(uint64_t gid) {
    return gid % base_DistGraph::numGlobalNodes;
  }

  //! inverse of getEdgeGIDFromSD: recover the destination global id
  uint64_t edgeGIDToDest(uint64_t gid) {
    // assuming this floors
    return gid / base_DistGraph::numGlobalNodes;
  }

  /**
   * Fill up mirror arrays.
   * TODO make parallel?
*/
  void fillMirrors() {
    base_DistGraph::mirrorNodes.reserve(base_DistGraph::numNodes -
                                        base_DistGraph::numOwned);
    // every non-owned local node is a mirror of its master host
    for (uint32_t i = base_DistGraph::numOwned; i < base_DistGraph::numNodes;
         i++) {
      uint32_t globalID = base_DistGraph::localToGlobalVector[i];
      base_DistGraph::mirrorNodes[graphPartitioner->retrieveMaster(globalID)]
          .push_back(globalID);
    }
  }

  //! record, per master host, the edge global ids of all edges whose source
  //! is a mirror on this host (builds the mirror-edge map)
  void fillMirrorsEdgesAndCreateMirrorMap() {
    for (uint32_t src = base_DistGraph::numOwned;
         src < base_DistGraph::numNodes; src++) {
      auto ee = base_DistGraph::edge_begin(src);
      auto ee_end = base_DistGraph::edge_end(src);
      uint32_t globalSource = base_DistGraph::getGID(src);
      unsigned sourceOwner = graphPartitioner->retrieveMaster(globalSource);
      for (; ee != ee_end; ++ee) {
        // create mirror array
        uint64_t edgeGID = getEdgeGIDFromSD(
            globalSource,
            base_DistGraph::getGID(base_DistGraph::getEdgeDst(ee)));
        mirrorEdges[sourceOwner].push_back(edgeGID);
      }
    }
  }

  ////////////////////////////////////////////////////////////////////////////////

  /**
   * Load this host's assigned edges into the in-memory graph: constructs
   * local edges while sending duplicates other hosts need, then receives
   * edges destined for local proxies.
   */
  template
  void loadEdges(GraphTy& graph,
                 galois::graphs::BufferedGraph& bufGraph,
                 std::vector& proxiesOnOtherHosts) {
    galois::StatTimer loadEdgeTimer("EdgeLoading", GRNAME);
    loadEdgeTimer.start();
    bufGraph.resetReadCounters();

    std::atomic receivedNodes;
    receivedNodes.store(0);

    // sends data
    sendEdges(graph, bufGraph, receivedNodes, proxiesOnOtherHosts);
    // uint64_t bufBytesRead = bufGraph.getBytesRead();
    // get data from graph back (don't need it after sending things out)
    bufGraph.resetAndFree();

    // receives data
    galois::on_each(
        [&](unsigned GALOIS_UNUSED(tid), unsigned GALOIS_UNUSED(nthreads)) {
          receiveEdges(graph, receivedNodes);
        });
    base_DistGraph::increment_evilPhase();

    loadEdgeTimer.stop();

    galois::gPrint("[", base_DistGraph::id, "] Edge loading time: ",
                   loadEdgeTimer.get_usec() / 1000000.0f, " seconds\n");
  }

  // no edge data version
  template
  void sendEdges(GraphTy& graph,
                 galois::graphs::BufferedGraph& bufGraph,
                 std::atomic& receivedNodes,
                 std::vector& proxiesOnOtherHosts) {
    using DstVecType = std::vector>;
    using SendBufferVecTy = std::vector;

    // per-thread scratch: destination batches and send buffers, one per host
    galois::substrate::PerThreadStorage gdst_vecs(
        base_DistGraph::numHosts);
    galois::substrate::PerThreadStorage sendBuffers(
        base_DistGraph::numHosts);

    auto& net = galois::runtime::getSystemNetworkInterface();
    const unsigned& id = this->base_DistGraph::id;
    const unsigned& numHosts = this->base_DistGraph::numHosts;

    galois::GAccumulator messagesSent;
    galois::GAccumulator bytesSent;
    galois::GReduceMax maxBytesSent;
    messagesSent.reset();
    bytesSent.reset();
    maxBytesSent.reset();

    // Go over assigned nodes and distribute edges.
    galois::do_all(
        galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first,
                        base_DistGraph::gid2host[base_DistGraph::id].second),
        [&](uint64_t src) {
          uint32_t lsrc = 0;
          uint64_t curEdge = 0;
          if (base_DistGraph::isLocal(src)) {
            lsrc = this->G2L(src);
            curEdge =
                *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED);
          }

          auto ee = bufGraph.edgeBegin(src);
          auto ee_end = bufGraph.edgeEnd(src);
          auto& gdst_vec = *gdst_vecs.getLocal();

          for (unsigned i = 0; i < numHosts; ++i) {
            gdst_vec[i].clear();
          }

          for (; ee != ee_end; ++ee) {
            uint32_t gdst = bufGraph.edgeDestination(*ee);
            // make sure this edge is going to be kept and not dropped
            if (graphPartitioner->keepEdge(src, gdst)) {
              assert(base_DistGraph::isLocal(src));
              // construct the edge locally...
              uint32_t ldst = this->G2L(gdst);
              graph.constructEdge(curEdge++, ldst);
              // ...and queue it for other hosts that hold both endpoints
              for (unsigned h = 0; h < net.Num; h++) {
                if (h != net.ID) {
                  if (proxiesOnOtherHosts[h].test(src)) {
                    // if kept, make sure destination exists on that host
                    if (proxiesOnOtherHosts[h].test(gdst)) {
                      // if it does, this edge must be duplicated on that host;
                      // increment count
                      gdst_vec[h].push_back(gdst);
                    }
                  }
                }
              }
            }
          }

          // make sure all edges accounted for if local
          if (base_DistGraph::isLocal(src)) {
            assert(curEdge == (*graph.edge_end(lsrc)));
          }

          // send
          for (uint32_t h = 0; h < numHosts; ++h) {
            if (h == id)
              continue;

            if (gdst_vec[h].size() > 0) {
              auto& b = (*sendBuffers.getLocal())[h];
              galois::runtime::gSerialize(b, src);
              galois::runtime::gSerialize(b, gdst_vec[h]);

              // send if over limit
              if (b.size() > edgePartitionSendBufSize) {
                messagesSent += 1;
                bytesSent.update(b.size());
                maxBytesSent.update(b.size());

                net.sendTagged(h, galois::runtime::evilPhase, b);
                b.getVec().clear();
                b.getVec().reserve(edgePartitionSendBufSize * 1.25);
              }
            }
          }

          // overlap receives
          auto buffer = net.recieveTagged(galois::runtime::evilPhase, nullptr);
          this->processReceivedEdgeBuffer(buffer, graph, receivedNodes);
        },
#if MORE_DIST_STATS
        galois::loopname("EdgeLoading"),
#endif
        galois::steal(), galois::no_stats());

    // flush buffers: send whatever remains for every (thread, host) pair
    for (unsigned threadNum = 0; threadNum < sendBuffers.size(); ++threadNum) {
      auto& sbr = *sendBuffers.getRemote(threadNum);
      for (unsigned h = 0; h < this->base_DistGraph::numHosts; ++h) {
        if (h == this->base_DistGraph::id)
          continue;
        auto& sendBuffer = sbr[h];
        if (sendBuffer.size() > 0) {
          messagesSent += 1;
          bytesSent.update(sendBuffer.size());
          maxBytesSent.update(sendBuffer.size());

          net.sendTagged(h, galois::runtime::evilPhase, sendBuffer);
          sendBuffer.getVec().clear();
        }
      }
    }

    net.flush();

    galois::runtime::reportStat_Tsum(
        GRNAME, std::string("EdgeLoadingMessagesSent"), messagesSent.reduce());
    galois::runtime::reportStat_Tsum(
        GRNAME, std::string("EdgeLoadingBytesSent"), bytesSent.reduce());
    galois::runtime::reportStat_Tmax(
        GRNAME, std::string("EdgeLoadingMaxBytesSent"), maxBytesSent.reduce());
  }

  //!
@copydoc DistGraphHybridCut::processReceivedEdgeBuffer template void processReceivedEdgeBuffer( std::optional>& buffer, GraphTy& graph, std::atomic& receivedNodes) { if (buffer) { auto& rb = buffer->second; while (rb.r_size() > 0) { uint64_t n; std::vector gdst_vec; galois::runtime::gDeserialize(rb, n); galois::runtime::gDeserialize(rb, gdst_vec); assert(base_DistGraph::isLocal(n)); uint32_t lsrc = this->G2L(n); uint64_t cur = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); uint64_t cur_end = *graph.edge_end(lsrc); assert((cur_end - cur) == gdst_vec.size()); deserializeEdges(graph, gdst_vec, cur, cur_end); ++receivedNodes; } } } /** * Receive the edge dest/data assigned to this host from other hosts * that were responsible for reading them. */ template void receiveEdges(GraphTy& graph, std::atomic& receivedNodes) { auto& net = galois::runtime::getSystemNetworkInterface(); // receive edges for all mirror nodes while (receivedNodes < nodesToReceive) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; p = net.recieveTagged(galois::runtime::evilPhase, nullptr); processReceivedEdgeBuffer(p, graph, receivedNodes); } } template void deserializeEdges(GraphTy& graph, std::vector& gdst_vec, uint64_t& cur, uint64_t& cur_end) { uint64_t i = 0; while (cur < cur_end) { uint64_t gdst = gdst_vec[i++]; uint32_t ldst = this->G2L(gdst); graph.constructEdge(cur++, ldst); } } }; // make GRNAME visible to public template constexpr const char* const galois::graphs::MiningGraph::GRNAME; } // end namespace graphs } // end namespace galois #endif ================================================ FILE: libcusp/include/galois/graphs/NewGeneric.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). 
*
 * Copyright (C) 2020, The University of Texas at Austin. All rights reserved.
 * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
 * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
 * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
 * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
 * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
 * shall University be liable for incidental, special, indirect, direct or
 * consequential damages or loss of profits, interruption of business, or
 * related expenses which may arise from use of Software or Documentation,
 * including but not limited to those resulting from defects in Software and/or
 * Documentation, or loss or inaccuracy of data of any kind.
 */

/**
 * @file NewGeneric.h
 *
 * Contains the main graph class as well as the partitioning logic that CuSP
 * uses.
 */

#ifndef _GALOIS_DIST_NEWGENERIC_H
#define _GALOIS_DIST_NEWGENERIC_H

#include "galois/graphs/DistributedGraph.h"
#include "galois/DReducible.h"
// NOTE(review): the two includes below lost their targets during extraction —
// restore from upstream before building
#include
#include

#define CUSP_PT_TIMER 0

namespace galois {
namespace graphs {
/**
 * @tparam NodeTy type of node data for the graph
 * @tparam EdgeTy type of edge data for the graph
 *
 * @todo fully document and clean up code
 * @warning not meant for public use + not fully documented yet
 */
template
class NewDistGraphGeneric : public DistGraph {
  //! size used to buffer edge sends during partitioning
  constexpr static unsigned edgePartitionSendBufSize = 8388608;
  constexpr static const char* const GRNAME = "dGraph_Generic";
  std::unique_ptr graphPartitioner;

  //! How many rounds to sync state during edge assignment phase
  uint32_t _edgeStateRounds;
  //! per-host edge-load accumulators for the current round
  std::vector> hostLoads;
  //! edge loads accumulated over previous rounds
  std::vector old_hostLoads;

  //! translate a global id to a local id; fast path for edge cuts where the
  //! owned nodes occupy a contiguous global-id range
  uint32_t G2LEdgeCut(uint64_t gid, uint32_t globalOffset) const {
    assert(base_DistGraph::isLocal(gid));
    // optimized for edge cuts
    if (gid >= globalOffset && gid < globalOffset + base_DistGraph::numOwned)
      return gid - globalOffset;
    return base_DistGraph::globalToLocalMap.at(gid);
  }

  /**
   * Free memory of a vector by swapping an empty vector with it
   */
  template
  void freeVector(V& vectorToKill) {
    V dummyVector;
    vectorToKill.swap(dummyVector);
  }

  //! number of nodes this host expects to receive edges for
  uint32_t nodesToReceive;

public:
  //! typedef for base DistGraph class
  using base_DistGraph = DistGraph;

private:
  virtual unsigned getHostIDImpl(uint64_t gid) const {
    assert(gid < base_DistGraph::numGlobalNodes);
    return graphPartitioner->retrieveMaster(gid);
  }

  virtual bool isOwnedImpl(uint64_t gid) const {
    assert(gid < base_DistGraph::numGlobalNodes);
    return (graphPartitioner->retrieveMaster(gid) == base_DistGraph::id);
  }

  virtual bool isLocalImpl(uint64_t gid) const {
    assert(gid < base_DistGraph::numGlobalNodes);
    return (base_DistGraph::globalToLocalMap.find(gid) !=
            base_DistGraph::globalToLocalMap.end());
  }

  // TODO current uses graph partitioner
  // TODO make it so user doesn't have to specify; can be done by tracking
  // if an outgoing mirror is marked as having an incoming edge on any
  // host
  virtual bool isVertexCutImpl() const {
    return graphPartitioner->isVertexCut();
  }

  virtual std::pair cartesianGridImpl() const {
    return graphPartitioner->cartesianGrid();
  }

public:
  /**
   * Reset load balance on host reducibles.
   */
  void resetEdgeLoad() {
    if (_edgeStateRounds > 1) {
      if (!graphPartitioner->noCommunication()) {
        for (unsigned i = 0; i < base_DistGraph::numHosts; i++) {
          hostLoads[i].reset();
          old_hostLoads[i] = 0;
        }
      }
    }
  }

  /**
   * Sync load balance on hosts using reducibles.
*/
  void syncEdgeLoad() {
    if (_edgeStateRounds > 1) {
      if (!graphPartitioner->noCommunication()) {
        for (unsigned i = 0; i < base_DistGraph::numHosts; i++) {
          old_hostLoads[i] += hostLoads[i].reduce();
          hostLoads[i].reset();
        }
      }
    }
  }

  /**
   * Debug function: prints host loads.
   */
  void printEdgeLoad() {
    if (_edgeStateRounds > 1) {
      if (!graphPartitioner->noCommunication()) {
        if (base_DistGraph::id == 0) {
          for (unsigned i = 0; i < base_DistGraph::numHosts; i++) {
            galois::gDebug("[", base_DistGraph::id, "] ", i, " ",
                           old_hostLoads[i], "\n");
          }
        }
      }
    }
  }

  /**
   * Constructor: reads this host's slice of the graph, optionally runs the
   * master-assignment phase, inspects and exchanges edge metadata, then
   * allocates and constructs the local partitioned graph.
   */
  NewDistGraphGeneric(
      const std::string& filename, unsigned host, unsigned _numHosts,
      bool cuspAsync = true, uint32_t stateRounds = 100, bool transpose = false,
      galois::graphs::MASTERS_DISTRIBUTION md = BALANCED_EDGES_OF_MASTERS,
      uint32_t nodeWeight = 0, uint32_t edgeWeight = 0,
      std::string masterBlockFile = "", bool readFromFile = false,
      std::string localGraphFileName = "local_graph",
      uint32_t edgeStateRounds = 1)
      : base_DistGraph(host, _numHosts), _edgeStateRounds(edgeStateRounds) {
    galois::runtime::reportParam("dGraph", "GenericPartitioner", "0");
    galois::CondStatTimer Tgraph_construct(
        "GraphPartitioningTime", GRNAME);
    Tgraph_construct.start();

    // short-circuit: reload a previously saved local graph instead of
    // partitioning from scratch
    if (readFromFile) {
      galois::gPrint("[", base_DistGraph::id,
                     "] Reading local graph from file ", localGraphFileName,
                     "\n");
      base_DistGraph::read_local_graph_from_file(localGraphFileName);
      Tgraph_construct.stop();
      return;
    }

    galois::graphs::OfflineGraph g(filename);
    base_DistGraph::numGlobalNodes = g.size();
    base_DistGraph::numGlobalEdges = g.sizeEdges();
    std::vector dummy;
    // not actually getting masters, but getting assigned readers for nodes
    if (masterBlockFile == "") {
      base_DistGraph::computeMasters(md, g, dummy, nodeWeight, edgeWeight);
    } else {
      galois::gInfo("Getting reader assignment from file");
      base_DistGraph::readersFromFile(g, masterBlockFile);
    }

    graphPartitioner = std::make_unique(
        host, _numHosts, base_DistGraph::numGlobalNodes,
        base_DistGraph::numGlobalEdges);
    // TODO abstract this away somehow
    graphPartitioner->saveGIDToHost(base_DistGraph::gid2host);

    uint64_t nodeBegin = base_DistGraph::gid2host[base_DistGraph::id].first;
    typename galois::graphs::OfflineGraph::edge_iterator edgeBegin =
        g.edge_begin(nodeBegin);
    uint64_t nodeEnd = base_DistGraph::gid2host[base_DistGraph::id].second;
    typename galois::graphs::OfflineGraph::edge_iterator edgeEnd =
        g.edge_begin(nodeEnd);

    // signifies how many outgoing edges a particular host should expect from
    // this host
    std::vector> numOutgoingEdges;
    // signifies if a host should create a node because it has an incoming edge
    std::vector hasIncomingEdge;

    // only need to use for things that need communication
    if (!graphPartitioner->noCommunication()) {
      if (_edgeStateRounds > 1) {
        hostLoads.resize(base_DistGraph::numHosts);
        old_hostLoads.resize(base_DistGraph::numHosts);
        resetEdgeLoad();
      }
      numOutgoingEdges.resize(base_DistGraph::numHosts);
      hasIncomingEdge.resize(base_DistGraph::numHosts);
    }

    // phase 0
    galois::gPrint("[", base_DistGraph::id, "] Starting graph reading.\n");
    galois::graphs::BufferedGraph bufGraph;
    bufGraph.resetReadCounters();
    galois::StatTimer graphReadTimer("GraphReading", GRNAME);
    graphReadTimer.start();
    bufGraph.loadPartialGraph(filename, nodeBegin, nodeEnd, *edgeBegin,
                              *edgeEnd, base_DistGraph::numGlobalNodes,
                              base_DistGraph::numGlobalEdges);
    graphReadTimer.stop();
    galois::gPrint("[", base_DistGraph::id, "] Reading graph complete.\n");

    if (graphPartitioner->masterAssignPhase()) {
      // loop over all nodes, determine where neighbors are, assign masters
      galois::StatTimer phase0Timer("Phase0", GRNAME);
      galois::gPrint("[", base_DistGraph::id,
                     "] Starting master assignment.\n");
      phase0Timer.start();
      phase0(bufGraph, cuspAsync, stateRounds);
      phase0Timer.stop();
      galois::gPrint("[", base_DistGraph::id,
                     "] Master assignment complete.\n");
    }

    galois::StatTimer inspectionTimer("EdgeInspection", GRNAME);
    inspectionTimer.start();
    bufGraph.resetReadCounters();
    galois::gstl::Vector prefixSumOfEdges;

    // assign edges to other nodes
    if (!graphPartitioner->noCommunication()) {
      edgeInspection(bufGraph, numOutgoingEdges, hasIncomingEdge,
                     inspectionTimer);
      galois::DynamicBitSet& finalIncoming =
          hasIncomingEdge[base_DistGraph::id];

      galois::StatTimer mapTimer("NodeMapping", GRNAME);
      mapTimer.start();
      nodeMapping(numOutgoingEdges, finalIncoming, prefixSumOfEdges);
      mapTimer.stop();

      finalIncoming.resize(0);
    } else {
      base_DistGraph::numOwned = nodeEnd - nodeBegin;
      uint64_t edgeOffset = *bufGraph.edgeBegin(nodeBegin);
      // edge prefix sum, no comm required
      edgeCutInspection(bufGraph, inspectionTimer, edgeOffset,
                        prefixSumOfEdges);
    }
    // inspection timer is stopped in edgeInspection function

    // flip partitioners that have a master assignment phase to stage 2
    // (meaning all nodes and masters that will be on this host are present in
    // the partitioner's metadata)
    if (graphPartitioner->masterAssignPhase()) {
      graphPartitioner->enterStage2();
    }

    // get memory back from inspection metadata
    numOutgoingEdges.clear();
    hasIncomingEdge.clear();
    // doubly make sure the data is cleared
    freeVector(numOutgoingEdges); // should no longer use this variable
    freeVector(hasIncomingEdge);  // should no longer use this variable

    // Graph construction related calls
    base_DistGraph::beginMaster = 0;
    // Allocate and construct the graph
    base_DistGraph::graph.allocateFrom(base_DistGraph::numNodes,
                                       base_DistGraph::numEdges);
    base_DistGraph::graph.constructNodes();

    // edge end fixing
    auto& base_graph = base_DistGraph::graph;
    galois::do_all(
        galois::iterate((uint32_t)0, base_DistGraph::numNodes),
        [&](uint64_t n) { base_graph.fixEndEdge(n, prefixSumOfEdges[n]); },
#if MORE_DIST_STATS
        galois::loopname("FixEndEdgeLoop"),
#endif
        galois::no_stats());
    // get memory from prefix sum back
    prefixSumOfEdges.clear();
    freeVector(prefixSumOfEdges); // should no longer use this variable

    galois::CondStatTimer TfillMirrors("FillMirrors", GRNAME);
    TfillMirrors.start();
    fillMirrors();
    TfillMirrors.stop();

    if (_edgeStateRounds > 1) {
      // reset edge load since we need exact same answers again
      resetEdgeLoad();
    }

    // Edge loading
    if (!graphPartitioner->noCommunication()) {
      loadEdges(base_DistGraph::graph, bufGraph);
    } else {
      // Edge cut construction
      edgeCutLoad(base_DistGraph::graph, bufGraph);
      bufGraph.resetAndFree();
    }

    // Finalization

    // TODO this is a hack; fix it somehow
    // if vertex cut but not a cart cut is the condition
    if (graphPartitioner->isVertexCut() &&
        graphPartitioner->cartesianGrid().first == 0) {
      base_DistGraph::numNodesWithEdges = base_DistGraph::numNodes;
    }

    if (transpose) {
      base_DistGraph::transposed = true;
      base_DistGraph::numNodesWithEdges = base_DistGraph::numNodes;
      if (base_DistGraph::numNodes > 0) {
        // consider all nodes to have outgoing edges (TODO better way to do
        // this?) for now it's fine I guess
        base_DistGraph::graph.transpose(GRNAME);
      }
    }

    galois::CondStatTimer Tthread_ranges("ThreadRangesTime",
                                         GRNAME);
    Tthread_ranges.start();
    base_DistGraph::determineThreadRanges();
    Tthread_ranges.stop();

    base_DistGraph::determineThreadRangesMaster();
    base_DistGraph::determineThreadRangesWithEdges();
    base_DistGraph::initializeSpecificRanges();

    Tgraph_construct.stop();
    galois::gPrint("[", base_DistGraph::id,
                   "] Graph construction complete.\n");

    // report state rounds
    if (base_DistGraph::id == 0) {
      galois::runtime::reportStat_Single(GRNAME, "CuSPStateRounds",
                                         (uint32_t)stateRounds);
    }
  }

private:
  // NOTE(review): this definition continues beyond the end of this chunk;
  // only the visible prefix is reproduced below.
  galois::runtime::SpecificRange>
  getSpecificThreadRange(galois::graphs::BufferedGraph& bufGraph,
                         std::vector& assignedThreadRanges,
                         uint64_t startNode, uint64_t endNode) {
    galois::StatTimer threadRangeTime("Phase0ThreadRangeTime");
    threadRangeTime.start();
    uint64_t numLocalNodes = endNode - startNode;
    galois::PODResizeableArray edgePrefixSum;
    edgePrefixSum.resize(numLocalNodes);

    // get thread ranges with a prefix sum
    galois::do_all(
        galois::iterate(startNode, endNode),
        [&](unsigned n) {
          uint64_t offset = n -
startNode; edgePrefixSum[offset] = bufGraph.edgeEnd(n) - bufGraph.edgeBegin(n); }, galois::no_stats()); for (unsigned i = 1; i < numLocalNodes; i++) { edgePrefixSum[i] += edgePrefixSum[i - 1]; } assignedThreadRanges = galois::graphs::determineUnitRangesFromPrefixSum( galois::runtime::activeThreads, edgePrefixSum); for (unsigned i = 0; i < galois::runtime::activeThreads + 1; i++) { assignedThreadRanges[i] += startNode; } auto toReturn = galois::runtime::makeSpecificRange( boost::counting_iterator(startNode), boost::counting_iterator(startNode + numLocalNodes), assignedThreadRanges.data()); threadRangeTime.stop(); return toReturn; } /** * For each other host, determine which nodes that this host needs to get * info from * * @param bufGraph Buffered graph used to loop over edges * @param ghosts bitset; at end * of execution, marked bits signify neighbors on this host that that other * host has read (and therefore must sync with me) */ // steps 1 and 2 of neighbor location setup: memory allocation, bitset setting void phase0BitsetSetup(galois::graphs::BufferedGraph& bufGraph, galois::DynamicBitSet& ghosts) { galois::StatTimer bitsetSetupTimer("Phase0BitsetSetup", GRNAME); bitsetSetupTimer.start(); ghosts.resize(bufGraph.size()); ghosts.reset(); std::vector rangeVector; auto start = base_DistGraph::gid2host[base_DistGraph::id].first; auto end = base_DistGraph::gid2host[base_DistGraph::id].second; galois::runtime::SpecificRange> work = getSpecificThreadRange(bufGraph, rangeVector, start, end); // Step 2: loop over all local nodes, determine neighbor locations galois::do_all( galois::iterate(work), // galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first, // base_DistGraph::gid2host[base_DistGraph::id].second), [&](unsigned n) { // ptt.start(); // galois::gPrint("[", base_DistGraph::id, " ", // galois::substrate::getThreadPool().getTID(), "] ", n, "\n"); auto ii = bufGraph.edgeBegin(n); auto ee = bufGraph.edgeEnd(n); for (; ii < ee; ++ii) { uint32_t dst = 
bufGraph.edgeDestination(*ii); if ((dst < start) || (dst >= end)) { // not owned by this host // set on bitset ghosts.set(dst); } } // ptt.stop(); }, galois::loopname("Phase0BitsetSetup_DetermineNeighborLocations"), galois::steal(), galois::no_stats()); bitsetSetupTimer.stop(); } // sets up the gid to lid mapping for phase 0 /** * Set up the GID to LID mapping for phase 0: In the mapping vector, * read nodes occupy the first chunk, and nodes read by other hosts follow. * * @param ghosts * @param gid2offsets mapping vector: element at an offset corresponds to a * particular GID (and its master) * @param syncNodes one vector of nodes for each host: at the end of * execution, will contain mirrors on this host whose master is on that host * @returns Number of set bits */ uint64_t phase0MapSetup( galois::DynamicBitSet& ghosts, std::unordered_map& gid2offsets, galois::gstl::Vector>& syncNodes) { galois::StatTimer mapSetupTimer("Phase0MapSetup", GRNAME); mapSetupTimer.start(); uint32_t numLocal = base_DistGraph::gid2host[base_DistGraph::id].second - base_DistGraph::gid2host[base_DistGraph::id].first; uint32_t lid = numLocal; uint64_t numToReserve = ghosts.count(); gid2offsets.reserve(numToReserve); // TODO: parallelize using prefix sum? for (unsigned h = 0; h < base_DistGraph::numHosts; ++h) { if (h == base_DistGraph::id) continue; auto start = base_DistGraph::gid2host[h].first; auto end = base_DistGraph::gid2host[h].second; for (uint64_t gid = start; gid < end; ++gid) { if (ghosts.test(gid)) { gid2offsets[gid] = lid; syncNodes[h].push_back(gid - start); lid++; } } galois::gDebug("[", base_DistGraph::id, " -> ", h, "] bitset size ", (end - start) / 64, " vs. vector size ", syncNodes[h].size() / 2); } lid -= numLocal; assert(lid == numToReserve); galois::gDebug("[", base_DistGraph::id, "] total bitset size ", (ghosts.size() - numLocal) / 64, " vs. 
total vector size ", numToReserve / 2); // TODO: should not be used after this - refactor to make this clean ghosts.resize(0); mapSetupTimer.stop(); return lid; } // steps 4 and 5 of neighbor location setup /** * Let other hosts know which nodes they need to send to me by giving them * the bitset marked with nodes I am interested in on the other host. * * @param syncNodes one vector of nodes for each host: at the begin of * execution, will contain mirrors on this host whose master is on that host; * at the end of execution, will contain masters on this host whose mirror * is on that host */ void phase0SendRecv( galois::gstl::Vector>& syncNodes) { auto& net = galois::runtime::getSystemNetworkInterface(); galois::StatTimer p0BitsetCommTimer("Phase0SendRecvBitsets", GRNAME); p0BitsetCommTimer.start(); uint64_t bytesSent = 0; // Step 4: send bitset to other hosts for (unsigned h = 0; h < base_DistGraph::numHosts; h++) { galois::runtime::SendBuffer bitsetBuffer; if (h != base_DistGraph::id) { galois::runtime::gSerialize(bitsetBuffer, syncNodes[h]); bytesSent += bitsetBuffer.size(); net.sendTagged(h, galois::runtime::evilPhase, bitsetBuffer); } } // Step 5: recv bitset to other hosts; this indicates which local nodes each // other host needs to be informed of updates of for (unsigned h = 0; h < net.Num - 1; h++) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); uint32_t sendingHost = p->first; // deserialize into neighbor bitsets galois::runtime::gDeserialize(p->second, syncNodes[sendingHost]); } p0BitsetCommTimer.stop(); galois::runtime::reportStat_Tsum( GRNAME, std::string("Phase0SendRecvBitsetsBytesSent"), bytesSent); // comm phase complete base_DistGraph::increment_evilPhase(); } /** * Given a set of loads in a vector and the accumulation to those loads, * synchronize them across hosts and do the accumulation into the vector * of loads. 
 *
 * @param loads Vector of loads to accumulate to
 * @param accums Vector of accumulations to loads that occurred since last
 * sync
 */
// NOTE(review): template arguments were stripped from this extract (e.g.
// "std::vector>", bare "template"); declarations kept verbatim — restore
// the type parameters from upstream before compiling. TODO confirm.
void syncLoad(std::vector& loads, std::vector>& accums) {
  assert(loads.size() == accums.size());
  // use DG accumulator to force barrier on all hosts to sync this data
  galois::DGAccumulator syncer;
  // sync accum for each host one by one
  for (unsigned i = 0; i < loads.size(); i++) {
    syncer.reset();
    syncer += (accums[i].load());
    // zero the delta once it has been handed to the reducer
    accums[i].store(0);
    uint64_t accumulation = syncer.reduce();
    loads[i] += accumulation;
  }
}

/**
 * Given a copyable atomic vector, get data from it, save to a
 * PODResizeableArray, and reset value in the atomic array.
 *
 * @param atomic Atomic vector to extract and reset
 * @param nonAtomic PODarray to extract data into
 */
template
void extractAtomicToPODArray(std::vector>& atomic, galois::PODResizeableArray& nonAtomic) {
  nonAtomic.resize(atomic.size());
  // copy each atomic value out, then zero the source entry
  galois::do_all(
      galois::iterate((size_t)0, atomic.size()),
      [&](size_t i) {
        nonAtomic[i] = atomic[i].load();
        atomic[i].store(0);
      },
      galois::no_stats());
}

/**
 * Send newly accumulated node and edge loads to all other hosts and reset
 * the accumulated values. No DG accumulator used.
 *
 * @param nodeAccum new node accumulation for each host in system
 * @param edgeAccum new edge accumulation for each host in system
 */
void asyncSendLoad(galois::PODResizeableArray& nodeAccum, galois::PODResizeableArray& edgeAccum) {
  auto& net = galois::runtime::getSystemNetworkInterface();
  unsigned bytesSent = 0;
  galois::StatTimer sendTimer("Phase0AsyncSendLoadTime", GRNAME);
  sendTimer.start();
  for (unsigned h = 0; h < base_DistGraph::numHosts; h++) {
    if (h != base_DistGraph::id) {
      // serialize node and edge accumulations with tag 4 (to avoid
      // conflict with other tags being used) and send
      galois::runtime::SendBuffer b;
      galois::runtime::gSerialize(b, 4);
      galois::runtime::gSerialize(b, nodeAccum);
      galois::runtime::gSerialize(b, edgeAccum);
      bytesSent += b.size();
      // note the +1 on evil phase; load messages send using a different
      // phase to avoid conflicts
      net.sendTagged(h, base_DistGraph::evilPhasePlus1(), b);
    }
  }
  sendTimer.stop();
  galois::runtime::reportStat_Tsum(GRNAME, "Phase0AsyncSendLoadBytesSent",
                                   bytesSent);
}

/**
 * Receive (if it exists) new node/edge loads from other hosts and add it to
 * our own loads.
 *
 * @param nodeLoads current node load information for each host in system
 * @param edgeLoads current edge load information for each host in system
 * @param loadsClear bitset marking hosts that sent their "all clear" (tag 3)
 */
void asyncRecvLoad(std::vector& nodeLoads, std::vector& edgeLoads, galois::DynamicBitSet& loadsClear) {
  auto& net = galois::runtime::getSystemNetworkInterface();
  decltype(net.recieveTagged(base_DistGraph::evilPhasePlus1(), nullptr)) p;
  galois::StatTimer recvTimer("Phase0AsyncRecvLoadTime", GRNAME);
  recvTimer.start();
  do {
    // note the +1
    p = net.recieveTagged(base_DistGraph::evilPhasePlus1(), nullptr);
    if (p) {
      unsigned messageType = (unsigned)-1;
      // deserialize message type
      galois::runtime::gDeserialize(p->second, messageType);
      if (messageType == 4) {
        // tag 4 = load-update message: per-host node and edge deltas
        galois::PODResizeableArray recvNodeAccum;
        galois::PODResizeableArray recvEdgeAccum;
        // loads to add
        galois::runtime::gDeserialize(p->second, recvNodeAccum);
        galois::runtime::gDeserialize(p->second, recvEdgeAccum);
        assert(recvNodeAccum.size() == recvEdgeAccum.size());
        assert(recvNodeAccum.size() == nodeLoads.size());
        assert(recvEdgeAccum.size() == edgeLoads.size());
        galois::do_all(
            galois::iterate((size_t)0, recvNodeAccum.size()),
            [&](size_t i) {
              nodeLoads[i] += recvNodeAccum[i];
              edgeLoads[i] += recvEdgeAccum[i];
            },
            galois::no_stats());
      } else if (messageType == 3) {
        // all clear message from host
        uint32_t sendingHost = p->first;
        assert(!loadsClear.test(sendingHost));
        loadsClear.set(sendingHost);
      } else {
        GALOIS_DIE("unexpected message type in async load synchronization: ",
                   messageType);
      }
    }
  } while (p); // drain until no message is pending (non-blocking overall)
  recvTimer.stop();
}

/**
 * Send out accumulated loads from a round of node assignments to all other
 * hosts and also receive loads from other hosts if they exist
 * (non-blocking).
 *
 * @param nodeLoads current known node loads on this host
 * @param nodeAccum newly accumulated node loads from a prior round of node
 * assignments
 * @param edgeLoads current known edge loads on this host
 * @param edgeAccum newly accumulated edge loads from a prior round of node
 * assignments
 * @param loadsClear Bitset tracking if we have received all loads from
 * a particular host
 */
void asyncSyncLoad(std::vector& nodeLoads, std::vector>& nodeAccum, std::vector& edgeLoads, std::vector>& edgeAccum, galois::DynamicBitSet& loadsClear) {
  assert(nodeLoads.size() == base_DistGraph::numHosts);
  assert(nodeAccum.size() == base_DistGraph::numHosts);
  assert(edgeLoads.size() == base_DistGraph::numHosts);
  assert(edgeAccum.size() == base_DistGraph::numHosts);
  galois::StatTimer syncTimer("Phase0AsyncSyncLoadTime", GRNAME);
  syncTimer.start();
  // extract out data to send
  galois::PODResizeableArray nonAtomicNodeAccum;
  galois::PODResizeableArray nonAtomicEdgeAccum;
  extractAtomicToPODArray(nodeAccum, nonAtomicNodeAccum);
  extractAtomicToPODArray(edgeAccum, nonAtomicEdgeAccum);
  assert(nonAtomicNodeAccum.size() == base_DistGraph::numHosts);
  assert(nonAtomicEdgeAccum.size() == base_DistGraph::numHosts);
  // apply loads to self
  galois::do_all(
      galois::iterate((uint32_t)0, base_DistGraph::numHosts),
      [&](size_t i) {
        nodeLoads[i] += nonAtomicNodeAccum[i];
        edgeLoads[i] += nonAtomicEdgeAccum[i];
      },
      galois::no_stats());
#ifndef NDEBUG
  // extractAtomicToPODArray must have zeroed every accumulator
  for (unsigned i = 0; i < nodeAccum.size(); i++) {
    assert(nodeAccum[i].load() == 0);
    assert(edgeAccum[i].load() == 0);
  }
#endif
  // send both nodes and edges accumulation at once
  asyncSendLoad(nonAtomicNodeAccum, nonAtomicEdgeAccum);
  asyncRecvLoad(nodeLoads, edgeLoads, loadsClear);
  syncTimer.stop();
}

/**
 * Debug function: simply prints loads and accumulations
 *
 * @param loads Vector of loads to accumulate to
 * @param accums Vector of accumulations to loads that occurred since last
 * sync
 */
void printLoad(std::vector& loads, std::vector>& accums) {
  assert(loads.size() == accums.size());
  for (unsigned i = 0; i < loads.size(); i++) {
    galois::gDebug("[", base_DistGraph::id, "] ", i, " total ", loads[i],
                   " accum ", accums[i].load());
  }
}

/**
 * Given a vector of data and a bitset specifying which elements in the data
 * vector need to be extracted, extract the appropriate elements into
 * a vector.
 *
 * @param offsets Bitset specifying which elements in the data vector need
 * to be extracted.
 * @param dataVector Data vector to extract data from according to the bitset
 * @return Vector of extracted elements
 */
template
std::vector getDataFromOffsets(std::vector& offsetVector, const std::vector& dataVector) {
  std::vector toReturn;
  toReturn.resize(offsetVector.size());
  // parallel gather: toReturn[i] = dataVector[offsetVector[i]]
  galois::do_all(
      galois::iterate((size_t)0, offsetVector.size()),
      [&](unsigned i) { toReturn[i] = dataVector[offsetVector[i]]; },
      galois::no_stats());
  return toReturn;
}

/**
 * Given a host, a bitset that marks offsets, and a vector,
 * send the data located at the offsets from the vector to the
 * specified host. If bitset is unmarked, send a no-op.
 *
 * @param targetHost Host to send data to
 * @param toSync Bitset that specifies which offsets in the data vector
 * to send
 * @param dataVector Data to be sent to the target host
 * @param timerName suffix appended to the send-timer/stat name
 */
// NOTE(review): template arguments were stripped from this extract (e.g.
// "std::vector& dataVector", "galois::gstl::Vector>"); declarations kept
// verbatim — restore the type parameters from upstream before compiling.
//
// Wire protocol used below (first serialized word of every message):
//   0 = no-op, 1 = bitset + data, 2 = offset list + data, 3 = all-clear.
void sendOffsets(unsigned targetHost, galois::DynamicBitSet& toSync, std::vector& dataVector, std::string timerName = std::string()) {
  auto& net = galois::runtime::getSystemNetworkInterface();
  std::string statString = std::string("Phase0SendOffsets_") + timerName;
  uint64_t bytesSent = 0;
  galois::StatTimer sendOffsetsTimer(statString.c_str(), GRNAME);
  sendOffsetsTimer.start();
  // this means there are updates to send
  if (toSync.count()) {
    std::vector offsetVector = toSync.getOffsets();
    // get masters to send into a vector
    std::vector mastersToSend = getDataFromOffsets(offsetVector, dataVector);
    assert(mastersToSend.size());
    size_t num_selected = toSync.count();
    size_t num_total = toSync.size();
    // figure out how to send (most efficient method; either bitset
    // and data or offsets + data)
    size_t bitset_alloc_size = ((num_total + 63) / 64) * sizeof(uint64_t) + (2 * sizeof(size_t));
    size_t bitsetDataSize = (num_selected * sizeof(uint32_t)) + bitset_alloc_size + sizeof(num_selected);
    size_t offsetsDataSize = (num_selected * sizeof(uint32_t)) + (num_selected * sizeof(unsigned int)) + sizeof(uint32_t) + sizeof(num_selected);
    galois::runtime::SendBuffer b;
    // tag with send method and do send
    if (bitsetDataSize < offsetsDataSize) {
      // send bitset, tag 1
      galois::runtime::gSerialize(b, 1u);
      galois::runtime::gSerialize(b, toSync);
      galois::runtime::gSerialize(b, mastersToSend);
    } else {
      // send offsets, tag 2
      galois::runtime::gSerialize(b, 2u);
      galois::runtime::gSerialize(b, offsetVector);
      galois::runtime::gSerialize(b, mastersToSend);
    }
    bytesSent += b.size();
    net.sendTagged(targetHost, galois::runtime::evilPhase, b);
  } else {
    // send empty no-op message, tag 0
    galois::runtime::SendBuffer b;
    galois::runtime::gSerialize(b, 0u);
    bytesSent += b.size();
    net.sendTagged(targetHost, galois::runtime::evilPhase, b);
  }
  sendOffsetsTimer.stop();
  galois::runtime::reportStat_Tsum(GRNAME, statString + "BytesSent",
                                   bytesSent);
}

/**
 * Send new master assignment updates to other hosts based on syncNodes
 * for each host prepared in advance.
 *
 * @param begin to end: which nodes on this host have been updated
 * @param numLocalNodes: number of owned nodes
 * @param localNodeToMaster Vector map: an offset corresponds to a particular
 * GID; indicates masters of GIDs
 * @param syncNodes one vector of nodes for each host: contains mirrors on
 * this host whose master is on that host
 */
void syncAssignmentSends( uint32_t begin, uint32_t end, uint32_t numLocalNodes, std::vector& localNodeToMaster, galois::gstl::Vector>& syncNodes) {
  galois::StatTimer p0assignSendTime("Phase0AssignmentSendTime", GRNAME);
  p0assignSendTime.start();
  galois::DynamicBitSet toSync;
  toSync.resize(numLocalNodes);
  // send loop
  for (unsigned h = 0; h < base_DistGraph::numHosts; h++) {
    if (h != base_DistGraph::id) {
      toSync.reset();
      // send if in [start,end) and present in syncNodes[h]
      galois::do_all(
          galois::iterate(syncNodes[h]),
          [&](uint32_t lid) {
            if ((lid >= begin) && (lid < end)) {
              toSync.set(lid);
            }
          },
          galois::no_stats());
      // do actual send based on sync bitset
      sendOffsets(h, toSync, localNodeToMaster, "NewAssignments");
    }
  }
  p0assignSendTime.stop();
}

/**
 * Send message to all hosts saying we're done with assignments. Can
 * specify a phase to distinguish between all clears for assignments
 * and loads
 *
 * @param phase 0 = assignment all-clear (evilPhase), 1 = load all-clear
 * (evilPhase + 1); any other value is fatal
 */
void sendAllClears(unsigned phase = 0) {
  unsigned bytesSent = 0;
  auto& net = galois::runtime::getSystemNetworkInterface();
  galois::StatTimer allClearTimer("Phase0SendAllClearTime", GRNAME);
  allClearTimer.start();
  // send loop
  for (unsigned h = 0; h < base_DistGraph::numHosts; h++) {
    if (h != base_DistGraph::id) {
      galois::runtime::SendBuffer b;
      // tag 3 = all-clear
      galois::runtime::gSerialize(b, 3u);
      bytesSent += b.size();
      // assumes phase is 0 or 1
      if (phase == 1) {
        net.sendTagged(h, base_DistGraph::evilPhasePlus1(), b);
      } else if (phase == 0) {
        net.sendTagged(h, galois::runtime::evilPhase, b);
      } else {
        GALOIS_DIE("unexpected phase: ", phase);
      }
    }
  }
  allClearTimer.stop();
  galois::runtime::reportStat_Tsum(GRNAME, "Phase0SendAllClearBytesSent",
                                   bytesSent);
}

// Applies received (offset, master) pairs from sendingHost into
// localNodeToMaster, translating the sender-relative offsets to this host's
// vector positions through gid2offsets.
void saveReceivedMappings(std::vector& localNodeToMaster, std::unordered_map& gid2offsets, unsigned sendingHost, std::vector& receivedOffsets, std::vector& receivedMasters) {
  uint64_t hostOffset = base_DistGraph::gid2host[sendingHost].first;
  galois::gDebug("[", base_DistGraph::id, "] host ", sendingHost, " offset ",
                 hostOffset);
  // if execution gets here, messageType was 1 or 2
  assert(receivedMasters.size() == receivedOffsets.size());
  galois::do_all(
      galois::iterate((size_t)0, receivedMasters.size()),
      [&](size_t i) {
        uint64_t curGID = hostOffset + receivedOffsets[i];
        uint32_t indexIntoMap = gid2offsets[curGID];
        galois::gDebug("[", base_DistGraph::id, "] gid ", curGID, " offset ",
                       indexIntoMap);
        localNodeToMaster[indexIntoMap] = receivedMasters[i];
      },
      galois::no_stats());
}

/**
 * Receive offsets and masters into the provided vectors and return sending
 * host and the message type.
 *
 * @param receivedOffsets vector to receive offsets into
 * @param receivedMasters vector to receive masters mappings into
 * @returns sending host and message type of received data
 */
std::pair recvOffsetsAndMasters(std::vector& receivedOffsets, std::vector& receivedMasters) {
  auto& net = galois::runtime::getSystemNetworkInterface();
  decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p;
  // blocking receive: spin until a message arrives
  do {
    p = net.recieveTagged(galois::runtime::evilPhase, nullptr);
  } while (!p);
  uint32_t sendingHost = p->first;
  unsigned messageType = (unsigned)-1;
  // deserialize message type
  galois::runtime::gDeserialize(p->second, messageType);
  if (messageType == 1) {
    // bitset; deserialize, then get offsets
    galois::DynamicBitSet receivedSet;
    galois::runtime::gDeserialize(p->second, receivedSet);
    receivedOffsets = receivedSet.getOffsets();
    galois::runtime::gDeserialize(p->second, receivedMasters);
  } else if (messageType == 2) {
    // offsets
    galois::runtime::gDeserialize(p->second, receivedOffsets);
    galois::runtime::gDeserialize(p->second, receivedMasters);
  } else if (messageType != 0) {
    // 0 is a legal no-op; anything else is a protocol violation
    GALOIS_DIE("invalid message type for sync of master assignments: ",
               messageType);
  }
  galois::gDebug("[", base_DistGraph::id, "] host ", sendingHost,
                 " send message type ", messageType);
  return std::make_pair(sendingHost, messageType);
}

/**
 * Receive offsets and masters into the provided vectors and return sending
 * host and the message type, async (i.e. does not have to receive anything
 * to exit function.
 *
 * @param localNodeToMaster updated in place with received master mappings
 * @param gid2offsets GID -> vector-offset translation map
 * @param hostFinished bitset marking hosts that sent their tag-3 "done"
 */
void recvOffsetsAndMastersAsync( std::vector& localNodeToMaster, std::unordered_map& gid2offsets, galois::DynamicBitSet& hostFinished) {
  auto& net = galois::runtime::getSystemNetworkInterface();
  decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p;
  // repeat loop until no message
  do {
    p = net.recieveTagged(galois::runtime::evilPhase, nullptr);
    if (p) {
      uint32_t sendingHost = p->first;
      unsigned messageType = (unsigned)-1;
      std::vector receivedOffsets;
      std::vector receivedMasters;
      // deserialize message type
      galois::runtime::gDeserialize(p->second, messageType);
      if (messageType == 1) {
        // bitset; deserialize, then get offsets
        galois::DynamicBitSet receivedSet;
        galois::runtime::gDeserialize(p->second, receivedSet);
        receivedOffsets = receivedSet.getOffsets();
        galois::runtime::gDeserialize(p->second, receivedMasters);
        saveReceivedMappings(localNodeToMaster, gid2offsets, sendingHost,
                             receivedOffsets, receivedMasters);
      } else if (messageType == 2) {
        // offsets
        galois::runtime::gDeserialize(p->second, receivedOffsets);
        galois::runtime::gDeserialize(p->second, receivedMasters);
        saveReceivedMappings(localNodeToMaster, gid2offsets, sendingHost,
                             receivedOffsets, receivedMasters);
      } else if (messageType == 3) {
        // host indicating that it is done with all assignments from its
        // end; mark as such in bitset
        assert(!hostFinished.test(sendingHost));
        hostFinished.set(sendingHost);
      } else if (messageType != 0) {
        GALOIS_DIE("invalid message type for sync of master assignments: ",
                   messageType);
      }
      galois::gDebug("[", base_DistGraph::id, "] host ", sendingHost,
                     " send message type ", messageType);
    }
  } while (p);
}

/**
 * Receive new master assignment updates from other hosts and update local
 * mappings.
 *
 * @param localNodeToMaster Vector map: an offset corresponds to a particular
 * GID; indicates masters of GIDs
 * @param gid2offsets Map of GIDs to the offset into the vector map that
 * corresponds to it
 */
void syncAssignmentReceives(std::vector& localNodeToMaster, std::unordered_map& gid2offsets) {
  galois::StatTimer p0assignReceiveTime("Phase0AssignmentReceiveTime",
                                        GRNAME);
  p0assignReceiveTime.start();
  // receive loop: exactly one message from every other host
  for (unsigned h = 0; h < base_DistGraph::numHosts - 1; h++) {
    unsigned sendingHost;
    unsigned messageType;
    std::vector receivedOffsets;
    std::vector receivedMasters;
    std::tie(sendingHost, messageType) = recvOffsetsAndMasters(receivedOffsets, receivedMasters);
    if (messageType == 1 || messageType == 2) {
      saveReceivedMappings(localNodeToMaster, gid2offsets, sendingHost,
                           receivedOffsets, receivedMasters);
    }
  }
  p0assignReceiveTime.stop();
}

// Async variant: drains whatever assignment messages are currently pending
// without blocking; also records per-host tag-3 "finished" notifications.
void syncAssignmentReceivesAsync( std::vector& localNodeToMaster, std::unordered_map& gid2offsets, galois::DynamicBitSet& hostFinished) {
  galois::StatTimer p0assignReceiveTime("Phase0AssignmentReceiveTimeAsync",
                                        GRNAME);
  p0assignReceiveTime.start();
  recvOffsetsAndMastersAsync(localNodeToMaster, gid2offsets, hostFinished);
  p0assignReceiveTime.stop();
}

/**
 * Send/receive new master assignment updates to other hosts.
 *
 * @param begin to end: which nodes on this host have been updated
 * @param numLocalNodes: number of owned nodes
 * @param localNodeToMaster Vector map: an offset corresponds to a particular
 * GID; indicates masters of GIDs
 * @param syncNodes one vector of nodes for each host: contains mirrors on
 * this host whose master is on that host
 * @param gid2offsets Map of GIDs to the offset into the vector map that
 * corresponds to it
 */
void syncAssignment( uint32_t begin, uint32_t end, uint32_t numLocalNodes, std::vector& localNodeToMaster, galois::gstl::Vector>& syncNodes, std::unordered_map& gid2offsets) {
  galois::StatTimer syncAssignmentTimer("Phase0SyncAssignmentTime", GRNAME);
  syncAssignmentTimer.start();
  syncAssignmentSends(begin, end, numLocalNodes, localNodeToMaster,
                      syncNodes);
  syncAssignmentReceives(localNodeToMaster, gid2offsets);
  syncAssignmentTimer.stop();
}

// Async variant of syncAssignment: sends are identical, receives do not block.
void syncAssignmentAsync( uint32_t begin, uint32_t end, uint32_t numLocalNodes, std::vector& localNodeToMaster, galois::gstl::Vector>& syncNodes, std::unordered_map& gid2offsets, galois::DynamicBitSet& hostFinished) {
  galois::StatTimer syncAssignmentTimer("Phase0SyncAssignmentAsyncTime",
                                        GRNAME);
  syncAssignmentTimer.start();
  syncAssignmentSends(begin, end, numLocalNodes, localNodeToMaster,
                      syncNodes);
  syncAssignmentReceivesAsync(localNodeToMaster, gid2offsets, hostFinished);
  syncAssignmentTimer.stop();
}

/**
 * Send masters mappings that were read on this host to their appropriate
 * owners
 *
 * @param localNodeToMaster local id to master mapping map
 * @param syncNodes bitsets specifying which hosts have which neighbors
 * that this host has read
 */
void sendMastersToOwners( std::vector& localNodeToMaster, galois::gstl::Vector>& syncNodes) {
  uint32_t begin = base_DistGraph::gid2host[base_DistGraph::id].first;
  uint32_t end = base_DistGraph::gid2host[base_DistGraph::id].second;
  // for each host, determine which master assignments still need to be sent
  // (if a host is a master of a node, but that node is not present as a
  // neighbor on the host, then this host needs to send the master assignment)
  galois::DynamicBitSet toSend;
  toSend.resize(end - begin);
  for (unsigned h = 0; h < base_DistGraph::numHosts; ++h) {
    if (h != base_DistGraph::id) {
      toSend.reset();
      // send if present in localNodeToMaster but not present in syncNodes
      galois::do_all(
          galois::iterate((uint32_t)0, end - begin),
          [&](uint32_t lid) {
            if (localNodeToMaster[lid] == h) {
              toSend.set(lid);
            }
          },
          galois::no_stats());
      // clear nodes the target host already knows about (its mirrors here)
      galois::do_all(
          galois::iterate(syncNodes[h]),
          [&](uint32_t lid) { toSend.reset(lid); },
          galois::no_stats());
      sendOffsets(h, toSend, localNodeToMaster, "MastersToOwners");
    }
  }
}

/**
 * Receive master mapping messages from hosts and add it to the graph
 * partitioner's map.
 */
void recvMastersToOwners() {
  // expect one message from every other host
  for (unsigned h = 0; h < base_DistGraph::numHosts - 1; h++) {
    unsigned sendingHost;
    unsigned messageType;
    std::vector receivedOffsets;
    std::vector receivedMasters;
    std::tie(sendingHost, messageType) = recvOffsetsAndMasters(receivedOffsets, receivedMasters);
    if (messageType == 1 || messageType == 2) {
      assert(receivedMasters.size() == receivedOffsets.size());
      uint64_t hostOffset = base_DistGraph::gid2host[sendingHost].first;
      // must be single threaded as map updating isn't thread-safe
      for (unsigned i = 0; i < receivedMasters.size(); i++) {
        uint64_t gidToMap = hostOffset + receivedOffsets[i];
#ifndef NDEBUG
        bool newMapped =
#endif
            graphPartitioner->addMasterMapping(gidToMap, receivedMasters[i]);
        assert(newMapped);
      }
    }
  }
}

/**
 * Phase responsible for initial master assignment.
 *
 * @param bufGraph Locally read graph on this host
 * @param async Specifies whether or not do synchronization of node
 * assignments BSP style or asynchronous style. Note regardless of which
 * is chosen there is a barrier at the end of master assignment.
*/ void phase0(galois::graphs::BufferedGraph& bufGraph, bool async, const uint32_t stateRounds) { galois::DynamicBitSet ghosts; galois::gstl::Vector> syncNodes; // masterNodes syncNodes.resize(base_DistGraph::numHosts); // determine on which hosts that this host's read nodes havs neighbors on phase0BitsetSetup(bufGraph, ghosts); // gid to vector offset setup std::unordered_map gid2offsets; uint64_t neighborCount = phase0MapSetup(ghosts, gid2offsets, syncNodes); galois::gDebug("[", base_DistGraph::id, "] num neighbors found is ", neighborCount); // send off neighbor metadata phase0SendRecv(syncNodes); galois::StatTimer p0allocTimer("Phase0AllocationTime", GRNAME); p0allocTimer.start(); // setup other partitioning metadata: nodes on each host, edges on each // host (as determined by edge cut) std::vector nodeLoads; std::vector edgeLoads; std::vector> nodeAccum; std::vector> edgeAccum; nodeLoads.assign(base_DistGraph::numHosts, 0); edgeLoads.assign(base_DistGraph::numHosts, 0); nodeAccum.assign(base_DistGraph::numHosts, 0); edgeAccum.assign(base_DistGraph::numHosts, 0); uint32_t numLocalNodes = base_DistGraph::gid2host[base_DistGraph::id].second - base_DistGraph::gid2host[base_DistGraph::id].first; std::vector localNodeToMaster; localNodeToMaster.assign(numLocalNodes + neighborCount, (uint32_t)-1); // bitsets tracking termination of assignments and partitioning loads galois::DynamicBitSet hostFinished; galois::DynamicBitSet loadsClear; if (async) { if (base_DistGraph::id == 0) { galois::gPrint("Using asynchronous master determination sends.\n"); } hostFinished.resize(base_DistGraph::numHosts); loadsClear.resize(base_DistGraph::numHosts); } p0allocTimer.stop(); uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first; #ifndef NDEBUG for (uint32_t i : localNodeToMaster) { assert(i == (uint32_t)-1); } #endif if (base_DistGraph::id == 0) { galois::gPrint("Number of BSP sync rounds in master assignment: ", stateRounds, "\n"); } // galois::PerThreadTimer 
ptt( // GRNAME, "Phase0DetermineMaster_" + std::string(base_DistGraph::id) //); for (unsigned syncRound = 0; syncRound < stateRounds; syncRound++) { uint32_t beginNode; uint32_t endNode; std::tie(beginNode, endNode) = galois::block_range( globalOffset, base_DistGraph::gid2host[base_DistGraph::id].second, syncRound, stateRounds); // create specific range for this block std::vector rangeVec; auto work = getSpecificThreadRange(bufGraph, rangeVec, beginNode, endNode); // debug print // galois::on_each([&] (unsigned i, unsigned j) { // galois::gDebug("[", base_DistGraph::id, " ", i, "] sync round ", // syncRound, " local range ", // *work.local_begin(), " ", *work.local_end()); //}); galois::do_all( // iterate over my read nodes galois::iterate(work), // galois::iterate(beginNode, endNode), [&](uint32_t node) { // ptt.start(); // determine master function takes source node, iterator of // neighbors uint32_t assignedHost = graphPartitioner->getMaster( node, bufGraph, localNodeToMaster, gid2offsets, nodeLoads, nodeAccum, edgeLoads, edgeAccum); // != -1 means it was assigned a host assert(assignedHost != (uint32_t)-1); // update mapping; this is a local node, so can get position // on map with subtraction localNodeToMaster[node - globalOffset] = assignedHost; // galois::gDebug("[", base_DistGraph::id, "] state round ", // syncRound, // " set ", node, " ", node - globalOffset); // ptt.stop(); }, galois::loopname("Phase0DetermineMasters"), galois::steal(), galois::no_stats()); // do synchronization of master assignment of neighbors if (!async) { syncAssignment(beginNode - globalOffset, endNode - globalOffset, numLocalNodes, localNodeToMaster, syncNodes, gid2offsets); } else { // don't need to send anything if there is nothing to send unlike sync if (beginNode != endNode) { syncAssignmentAsync(beginNode - globalOffset, endNode - globalOffset, numLocalNodes, localNodeToMaster, syncNodes, gid2offsets, hostFinished); } } // sync node/edge loads galois::StatTimer 
loadSyncTimer("Phase0LoadSyncTime", GRNAME); loadSyncTimer.start(); if (!async) { syncLoad(nodeLoads, nodeAccum); syncLoad(edgeLoads, edgeAccum); } else { asyncSyncLoad(nodeLoads, nodeAccum, edgeLoads, edgeAccum, loadsClear); } loadSyncTimer.stop(); #ifndef NDEBUG if (async) { galois::gDebug("[", base_DistGraph::id, "] host count ", hostFinished.count()); } #endif } // if asynchronous, don't move on until everything is done if (async) { galois::StatTimer waitTime("Phase0AsyncWaitTime", GRNAME); // assignment clears sendAllClears(); // load clears sendAllClears(1); hostFinished.set(base_DistGraph::id); loadsClear.set(base_DistGraph::id); waitTime.start(); while (hostFinished.count() != base_DistGraph::numHosts || loadsClear.count() != base_DistGraph::numHosts) { //#ifndef NDEBUG // galois::gDebug("[", base_DistGraph::id, "] waiting for all hosts to // finish, ", // hostFinished.count()); // galois::gDebug("[", base_DistGraph::id, "] waiting for all hosts // loads " // "syncs to finish, ", loadsClear.count()); //#endif // make sure all assignments are done and all loads are done syncAssignmentReceivesAsync(localNodeToMaster, gid2offsets, hostFinished); asyncRecvLoad(nodeLoads, edgeLoads, loadsClear); } waitTime.stop(); } #ifndef NDEBUG printLoad(nodeLoads, nodeAccum); printLoad(edgeLoads, edgeAccum); #endif // sanity check for correctness (all should be assigned) for (uint32_t i = 0; i < localNodeToMaster.size(); i++) { if (localNodeToMaster[i] == (uint32_t)-1) { // galois::gDebug("[", base_DistGraph::id, "] bad index ", i); assert(localNodeToMaster[i] != (uint32_t)-1); } } base_DistGraph::increment_evilPhase(); // increment twice if async is used as async uses 2 phases if (async) { base_DistGraph::increment_evilPhase(); } galois::gPrint("[", base_DistGraph::id, "] Local master assignment " "complete.\n"); // one more step: let masters know of nodes they own (if they don't // have the node locally then this is the only way they will learn about // it) 
galois::StatTimer p0master2ownerTimer("Phase0MastersToOwners", GRNAME); p0master2ownerTimer.start(); sendMastersToOwners(localNodeToMaster, syncNodes); recvMastersToOwners(); p0master2ownerTimer.stop(); galois::gPrint("[", base_DistGraph::id, "] Received my master mappings.\n"); base_DistGraph::increment_evilPhase(); graphPartitioner->saveGID2HostInfo(gid2offsets, localNodeToMaster, bufGraph.getNodeOffset()); } void edgeCutInspection(galois::graphs::BufferedGraph& bufGraph, galois::StatTimer& inspectionTimer, uint64_t edgeOffset, galois::gstl::Vector& prefixSumOfEdges) { galois::DynamicBitSet incomingMirrors; incomingMirrors.resize(base_DistGraph::numGlobalNodes); incomingMirrors.reset(); uint32_t myID = base_DistGraph::id; uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first; // already set before this is called base_DistGraph::localToGlobalVector.resize(base_DistGraph::numOwned); prefixSumOfEdges.resize(base_DistGraph::numOwned); auto& ltgv = base_DistGraph::localToGlobalVector; galois::do_all( galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first, base_DistGraph::gid2host[base_DistGraph::id].second), [&](size_t n) { auto ii = bufGraph.edgeBegin(n); auto ee = bufGraph.edgeEnd(n); for (; ii < ee; ++ii) { uint32_t dst = bufGraph.edgeDestination(*ii); if (graphPartitioner->retrieveMaster(dst) != myID) { incomingMirrors.set(dst); } } prefixSumOfEdges[n - globalOffset] = (*ee) - edgeOffset; ltgv[n - globalOffset] = n; }, #if MORE_DIST_STATS galois::loopname("EdgeInspectionLoop"), #endif galois::steal(), galois::no_stats()); inspectionTimer.stop(); uint64_t allBytesRead = bufGraph.getBytesRead(); galois::gPrint( "[", base_DistGraph::id, "] Edge inspection time: ", inspectionTimer.get_usec() / 1000000.0f, " seconds to read ", allBytesRead, " bytes (", allBytesRead / (float)inspectionTimer.get_usec(), " MBPS)\n"); // get incoming mirrors ready for creation uint32_t additionalMirrorCount = incomingMirrors.count(); 
base_DistGraph::localToGlobalVector.resize( base_DistGraph::localToGlobalVector.size() + additionalMirrorCount); if (base_DistGraph::numOwned > 0) { // fill prefix sum with last number (incomings have no edges) prefixSumOfEdges.resize(prefixSumOfEdges.size() + additionalMirrorCount, prefixSumOfEdges.back()); } else { prefixSumOfEdges.resize(additionalMirrorCount); } if (additionalMirrorCount > 0) { // TODO move this part below into separate function uint32_t totalNumNodes = base_DistGraph::numGlobalNodes; uint32_t activeThreads = galois::getActiveThreads(); std::vector threadPrefixSums(activeThreads); galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range(0u, totalNumNodes, tid, nthreads); uint64_t count = 0; for (size_t i = beginNode; i < endNode; i++) { if (incomingMirrors.test(i)) ++count; } threadPrefixSums[tid] = count; }); // get prefix sums for (unsigned int i = 1; i < threadPrefixSums.size(); i++) { threadPrefixSums[i] += threadPrefixSums[i - 1]; } assert(threadPrefixSums.back() == additionalMirrorCount); uint32_t startingNodeIndex = base_DistGraph::numOwned; // do actual work, second on_each galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range(0u, totalNumNodes, tid, nthreads); // start location to start adding things into prefix sums/vectors uint32_t threadStartLocation = 0; if (tid != 0) { threadStartLocation = threadPrefixSums[tid - 1]; } uint32_t handledNodes = 0; for (size_t i = beginNode; i < endNode; i++) { if (incomingMirrors.test(i)) { base_DistGraph::localToGlobalVector[startingNodeIndex + threadStartLocation + handledNodes] = i; handledNodes++; } } }); } base_DistGraph::numNodes = base_DistGraph::numOwned + additionalMirrorCount; if (prefixSumOfEdges.size() != 0) { base_DistGraph::numEdges = prefixSumOfEdges.back(); } else { base_DistGraph::numEdges = 0; } 
assert(base_DistGraph::localToGlobalVector.size() == base_DistGraph::numNodes); assert(prefixSumOfEdges.size() == base_DistGraph::numNodes); // g2l mapping base_DistGraph::globalToLocalMap.reserve(base_DistGraph::numNodes); for (unsigned i = 0; i < base_DistGraph::numNodes; i++) { // global to local map construction base_DistGraph::globalToLocalMap[base_DistGraph::localToGlobalVector[i]] = i; } assert(base_DistGraph::globalToLocalMap.size() == base_DistGraph::numNodes); base_DistGraph::numNodesWithEdges = base_DistGraph::numOwned; } /** * Given a loaded graph, construct the edges in the DistGraph graph. * Variant that constructs edge data as well. * * @tparam GraphTy type of graph to construct * * @param [in,out] graph Graph to construct edges in * @param bGraph Buffered graph that has edges to write into graph in memory */ template ::value>::type* = nullptr> void edgeCutLoad(GraphTy& graph, galois::graphs::BufferedGraph& bGraph) { if (base_DistGraph::id == 0) { galois::gPrint("Loading edge-data while creating edges\n"); } uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first; bGraph.resetReadCounters(); galois::StatTimer timer("EdgeLoading", GRNAME); timer.start(); galois::do_all( galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first, base_DistGraph::gid2host[base_DistGraph::id].second), [&](size_t n) { auto ii = bGraph.edgeBegin(n); auto ee = bGraph.edgeEnd(n); uint32_t lsrc = this->G2LEdgeCut(n, globalOffset); uint64_t cur = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); for (; ii < ee; ++ii) { auto gdst = bGraph.edgeDestination(*ii); decltype(gdst) ldst = this->G2LEdgeCut(gdst, globalOffset); auto gdata = bGraph.edgeData(*ii); graph.constructEdge(cur++, ldst, gdata); } assert(cur == (*graph.edge_end(lsrc))); }, #if MORE_DIST_STATS galois::loopname("EdgeLoadingLoop"), #endif galois::steal(), galois::no_stats()); timer.stop(); galois::gPrint("[", base_DistGraph::id, "] Edge loading time: ", timer.get_usec() / 
1000000.0f, " seconds to read ", bGraph.getBytesRead(), " bytes (", bGraph.getBytesRead() / (float)timer.get_usec(), " MBPS)\n"); } /** * Given a loaded graph, construct the edges in the DistGraph graph. * No edge data. * * @tparam GraphTy type of graph to construct * * @param [in,out] graph Graph to construct edges in * @param bGraph Buffered graph that has edges to write into graph in memory */ template ::value>::type* = nullptr> void edgeCutLoad(GraphTy& graph, galois::graphs::BufferedGraph& bGraph) { if (base_DistGraph::id == 0) { galois::gPrint("Loading edge-data while creating edges\n"); } uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first; bGraph.resetReadCounters(); galois::StatTimer timer("EdgeLoading", GRNAME); timer.start(); galois::do_all( galois::iterate(base_DistGraph::gid2host[base_DistGraph::id].first, base_DistGraph::gid2host[base_DistGraph::id].second), [&](size_t n) { auto ii = bGraph.edgeBegin(n); auto ee = bGraph.edgeEnd(n); uint32_t lsrc = this->G2LEdgeCut(n, globalOffset); uint64_t cur = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); for (; ii < ee; ++ii) { auto gdst = bGraph.edgeDestination(*ii); decltype(gdst) ldst = this->G2LEdgeCut(gdst, globalOffset); graph.constructEdge(cur++, ldst); } assert(cur == (*graph.edge_end(lsrc))); }, #if MORE_DIST_STATS galois::loopname("EdgeLoadingLoop"), #endif galois::steal(), galois::no_stats()); timer.stop(); galois::gPrint("[", base_DistGraph::id, "] Edge loading time: ", timer.get_usec() / 1000000.0f, " seconds to read ", bGraph.getBytesRead(), " bytes (", bGraph.getBytesRead() / (float)timer.get_usec(), " MBPS)\n"); } /** * Assign edges to hosts (but don't actually send), and send this information * out to all hosts * @param[in] bufGraph local graph to read * @param[in,out] numOutgoingEdges specifies which nodes on a host will have * outgoing edges * @param[in,out] hasIncomingEdge indicates which nodes (that need to be * created)on a host have incoming edges */ void 
edgeInspection(galois::graphs::BufferedGraph& bufGraph, std::vector>& numOutgoingEdges, std::vector& hasIncomingEdge, galois::StatTimer& inspectionTimer) { // number of nodes that this host has read from disk uint32_t numRead = base_DistGraph::gid2host[base_DistGraph::id].second - base_DistGraph::gid2host[base_DistGraph::id].first; // allocate space for outgoing edges for (uint32_t i = 0; i < base_DistGraph::numHosts; ++i) { numOutgoingEdges[i].assign(numRead, 0); } galois::DynamicBitSet hostHasOutgoing; hostHasOutgoing.resize(base_DistGraph::numHosts); hostHasOutgoing.reset(); assignEdges(bufGraph, numOutgoingEdges, hasIncomingEdge, hostHasOutgoing); inspectionTimer.stop(); // report edge inspection time uint64_t allBytesRead = bufGraph.getBytesRead(); galois::gPrint( "[", base_DistGraph::id, "] Edge inspection time: ", inspectionTimer.get_usec() / 1000000.0f, " seconds to read ", allBytesRead, " bytes (", allBytesRead / (float)inspectionTimer.get_usec(), " MBPS)\n"); // old inspection barrier // galois::runtime::getHostBarrier().wait(); sendInspectionData(numOutgoingEdges, hasIncomingEdge, hostHasOutgoing); // setup a single hasIncomingEdge bitvector uint32_t myHostID = base_DistGraph::id; if (hasIncomingEdge[myHostID].size() == 0) { hasIncomingEdge[myHostID].resize(base_DistGraph::numGlobalNodes); hasIncomingEdge[myHostID].reset(); } recvInspectionData(numOutgoingEdges, hasIncomingEdge[myHostID]); base_DistGraph::increment_evilPhase(); } /** * Inspect read edges and determine where to send them. Mark metadata as * necessary. 
* * @param[in] bufGraph local graph to read * @param[in,out] numOutgoingEdges specifies which nodes on a host will have * outgoing edges * @param[in,out] hasIncomingEdge indicates which nodes (that need to be * created)on a host have incoming edges * @param[in,out] hostHasOutgoing bitset tracking which hosts have outgoing * edges from this host */ void assignEdges(galois::graphs::BufferedGraph& bufGraph, std::vector>& numOutgoingEdges, std::vector& hasIncomingEdge, galois::DynamicBitSet& hostHasOutgoing) { std::vector> indicatorVars( base_DistGraph::numHosts); // initialize indicators of initialized bitsets to 0 for (unsigned i = 0; i < base_DistGraph::numHosts; i++) { indicatorVars[i] = 0; } // global offset into my read nodes uint64_t globalOffset = base_DistGraph::gid2host[base_DistGraph::id].first; uint32_t globalNodes = base_DistGraph::numGlobalNodes; for (unsigned syncRound = 0; syncRound < _edgeStateRounds; syncRound++) { uint32_t beginNode; uint32_t endNode; std::tie(beginNode, endNode) = galois::block_range( globalOffset, base_DistGraph::gid2host[base_DistGraph::id].second, syncRound, _edgeStateRounds); // TODO maybe edge range this? 
galois::do_all( // iterate over my read nodes galois::iterate(beginNode, endNode), [&](size_t src) { auto ee = bufGraph.edgeBegin(src); auto ee_end = bufGraph.edgeEnd(src); uint64_t numEdgesL = std::distance(ee, ee_end); for (; ee != ee_end; ee++) { uint32_t dst = bufGraph.edgeDestination(*ee); uint32_t hostBelongs = -1; hostBelongs = graphPartitioner->getEdgeOwner(src, dst, numEdgesL); if (_edgeStateRounds > 1) { hostLoads[hostBelongs] += 1; } numOutgoingEdges[hostBelongs][src - globalOffset] += 1; hostHasOutgoing.set(hostBelongs); bool hostIsMasterOfDest = (hostBelongs == graphPartitioner->retrieveMaster(dst)); // this means a mirror must be created for destination node on // that host since it will not be created otherwise if (!hostIsMasterOfDest) { auto& bitsetStatus = indicatorVars[hostBelongs]; // initialize the bitset if necessary if (bitsetStatus == 0) { char expected = 0; bool result = bitsetStatus.compare_exchange_strong(expected, 1); // i swapped successfully, therefore do allocation if (result) { hasIncomingEdge[hostBelongs].resize(globalNodes); hasIncomingEdge[hostBelongs].reset(); bitsetStatus = 2; } } // until initialized, loop while (indicatorVars[hostBelongs] != 2) ; hasIncomingEdge[hostBelongs].set(dst); } } }, #if MORE_DIST_STATS galois::loopname("AssignEdges"), #endif galois::steal(), galois::no_stats()); syncEdgeLoad(); } } /** * Given a vector specifying which nodes have edges for an unspecified * receiver host, save the masters of those nodes (which are known on this * host but not necessarily other hosts) into a vector and serialize it for * the receiver to update their master node mapping. 
* * @param b Send buffer * @param hostOutgoingEdges Number of edges that the receiver of this * vector should expect for each node on this host */ void serializeOutgoingMasterMap(galois::runtime::SendBuffer& b, const std::vector& hostOutgoingEdges) { // 2 phase: one phase determines amount of work each thread does, // second has threads actually do copies uint32_t activeThreads = galois::getActiveThreads(); std::vector threadPrefixSums(activeThreads); size_t hostSize = base_DistGraph::gid2host[base_DistGraph::id].second - base_DistGraph::gid2host[base_DistGraph::id].first; assert(hostSize == hostOutgoingEdges.size()); // for each thread, figure out how many items it will work with // (non-zero outgoing edges) galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); uint64_t count = 0; for (size_t i = beginNode; i < endNode; i++) { if (hostOutgoingEdges[i] > 0) { count++; } } threadPrefixSums[tid] = count; }); // get prefix sums for (unsigned int i = 1; i < threadPrefixSums.size(); i++) { threadPrefixSums[i] += threadPrefixSums[i - 1]; } uint32_t numNonZero = threadPrefixSums[activeThreads - 1]; std::vector masterLocation; masterLocation.resize(numNonZero, (uint32_t)-1); // should only be in here if there's something to send in first place assert(numNonZero > 0); uint64_t startNode = base_DistGraph::gid2host[base_DistGraph::id].first; // do actual work, second on_each; find non-zeros again, get master // corresponding to that non-zero and send to other end galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); // start location to start adding things into prefix sums/vectors uint32_t threadStartLocation = 0; if (tid != 0) { threadStartLocation = threadPrefixSums[tid - 1]; } uint32_t handledNodes = 0; for (size_t i = beginNode; i 
< endNode; i++) { if (hostOutgoingEdges[i] > 0) { // get master of i masterLocation[threadStartLocation + handledNodes] = graphPartitioner->retrieveMaster(i + startNode); handledNodes++; } } }); #ifndef NDEBUG for (uint32_t i : masterLocation) { assert(i != (uint32_t)-1); } #endif // serialize into buffer; since this is sent along with vector receiver end // will know how to deal with it galois::runtime::gSerialize(b, masterLocation); } void serializeIncomingMasterMap(galois::runtime::SendBuffer& b, const galois::DynamicBitSet& hostIncomingEdges) { size_t numOfNodes = hostIncomingEdges.count(); std::vector masterMap; masterMap.resize(numOfNodes, (uint32_t)-1); std::vector bitsetOffsets = hostIncomingEdges.getOffsets(); // size_t firstBound = base_DistGraph::gid2host[h].first; // size_t secondBound = base_DistGraph::gid2host[h].second; // galois::do_all( // galois::iterate((size_t)0, firstBound), // [&] (size_t offset) { // masterMap[offset] = // graphPartitioner->retrieveMaster(bitsetOffsets[offset]); // }, // galois::no_stats() //); galois::do_all( // galois::iterate((size_t)secondBound, numOfNodes), galois::iterate((size_t)0, numOfNodes), [&](size_t offset) { masterMap[offset] = graphPartitioner->retrieveMaster(bitsetOffsets[offset]); }, galois::no_stats()); #ifndef NDEBUG for (uint32_t i : masterMap) { assert(i != (uint32_t)-1); assert(i < base_DistGraph::numHosts); } #endif // serialize into buffer; since this is sent along with vector receiver end // will know how to deal with it galois::runtime::gSerialize(b, masterMap); } void deserializeOutgoingMasterMap( uint32_t senderHost, const std::vector& hostOutgoingEdges, const std::vector& recvMasterLocations) { uint64_t hostOffset = base_DistGraph::gid2host[senderHost].first; size_t hostSize = base_DistGraph::gid2host[senderHost].second - base_DistGraph::gid2host[senderHost].first; assert(hostSize == hostOutgoingEdges.size()); galois::DynamicBitSet offsetsToConsider; offsetsToConsider.resize(hostSize); 
offsetsToConsider.reset(); // step 1: figure out offsets that need to be handled (i.e. non-zero): only // handle if not already in map galois::do_all( galois::iterate((size_t)0, hostOutgoingEdges.size()), [&](size_t offset) { if (hostOutgoingEdges[offset] > 0) { offsetsToConsider.set(offset); } }, galois::no_stats(), galois::steal()); assert(offsetsToConsider.count() == recvMasterLocations.size()); // step 2: using bitset that tells which offsets are set, add // to already master map in partitioner (this is single threaded // since map is not a concurrent data structure) size_t curCount = 0; // size_t actuallySet = 0; for (uint32_t offset : offsetsToConsider.getOffsets()) { // galois::gDebug("[", base_DistGraph::id, "] ", " setting ", // offset + hostOffset, " from host ", senderHost, // " to ", recvMasterLocations[curCount]); graphPartitioner->addMasterMapping(offset + hostOffset, recvMasterLocations[curCount]); // bool set = graphPartitioner->addMasterMapping(offset + hostOffset, // recvMasterLocations[curCount]); // if (set) { actuallySet++; } curCount++; } // galois::gDebug("[", base_DistGraph::id, "] host ", senderHost, ": set ", // actuallySet, " out of ", recvMasterLocations.size()); } /** * Map GIDs to masters from incoming master map sent from hosts. * * @param senderHost host that sent the data * @param gids GIDs corresponding to the received master locations * @param recvMasterLocations masters of GIDs in the gids vector */ void deserializeIncomingMasterMap( const std::vector& gids, const std::vector& recvMasterLocations) { assert(gids.size() == recvMasterLocations.size()); size_t curCount = 0; for (uint64_t gid : gids) { assert(gid < base_DistGraph::numGlobalNodes); // galois::gDebug("[", base_DistGraph::id, "] ", " in-setting ", gid, " to // ", // recvMasterLocations[curCount]); graphPartitioner->addMasterMapping(gid, recvMasterLocations[curCount]); curCount++; } } /** * Send data out from inspection to other hosts. 
* * @param[in,out] numOutgoingEdges specifies which nodes on a host will have * outgoing edges * @param[in,out] hasIncomingEdge indicates which nodes (that need to be * created)on a host have incoming edges * @param[in] hostHasOutgoing bitset tracking which hosts have outgoing * edges from this host */ void sendInspectionData(std::vector>& numOutgoingEdges, std::vector& hasIncomingEdge, galois::DynamicBitSet& hostHasOutgoing) { auto& net = galois::runtime::getSystemNetworkInterface(); galois::GAccumulator bytesSent; bytesSent.reset(); for (unsigned h = 0; h < net.Num; h++) { if (h == net.ID) { // i have no outgoing edges i will keep; go ahead and clear if (!hostHasOutgoing.test(h)) { numOutgoingEdges[h].clear(); } continue; } // send outgoing edges data off to comm partner galois::runtime::SendBuffer b; // only send if non-zeros exist if (hostHasOutgoing.test(h)) { galois::runtime::gSerialize(b, 1); // token saying data exists galois::runtime::gSerialize(b, numOutgoingEdges[h]); if (graphPartitioner->masterAssignPhase()) { serializeOutgoingMasterMap(b, numOutgoingEdges[h]); } } else { galois::runtime::gSerialize(b, 0); // token saying no data exists } numOutgoingEdges[h].clear(); // determine form to send bitset in galois::DynamicBitSet& curBitset = hasIncomingEdge[h]; uint64_t bitsetSize = curBitset.size(); // num bits uint64_t onlyOffsetsSize = curBitset.count() * 32; if (bitsetSize == 0) { // there was nothing there to send in first place galois::runtime::gSerialize(b, 0); } else if (onlyOffsetsSize <= bitsetSize) { // send only offsets std::vector offsets = curBitset.getOffsets(); galois::runtime::gSerialize(b, 2); // 2 = only offsets galois::runtime::gSerialize(b, offsets); if (graphPartitioner->masterAssignPhase()) { // galois::gDebug("incoming master map serialization"); // serializeIncomingMasterMap(b, curBitset, h); serializeIncomingMasterMap(b, curBitset); } } else { // send entire bitset galois::runtime::gSerialize(b, 1); galois::runtime::gSerialize(b, 
curBitset); if (graphPartitioner->masterAssignPhase()) { // galois::gDebug("incoming master map serialization"); // serializeIncomingMasterMap(b, curBitset, h); serializeIncomingMasterMap(b, curBitset); } } // get memory from bitset back curBitset.resize(0); bytesSent.update(b.size()); // send buffer and free memory net.sendTagged(h, galois::runtime::evilPhase, b); b.getVec().clear(); } galois::runtime::reportStat_Tsum( GRNAME, std::string("EdgeInspectionBytesSent"), bytesSent.reduce()); galois::gPrint("[", base_DistGraph::id, "] Inspection sends complete.\n"); } /** * Receive data from inspection from other hosts. Processes the incoming * edge bitsets/offsets. * * @param[in,out] numOutgoingEdges specifies which nodes on a host will have * outgoing edges * @param[in,out] hasIncomingEdge indicates which nodes (that need to be * created) on this host have incoming edges */ void recvInspectionData(std::vector>& numOutgoingEdges, galois::DynamicBitSet& hasIncomingEdge) { auto& net = galois::runtime::getSystemNetworkInterface(); for (unsigned h = 0; h < net.Num - 1; h++) { // expect data from comm partner back decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); uint32_t sendingHost = p->first; // get outgoing edges; first get status var uint32_t outgoingExists = 2; galois::runtime::gDeserialize(p->second, outgoingExists); if (outgoingExists == 1) { // actual data sent galois::runtime::gDeserialize(p->second, numOutgoingEdges[sendingHost]); if (graphPartitioner->masterAssignPhase()) { std::vector recvMasterLocations; galois::runtime::gDeserialize(p->second, recvMasterLocations); deserializeOutgoingMasterMap( sendingHost, numOutgoingEdges[sendingHost], recvMasterLocations); } } else if (outgoingExists == 0) { // no data sent; just clear again numOutgoingEdges[sendingHost].clear(); } else { GALOIS_DIE("invalid recv inspection data metadata mode, outgoing"); } uint32_t 
bitsetMetaMode = 3; // initialize to invalid mode galois::runtime::gDeserialize(p->second, bitsetMetaMode); if (bitsetMetaMode == 1) { // sent as bitset; deserialize then or with main bitset galois::DynamicBitSet recvSet; galois::runtime::gDeserialize(p->second, recvSet); hasIncomingEdge.bitwise_or(recvSet); if (graphPartitioner->masterAssignPhase()) { std::vector recvMasterLocations; galois::runtime::gDeserialize(p->second, recvMasterLocations); deserializeIncomingMasterMap(recvSet.getOffsets(), recvMasterLocations); } } else if (bitsetMetaMode == 2) { // sent as vector of offsets std::vector recvOffsets; galois::runtime::gDeserialize(p->second, recvOffsets); for (uint32_t offset : recvOffsets) { hasIncomingEdge.set(offset); } if (graphPartitioner->masterAssignPhase()) { std::vector recvMasterLocations; galois::runtime::gDeserialize(p->second, recvMasterLocations); deserializeIncomingMasterMap(recvOffsets, recvMasterLocations); } } else if (bitsetMetaMode == 0) { // do nothing; there was nothing to receive } else { GALOIS_DIE("invalid recv inspection data metadata mode"); } } galois::gPrint("[", base_DistGraph::id, "] Inspection receives complete.\n"); } /** * Take inspection metadata and being mapping nodes/creating prefix sums, * return the prefix sum. 
*/ galois::gstl::Vector nodeMapping(std::vector>& numOutgoingEdges, galois::DynamicBitSet& hasIncomingEdge, galois::gstl::Vector& prefixSumOfEdges) { base_DistGraph::numNodes = 0; base_DistGraph::numEdges = 0; nodesToReceive = 0; // reserve overestimation of nodes prefixSumOfEdges.reserve(base_DistGraph::numGlobalNodes / base_DistGraph::numHosts * 1.15); base_DistGraph::localToGlobalVector.reserve( base_DistGraph::numGlobalNodes / base_DistGraph::numHosts * 1.15); inspectMasterNodes(numOutgoingEdges, prefixSumOfEdges); inspectOutgoingNodes(numOutgoingEdges, prefixSumOfEdges); createIntermediateMetadata(prefixSumOfEdges, hasIncomingEdge.count()); inspectIncomingNodes(hasIncomingEdge, prefixSumOfEdges); finalizeInspection(prefixSumOfEdges); galois::gDebug("[", base_DistGraph::id, "] To receive this many nodes: ", nodesToReceive); galois::gPrint("[", base_DistGraph::id, "] Inspection mapping complete.\n"); return prefixSumOfEdges; } /** * Inspect master nodes; loop over all nodes, determine if master; if is, * create mapping + get num edges */ void inspectMasterNodes(std::vector>& numOutgoingEdges, galois::gstl::Vector& prefixSumOfEdges) { uint32_t myHID = base_DistGraph::id; galois::GAccumulator toReceive; toReceive.reset(); for (unsigned h = 0; h < base_DistGraph::numHosts; ++h) { uint32_t activeThreads = galois::getActiveThreads(); std::vector threadPrefixSums(activeThreads); uint64_t startNode = base_DistGraph::gid2host[h].first; uint64_t lastNode = base_DistGraph::gid2host[h].second; size_t hostSize = lastNode - startNode; if (numOutgoingEdges[h].size() != 0) { assert(hostSize == numOutgoingEdges[h].size()); } // for each thread, figure out how many items it will work with (only // owned nodes) galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; // loop over all nodes that host h has read std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); uint64_t count = 0; for (size_t i = beginNode; i < 
endNode; i++) { // galois::gDebug("[", base_DistGraph::id, "] ", i + startNode, // " mapped to ", // graphPartitioner->retrieveMaster(i+startNode)); if (graphPartitioner->retrieveMaster(i + startNode) == myHID) { count++; } } threadPrefixSums[tid] = count; }); // get prefix sums for (unsigned int i = 1; i < threadPrefixSums.size(); i++) { threadPrefixSums[i] += threadPrefixSums[i - 1]; } assert(prefixSumOfEdges.size() == base_DistGraph::numNodes); assert(base_DistGraph::localToGlobalVector.size() == base_DistGraph::numNodes); uint32_t newMasterNodes = threadPrefixSums[activeThreads - 1]; galois::gDebug("[", base_DistGraph::id, "] This many masters from host ", h, ": ", newMasterNodes); uint32_t startingNodeIndex = base_DistGraph::numNodes; // increase size of prefix sum + mapping vector prefixSumOfEdges.resize(base_DistGraph::numNodes + newMasterNodes); base_DistGraph::localToGlobalVector.resize(base_DistGraph::numNodes + newMasterNodes); if (newMasterNodes > 0) { // do actual work, second on_each galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); // start location to start adding things into prefix sums/vectors uint32_t threadStartLocation = 0; if (tid != 0) { threadStartLocation = threadPrefixSums[tid - 1]; } uint32_t handledNodes = 0; for (size_t i = beginNode; i < endNode; i++) { uint32_t globalID = startNode + i; // if this node is master, get outgoing edges + save mapping if (graphPartitioner->retrieveMaster(globalID) == myHID) { // check size if (numOutgoingEdges[h].size() > 0) { uint64_t myEdges = numOutgoingEdges[h][i]; numOutgoingEdges[h][i] = 0; // set to 0; does not need to be // handled later prefixSumOfEdges[startingNodeIndex + threadStartLocation + handledNodes] = myEdges; if (myEdges > 0 && h != myHID) { toReceive += 1; } } else { prefixSumOfEdges[startingNodeIndex + threadStartLocation + handledNodes] = 0; } 
base_DistGraph::localToGlobalVector[startingNodeIndex + threadStartLocation + handledNodes] = globalID; handledNodes++; } } }); base_DistGraph::numNodes += newMasterNodes; } } nodesToReceive += toReceive.reduce(); // masters have been handled base_DistGraph::numOwned = base_DistGraph::numNodes; } /** * Outgoing inspection: loop over all nodes, determnine if outgoing exists; * if does, create mapping, get edges */ void inspectOutgoingNodes(std::vector>& numOutgoingEdges, galois::gstl::Vector& prefixSumOfEdges) { uint32_t myHID = base_DistGraph::id; galois::GAccumulator toReceive; toReceive.reset(); for (unsigned h = 0; h < base_DistGraph::numHosts; ++h) { size_t hostSize = numOutgoingEdges[h].size(); // if i got no outgoing info from this host, safely continue to next one if (hostSize == 0) { continue; } uint32_t activeThreads = galois::getActiveThreads(); std::vector threadPrefixSums(activeThreads); // for each thread, figure out how many items it will work with (only // owned nodes) galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); uint64_t count = 0; for (size_t i = beginNode; i < endNode; i++) { if (numOutgoingEdges[h][i] > 0) { count++; } } threadPrefixSums[tid] = count; }); // get prefix sums for (unsigned int i = 1; i < threadPrefixSums.size(); i++) { threadPrefixSums[i] += threadPrefixSums[i - 1]; } assert(prefixSumOfEdges.size() == base_DistGraph::numNodes); assert(base_DistGraph::localToGlobalVector.size() == base_DistGraph::numNodes); uint32_t newOutgoingNodes = threadPrefixSums[activeThreads - 1]; // increase size of prefix sum + mapping vector prefixSumOfEdges.resize(base_DistGraph::numNodes + newOutgoingNodes); base_DistGraph::localToGlobalVector.resize(base_DistGraph::numNodes + newOutgoingNodes); uint64_t startNode = base_DistGraph::gid2host[h].first; uint32_t startingNodeIndex = base_DistGraph::numNodes; if (newOutgoingNodes 
> 0) { // do actual work, second on_each galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range((size_t)0, hostSize, tid, nthreads); // start location to start adding things into prefix sums/vectors uint32_t threadStartLocation = 0; if (tid != 0) { threadStartLocation = threadPrefixSums[tid - 1]; } uint32_t handledNodes = 0; for (size_t i = beginNode; i < endNode; i++) { uint64_t myEdges = numOutgoingEdges[h][i]; if (myEdges > 0) { prefixSumOfEdges[startingNodeIndex + threadStartLocation + handledNodes] = myEdges; base_DistGraph::localToGlobalVector[startingNodeIndex + threadStartLocation + handledNodes] = startNode + i; handledNodes++; if (myEdges > 0 && h != myHID) { toReceive += 1; } } } }); base_DistGraph::numNodes += newOutgoingNodes; } // don't need anymore after this point; get memory back numOutgoingEdges[h].clear(); } nodesToReceive += toReceive.reduce(); base_DistGraph::numNodesWithEdges = base_DistGraph::numNodes; } /** * Create a part of the global to local map (it's missing the incoming * mirrors with no edges) + part of prefix sum * * @param[in, out] prefixSumOfEdges edge prefix sum to build * @param[in] incomingEstimate estimate of number of incoming nodes to build */ void createIntermediateMetadata(galois::gstl::Vector& prefixSumOfEdges, const uint64_t incomingEstimate) { if (base_DistGraph::numNodes == 0) { return; } base_DistGraph::globalToLocalMap.reserve(base_DistGraph::numNodesWithEdges + incomingEstimate); base_DistGraph::globalToLocalMap[base_DistGraph::localToGlobalVector[0]] = 0; // global to local map construction using num nodes with edges for (unsigned i = 1; i < base_DistGraph::numNodesWithEdges; i++) { prefixSumOfEdges[i] += prefixSumOfEdges[i - 1]; base_DistGraph::globalToLocalMap[base_DistGraph::localToGlobalVector[i]] = i; } } /** * incoming node creation if is doesn't already exist + if actually amrked * as having incoming node */ void 
inspectIncomingNodes(galois::DynamicBitSet& hasIncomingEdge, galois::gstl::Vector& prefixSumOfEdges) { uint32_t totalNumNodes = base_DistGraph::numGlobalNodes; uint32_t activeThreads = galois::getActiveThreads(); std::vector threadPrefixSums(activeThreads); galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range(0u, totalNumNodes, tid, nthreads); uint64_t count = 0; for (size_t i = beginNode; i < endNode; i++) { // only count if doesn't exist in global/local map + is incoming // edge if (hasIncomingEdge.test(i) && !base_DistGraph::globalToLocalMap.count(i)) ++count; } threadPrefixSums[tid] = count; }); // get prefix sums for (unsigned int i = 1; i < threadPrefixSums.size(); i++) { threadPrefixSums[i] += threadPrefixSums[i - 1]; } assert(prefixSumOfEdges.size() == base_DistGraph::numNodes); assert(base_DistGraph::localToGlobalVector.size() == base_DistGraph::numNodes); uint32_t newIncomingNodes = threadPrefixSums[activeThreads - 1]; // increase size of prefix sum + mapping vector prefixSumOfEdges.resize(base_DistGraph::numNodes + newIncomingNodes); base_DistGraph::localToGlobalVector.resize(base_DistGraph::numNodes + newIncomingNodes); uint32_t startingNodeIndex = base_DistGraph::numNodes; if (newIncomingNodes > 0) { // do actual work, second on_each galois::on_each([&](unsigned tid, unsigned nthreads) { size_t beginNode; size_t endNode; std::tie(beginNode, endNode) = galois::block_range(0u, totalNumNodes, tid, nthreads); // start location to start adding things into prefix sums/vectors uint32_t threadStartLocation = 0; if (tid != 0) { threadStartLocation = threadPrefixSums[tid - 1]; } uint32_t handledNodes = 0; for (size_t i = beginNode; i < endNode; i++) { if (hasIncomingEdge.test(i) && !base_DistGraph::globalToLocalMap.count(i)) { prefixSumOfEdges[startingNodeIndex + threadStartLocation + handledNodes] = 0; base_DistGraph::localToGlobalVector[startingNodeIndex + 
threadStartLocation + handledNodes] = i; handledNodes++; } } }); base_DistGraph::numNodes += newIncomingNodes; } } /** * finalize metadata maps */ void finalizeInspection(galois::gstl::Vector& prefixSumOfEdges) { // reserve rest of memory needed base_DistGraph::globalToLocalMap.reserve(base_DistGraph::numNodes); for (unsigned i = base_DistGraph::numNodesWithEdges; i < base_DistGraph::numNodes; i++) { // finalize prefix sum prefixSumOfEdges[i] += prefixSumOfEdges[i - 1]; // global to local map construction base_DistGraph::globalToLocalMap[base_DistGraph::localToGlobalVector[i]] = i; } if (prefixSumOfEdges.size() != 0) { base_DistGraph::numEdges = prefixSumOfEdges.back(); } else { base_DistGraph::numEdges = 0; } } //////////////////////////////////////////////////////////////////////////////// /** * Fill up mirror arrays. * TODO make parallel? */ void fillMirrors() { base_DistGraph::mirrorNodes.reserve(base_DistGraph::numNodes - base_DistGraph::numOwned); for (uint32_t i = base_DistGraph::numOwned; i < base_DistGraph::numNodes; i++) { uint32_t globalID = base_DistGraph::localToGlobalVector[i]; base_DistGraph::mirrorNodes[graphPartitioner->retrieveMaster(globalID)] .push_back(globalID); } } //////////////////////////////////////////////////////////////////////////////// template void loadEdges(GraphTy& graph, galois::graphs::BufferedGraph& bufGraph) { if (base_DistGraph::id == 0) { if (std::is_void::value) { fprintf(stderr, "Loading void edge-data while creating edges.\n"); } else { fprintf(stderr, "Loading edge-data while creating edges.\n"); } } bufGraph.resetReadCounters(); std::atomic receivedNodes; receivedNodes.store(0); galois::StatTimer loadEdgeTimer("EdgeLoading", GRNAME); loadEdgeTimer.start(); // sends data sendEdges(graph, bufGraph, receivedNodes); uint64_t bufBytesRead = bufGraph.getBytesRead(); // get data from graph back (don't need it after sending things out) bufGraph.resetAndFree(); // receives data galois::on_each( [&](unsigned, unsigned) { 
receiveEdges(graph, receivedNodes); }); base_DistGraph::increment_evilPhase(); loadEdgeTimer.stop(); galois::gPrint("[", base_DistGraph::id, "] Edge loading time: ", loadEdgeTimer.get_usec() / 1000000.0f, " seconds to read ", bufBytesRead, " bytes (", bufBytesRead / (float)loadEdgeTimer.get_usec(), " MBPS)\n"); } // Edge type is not void. (i.e. edge data exists) template ::value>::type* = nullptr> void sendEdges(GraphTy& graph, galois::graphs::BufferedGraph& bufGraph, std::atomic& receivedNodes) { using DstVecType = std::vector>; using DataVecType = std::vector>; using SendBufferVecTy = std::vector; galois::substrate::PerThreadStorage gdst_vecs( base_DistGraph::numHosts); galois::substrate::PerThreadStorage gdata_vecs( base_DistGraph::numHosts); galois::substrate::PerThreadStorage sendBuffers( base_DistGraph::numHosts); auto& net = galois::runtime::getSystemNetworkInterface(); const unsigned& id = this->base_DistGraph::id; const unsigned& numHosts = this->base_DistGraph::numHosts; galois::GAccumulator messagesSent; galois::GAccumulator bytesSent; galois::GReduceMax maxBytesSent; messagesSent.reset(); bytesSent.reset(); maxBytesSent.reset(); for (unsigned syncRound = 0; syncRound < _edgeStateRounds; syncRound++) { uint32_t beginNode; uint32_t endNode; std::tie(beginNode, endNode) = galois::block_range( base_DistGraph::gid2host[base_DistGraph::id].first, base_DistGraph::gid2host[base_DistGraph::id].second, syncRound, _edgeStateRounds); // Go over assigned nodes and distribute edges. 
galois::do_all( galois::iterate(beginNode, endNode), [&](uint64_t src) { uint32_t lsrc = 0; uint64_t curEdge = 0; if (base_DistGraph::isLocal(src)) { lsrc = this->G2L(src); curEdge = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); } auto ee = bufGraph.edgeBegin(src); auto ee_end = bufGraph.edgeEnd(src); uint64_t numEdgesL = std::distance(ee, ee_end); auto& gdst_vec = *gdst_vecs.getLocal(); auto& gdata_vec = *gdata_vecs.getLocal(); for (unsigned i = 0; i < numHosts; ++i) { gdst_vec[i].clear(); gdata_vec[i].clear(); gdst_vec[i].reserve(numEdgesL); // gdata_vec[i].reserve(numEdgesL); } for (; ee != ee_end; ++ee) { uint32_t gdst = bufGraph.edgeDestination(*ee); auto gdata = bufGraph.edgeData(*ee); uint32_t hostBelongs = graphPartitioner->getEdgeOwner(src, gdst, numEdgesL); if (_edgeStateRounds > 1) { hostLoads[hostBelongs] += 1; } if (hostBelongs == id) { // edge belongs here, construct on self assert(base_DistGraph::isLocal(src)); uint32_t ldst = this->G2L(gdst); graph.constructEdge(curEdge++, ldst, gdata); // TODO // if ldst is an outgoing mirror, this is vertex cut } else { // add to host vector to send out later gdst_vec[hostBelongs].push_back(gdst); gdata_vec[hostBelongs].push_back(gdata); } } // make sure all edges accounted for if local if (base_DistGraph::isLocal(src)) { assert(curEdge == (*graph.edge_end(lsrc))); } // send for (uint32_t h = 0; h < numHosts; ++h) { if (h == id) continue; if (gdst_vec[h].size() > 0) { auto& b = (*sendBuffers.getLocal())[h]; galois::runtime::gSerialize(b, src); galois::runtime::gSerialize(b, gdst_vec[h]); galois::runtime::gSerialize(b, gdata_vec[h]); // send if over limit if (b.size() > edgePartitionSendBufSize) { messagesSent += 1; bytesSent.update(b.size()); maxBytesSent.update(b.size()); net.sendTagged(h, galois::runtime::evilPhase, b); b.getVec().clear(); b.getVec().reserve(edgePartitionSendBufSize * 1.25); } } } // overlap receives auto buffer = net.recieveTagged(galois::runtime::evilPhase, nullptr); 
this->processReceivedEdgeBuffer(buffer, graph, receivedNodes); }, #if MORE_DIST_STATS galois::loopname("EdgeLoadingLoop"), #endif galois::steal(), galois::no_stats()); syncEdgeLoad(); // printEdgeLoad(); } // flush buffers for (unsigned threadNum = 0; threadNum < sendBuffers.size(); ++threadNum) { auto& sbr = *sendBuffers.getRemote(threadNum); for (unsigned h = 0; h < this->base_DistGraph::numHosts; ++h) { if (h == this->base_DistGraph::id) continue; auto& sendBuffer = sbr[h]; if (sendBuffer.size() > 0) { messagesSent += 1; bytesSent.update(sendBuffer.size()); maxBytesSent.update(sendBuffer.size()); net.sendTagged(h, galois::runtime::evilPhase, sendBuffer); sendBuffer.getVec().clear(); } } } net.flush(); galois::runtime::reportStat_Tsum( GRNAME, std::string("EdgeLoadingMessagesSent"), messagesSent.reduce()); galois::runtime::reportStat_Tsum( GRNAME, std::string("EdgeLoadingBytesSent"), bytesSent.reduce()); galois::runtime::reportStat_Tmax( GRNAME, std::string("EdgeLoadingMaxBytesSent"), maxBytesSent.reduce()); } // no edge data version template ::value>::type* = nullptr> void sendEdges(GraphTy& graph, galois::graphs::BufferedGraph& bufGraph, std::atomic& receivedNodes) { using DstVecType = std::vector>; using SendBufferVecTy = std::vector; galois::substrate::PerThreadStorage gdst_vecs( base_DistGraph::numHosts); galois::substrate::PerThreadStorage sendBuffers( base_DistGraph::numHosts); auto& net = galois::runtime::getSystemNetworkInterface(); const unsigned& id = this->base_DistGraph::id; const unsigned& numHosts = this->base_DistGraph::numHosts; galois::GAccumulator messagesSent; galois::GAccumulator bytesSent; galois::GReduceMax maxBytesSent; messagesSent.reset(); bytesSent.reset(); maxBytesSent.reset(); for (unsigned syncRound = 0; syncRound < _edgeStateRounds; syncRound++) { uint64_t beginNode; uint64_t endNode; std::tie(beginNode, endNode) = galois::block_range( base_DistGraph::gid2host[base_DistGraph::id].first, 
base_DistGraph::gid2host[base_DistGraph::id].second, syncRound, _edgeStateRounds); // Go over assigned nodes and distribute edges. galois::do_all( galois::iterate(beginNode, endNode), [&](uint64_t src) { uint32_t lsrc = 0; uint64_t curEdge = 0; if (base_DistGraph::isLocal(src)) { lsrc = this->G2L(src); curEdge = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); } auto ee = bufGraph.edgeBegin(src); auto ee_end = bufGraph.edgeEnd(src); uint64_t numEdgesL = std::distance(ee, ee_end); auto& gdst_vec = *gdst_vecs.getLocal(); for (unsigned i = 0; i < numHosts; ++i) { gdst_vec[i].clear(); // gdst_vec[i].reserve(numEdgesL); } for (; ee != ee_end; ++ee) { uint32_t gdst = bufGraph.edgeDestination(*ee); uint32_t hostBelongs = graphPartitioner->getEdgeOwner(src, gdst, numEdgesL); if (_edgeStateRounds > 1) { hostLoads[hostBelongs] += 1; } if (hostBelongs == id) { // edge belongs here, construct on self assert(base_DistGraph::isLocal(src)); uint32_t ldst = this->G2L(gdst); graph.constructEdge(curEdge++, ldst); // TODO // if ldst is an outgoing mirror, this is vertex cut } else { // add to host vector to send out later gdst_vec[hostBelongs].push_back(gdst); } } // make sure all edges accounted for if local if (base_DistGraph::isLocal(src)) { assert(curEdge == (*graph.edge_end(lsrc))); } // send for (uint32_t h = 0; h < numHosts; ++h) { if (h == id) continue; if (gdst_vec[h].size() > 0) { auto& b = (*sendBuffers.getLocal())[h]; galois::runtime::gSerialize(b, src); galois::runtime::gSerialize(b, gdst_vec[h]); // send if over limit if (b.size() > edgePartitionSendBufSize) { messagesSent += 1; bytesSent.update(b.size()); maxBytesSent.update(b.size()); net.sendTagged(h, galois::runtime::evilPhase, b); b.getVec().clear(); b.getVec().reserve(edgePartitionSendBufSize * 1.25); } } } // overlap receives auto buffer = net.recieveTagged(galois::runtime::evilPhase, nullptr); this->processReceivedEdgeBuffer(buffer, graph, receivedNodes); }, #if MORE_DIST_STATS 
galois::loopname("EdgeLoading"), #endif galois::steal(), galois::no_stats()); syncEdgeLoad(); // printEdgeLoad(); } // flush buffers for (unsigned threadNum = 0; threadNum < sendBuffers.size(); ++threadNum) { auto& sbr = *sendBuffers.getRemote(threadNum); for (unsigned h = 0; h < this->base_DistGraph::numHosts; ++h) { if (h == this->base_DistGraph::id) continue; auto& sendBuffer = sbr[h]; if (sendBuffer.size() > 0) { messagesSent += 1; bytesSent.update(sendBuffer.size()); maxBytesSent.update(sendBuffer.size()); net.sendTagged(h, galois::runtime::evilPhase, sendBuffer); sendBuffer.getVec().clear(); } } } net.flush(); galois::runtime::reportStat_Tsum( GRNAME, std::string("EdgeLoadingMessagesSent"), messagesSent.reduce()); galois::runtime::reportStat_Tsum( GRNAME, std::string("EdgeLoadingBytesSent"), bytesSent.reduce()); galois::runtime::reportStat_Tmax( GRNAME, std::string("EdgeLoadingMaxBytesSent"), maxBytesSent.reduce()); } //! @copydoc DistGraphHybridCut::processReceivedEdgeBuffer template void processReceivedEdgeBuffer( std::optional>& buffer, GraphTy& graph, std::atomic& receivedNodes) { if (buffer) { auto& rb = buffer->second; while (rb.r_size() > 0) { uint64_t n; std::vector gdst_vec; galois::runtime::gDeserialize(rb, n); galois::runtime::gDeserialize(rb, gdst_vec); assert(base_DistGraph::isLocal(n)); uint32_t lsrc = this->G2L(n); uint64_t cur = *graph.edge_begin(lsrc, galois::MethodFlag::UNPROTECTED); uint64_t cur_end = *graph.edge_end(lsrc); assert((cur_end - cur) == gdst_vec.size()); deserializeEdges(graph, rb, gdst_vec, cur, cur_end); ++receivedNodes; } } } /** * Receive the edge dest/data assigned to this host from other hosts * that were responsible for reading them. 
*/ template void receiveEdges(GraphTy& graph, std::atomic& receivedNodes) { auto& net = galois::runtime::getSystemNetworkInterface(); // receive edges for all mirror nodes while (receivedNodes < nodesToReceive) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; p = net.recieveTagged(galois::runtime::evilPhase, nullptr); processReceivedEdgeBuffer(p, graph, receivedNodes); } } template ::value>::type* = nullptr> void deserializeEdges(GraphTy& graph, galois::runtime::RecvBuffer& b, std::vector& gdst_vec, uint64_t& cur, uint64_t& cur_end) { std::vector gdata_vec; galois::runtime::gDeserialize(b, gdata_vec); uint64_t i = 0; while (cur < cur_end) { auto gdata = gdata_vec[i]; uint64_t gdst = gdst_vec[i++]; uint32_t ldst = this->G2L(gdst); graph.constructEdge(cur++, ldst, gdata); // TODO if ldst is an outgoing mirror, this is vertex cut } } template ::value>::type* = nullptr> void deserializeEdges(GraphTy& graph, galois::runtime::RecvBuffer&, std::vector& gdst_vec, uint64_t& cur, uint64_t& cur_end) { uint64_t i = 0; while (cur < cur_end) { uint64_t gdst = gdst_vec[i++]; uint32_t ldst = this->G2L(gdst); graph.constructEdge(cur++, ldst); // TODO if ldst is an outgoing mirror, this is vertex cut } } }; // make GRNAME visible to public template constexpr const char* const galois::graphs::NewDistGraphGeneric::GRNAME; } // end namespace graphs } // end namespace galois #endif ================================================ FILE: libdist/CMakeLists.txt ================================================ add_library(galois_dist_async STATIC) add_library(Galois::dist_async ALIAS galois_dist_async) add_dependencies(lib galois_dist_async) set_target_properties(galois_dist_async PROPERTIES EXPORT_NAME dist_async) target_sources(galois_dist_async PRIVATE src/Barrier.cpp src/DistGalois.cpp src/DistStats.cpp src/Network.cpp src/NetworkBuffered.cpp src/NetworkIOMPI.cpp src/NetworkLCI.cpp ) target_include_directories(galois_dist_async PUBLIC $ $ ) 
target_link_libraries(galois_dist_async PUBLIC MPI::MPI_CXX)
target_link_libraries(galois_dist_async PUBLIC galois_shmem)

target_compile_definitions(galois_dist_async PRIVATE GALOIS_SUPPORT_ASYNC=1)

if (GALOIS_USE_BARE_MPI)
  target_compile_definitions(galois_dist_async PRIVATE GALOIS_USE_BARE_MPI=1)
endif()

if (GALOIS_USE_LCI)
  add_definitions(-DGALOIS_USE_LCI)
  set(LCI_ROOT "${CMAKE_BINARY_DIR}/libdist/external/src/lci")
  set(LCI_INCLUDE "${LCI_ROOT}/include")
  set(LCI_LIBRARY "${LCI_ROOT}/liblci.a")
  include(ExternalProject)
  # do not clone submodules for external projects
  cmake_policy(SET CMP0097 NEW)
  ExternalProject_Add(lci
    PREFIX external
    BUILD_IN_SOURCE 1
    CONFIGURE_COMMAND ""
    INSTALL_COMMAND ""
    LOG_OUTPUT_ON_FAILURE 1
    GIT_REPOSITORY "https://github.com/uiuc-hpc/LC.git"
    GIT_SUBMODULES ""
    GIT_TAG "9bf912829339879e1132614c6d24cd032c32366b")
  add_dependencies(galois_dist_async lci)
  target_link_libraries(galois_dist_async PRIVATE ${LCI_LIBRARY} -lpsm2)
  # NOTE(review): generator expressions were destroyed by extraction;
  # reconstructed as the conventional build/install interface pair — confirm
  # against upstream.
  target_include_directories(galois_dist_async PUBLIC
    $<BUILD_INTERFACE:${LCI_INCLUDE}>
    $<INSTALL_INTERFACE:include>
  )
endif(GALOIS_USE_LCI)

install(
  DIRECTORY include/
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
  COMPONENT dev
  FILES_MATCHING PATTERN "*.h"
)

install(TARGETS galois_dist_async
  EXPORT GaloisTargets
  LIBRARY
    DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    COMPONENT shlib
  ARCHIVE
    DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    COMPONENT lib
  INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
)


================================================
FILE: libdist/include/galois/DReducible.h
================================================
/*
 * This file belongs to the Galois project, a C++ library for exploiting
 * parallelism. The code is being released under the terms of the 3-Clause BSD
 * License (a copy is located in LICENSE.txt at the top-level directory).
 *
 * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
 * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
 * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
 * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
 * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
 * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
 * shall University be liable for incidental, special, indirect, direct or
 * consequential damages or loss of profits, interruption of business, or
 * related expenses which may arise from use of Software or Documentation,
 * including but not limited to those resulting from defects in Software and/or
 * Documentation, or loss or inaccuracy of data of any kind.
 */

/**
 * @file DReducible.h
 *
 * Implements distributed reducible objects for easy reduction of values
 * across a distributed system.
 */

#ifndef GALOIS_DISTACCUMULATOR_H
#define GALOIS_DISTACCUMULATOR_H

// NOTE(review): the header name inside <> was destroyed by extraction;
// <limits> matches the std::numeric_limits uses below — confirm upstream.
#include <limits>

#include "galois/Galois.h"
#include "galois/Reduction.h"
#include "galois/AtomicHelpers.h"
#include "galois/runtime/LWCI.h"
#include "galois/runtime/DistStats.h"

namespace galois {

/**
 * Distributed sum-reducer for getting the sum of some value across multiple
 * hosts.
* * @tparam Ty type of value to max-reduce */ template class DGAccumulator { galois::runtime::NetworkInterface& net = galois::runtime::getSystemNetworkInterface(); galois::GAccumulator mdata; Ty local_mdata, global_mdata; #ifdef GALOIS_USE_LCI /** * Sum reduction using LWCI */ inline void reduce_lwci() { lc_alreduce(&local_mdata, &global_mdata, sizeof(Ty), &galois::runtime::internal::ompi_op_sum, lc_col_ep); } #else /** * Sum reduction using MPI */ inline void reduce_mpi() { if (typeid(Ty) == typeid(int32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(int64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(float)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(long double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } else { static_assert(true, "Type of DGAccumulator not supported for MPI reduction"); } } #endif public: //! Default constructor DGAccumulator() {} /** * Adds to accumulated value * * @param rhs Value to add * @returns reference to this object */ DGAccumulator& operator+=(const Ty& rhs) { mdata += rhs; return *this; } /** * Sets current value stored in accumulator. * * @param rhs Value to set */ void operator=(const Ty rhs) { mdata.reset(); mdata += rhs; } /** * Sets current value stored in accumulator. 
* * @param rhs Value to set */ void set(const Ty rhs) { mdata.reset(); mdata += rhs; } /** * Read local accumulated value. * * @returns locally accumulated value */ Ty read_local() { if (local_mdata == 0) local_mdata = mdata.reduce(); return local_mdata; } /** * Read the value returned by the last reduce call. * Should call reduce before calling this function if an up to date * value is required * * @returns the value of the last reduce call */ Ty read() { return global_mdata; } /** * Reset the entire accumulator. * * @returns the value of the last reduce call */ Ty reset() { Ty retval = global_mdata; mdata.reset(); local_mdata = global_mdata = 0; return retval; } /** * Reduce data across all hosts, saves the value, and returns the * reduced value * * @param runID optional argument used to create a statistics timer * for later reporting * * @returns The reduced value */ Ty reduce(std::string runID = std::string()) { std::string timer_str("ReduceDGAccum_" + runID); galois::CondStatTimer reduceTimer(timer_str.c_str(), "DGReducible"); reduceTimer.start(); if (local_mdata == 0) local_mdata = mdata.reduce(); #ifdef GALOIS_USE_LCI reduce_lwci(); #else reduce_mpi(); #endif reduceTimer.stop(); return global_mdata; } }; //////////////////////////////////////////////////////////////////////////////// /** * Distributed max-reducer for getting the max of some value across multiple * hosts. 
* * @tparam Ty type of value to max-reduce */ template class DGReduceMax { galois::runtime::NetworkInterface& net = galois::runtime::getSystemNetworkInterface(); galois::GReduceMax mdata; // local max reducer Ty local_mdata, global_mdata; #ifdef GALOIS_USE_LCI /** * Use LWCI to reduce max across hosts */ inline void reduce_lwci() { lc_alreduce(&local_mdata, &global_mdata, sizeof(Ty), &galois::runtime::internal::ompi_op_max, lc_col_ep); } #else /** * Use MPI to reduce max across hosts */ inline void reduce_mpi() { if (typeid(Ty) == typeid(int32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(int64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED_LONG, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(float)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(long double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } else { static_assert(true, "Type of DGReduceMax not supported for MPI " "reduction"); } } #endif public: /** * Default constructor; initializes everything to 0. */ DGReduceMax() { local_mdata = 0; global_mdata = 0; } /** * Update the local max-reduced value. * * @param rhs Value to max-reduce locally with */ void update(const Ty rhs) { mdata.update(rhs); } /** * Read the local reduced max value; if it has never been reduced, it will * attempt get the global value through a reduce (i.e. all other hosts * should call reduce as well). 
* * @returns the local value stored in the accumulator or a global value if * reduce has never been called */ Ty read_local() { if (local_mdata == 0) local_mdata = mdata.reduce(); return local_mdata; } /** * Read the global reduced max value. For accurate results, you should * call reduce before calling this. * * @returns the global value stored in the accumulator */ Ty read() { return global_mdata; } /** * Reset this accumulator. * * @returns the previous global value stored in this accumulator (note if * never reduced, it will be 0 */ Ty reset() { Ty retval = global_mdata; mdata.reset(); local_mdata = global_mdata = 0; return retval; } /** * Do a max reduction across all hosts by sending data to all other hosts * and reducing received data. * * @returns the max-reduced value after reducing from all hosts. */ Ty reduce(std::string runID = std::string()) { std::string timer_str("ReduceDGReduceMax_" + runID); galois::CondStatTimer reduceTimer(timer_str.c_str(), "DGReduceMax"); reduceTimer.start(); if (local_mdata == 0) local_mdata = mdata.reduce(); #ifdef GALOIS_USE_LCI reduce_lwci(); #else reduce_mpi(); #endif reduceTimer.stop(); return global_mdata; } }; //////////////////////////////////////////////////////////////////////////////// /** * Distributed min-reducer for getting the min of some value across multiple * hosts. 
* * @tparam Ty type of value to min-reduce */ template class DGReduceMin { galois::runtime::NetworkInterface& net = galois::runtime::getSystemNetworkInterface(); galois::GReduceMin mdata; // local min reducer Ty local_mdata, global_mdata; #ifdef GALOIS_USE_LCI /** * Use LWCI to reduce min across hosts */ inline void reduce_lwci() { lc_alreduce(&local_mdata, &global_mdata, sizeof(Ty), &galois::runtime::internal::ompi_op_min, lc_col_ep); } #else /** * Use MPI to reduce min across hosts */ inline void reduce_mpi() { if (typeid(Ty) == typeid(int32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(int64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint32_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(uint64_t)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_UNSIGNED_LONG, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(float)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else if (typeid(Ty) == typeid(long double)) { MPI_Allreduce(&local_mdata, &global_mdata, 1, MPI_LONG_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else { static_assert(true, "Type of DGReduceMin not supported for MPI " "reduction"); } } #endif public: /** * Default constructor; initializes everything to the max value of the type. */ DGReduceMin() { local_mdata = std::numeric_limits::max(); global_mdata = std::numeric_limits::max(); ; } /** * Update the local min-reduced value. * * @param rhs Value to min-reduce locally with */ void update(const Ty rhs) { mdata.update(rhs); } /** * Read the local reduced min value; if it has never been reduced, it will * attempt get the global value through a reduce (i.e. 
all other hosts * should call reduce as well). * * @returns the local value stored in the accumulator or a global value if * reduce has never been called */ Ty read_local() { if (local_mdata == std::numeric_limits::max()) local_mdata = mdata.reduce(); return local_mdata; } /** * Read the global reduced min value. For accurate results, you should * call reduce before calling this. * * @returns the global value stored in the accumulator */ Ty read() { return global_mdata; } /** * Reset this accumulator. * * @returns the previous global value stored in this accumulator (note if * never reduced, it will be 0 */ Ty reset() { Ty retval = global_mdata; mdata.reset(); local_mdata = global_mdata = std::numeric_limits::max(); return retval; } /** * Do a min reduction across all hosts by sending data to all other hosts * and reducing received data. * * @returns the min-reduced value after reducing from all hosts. */ Ty reduce(std::string runID = std::string()) { std::string timer_str("ReduceDGReduceMin_" + runID); galois::CondStatTimer reduceTimer(timer_str.c_str(), "DGReduceMin"); reduceTimer.start(); if (local_mdata == std::numeric_limits::max()) local_mdata = mdata.reduce(); #ifdef GALOIS_USE_LCI reduce_lwci(); #else reduce_mpi(); #endif reduceTimer.stop(); return global_mdata; } }; } // namespace galois #endif ================================================ FILE: libdist/include/galois/DTerminationDetector.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. 
* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DReducible.h * * Implements distributed reducible objects for easy reduction of values * across a distributed system. */ #ifndef GALOIS_DISTTERMINATOR_H #define GALOIS_DISTTERMINATOR_H #include #include "galois/Galois.h" #include "galois/Reduction.h" #include "galois/AtomicHelpers.h" #include "galois/runtime/LWCI.h" #include "galois/runtime/DistStats.h" namespace galois { /** * Distributed sum-reducer for getting the sum of some value across multiple * hosts. * * @tparam Ty type of value to max-reduce */ template class DGTerminator { galois::runtime::NetworkInterface& net = galois::runtime::getSystemNetworkInterface(); galois::GAccumulator mdata; Ty local_mdata, global_mdata; uint64_t prev_snapshot; uint64_t snapshot; uint64_t global_snapshot; bool work_done; #ifndef GALOIS_USE_LCI MPI_Request snapshot_request; #else lc_colreq snapshot_request; #endif public: //! 
Default constructor DGTerminator() { reinitialize(); initiate_snapshot(); reset(); } void reinitialize() { prev_snapshot = 0; snapshot = 1; global_snapshot = 1; work_done = false; } /** * Adds to accumulated value * * @param rhs Value to add * @returns reference to this object */ DGTerminator& operator+=(const Ty& rhs) { mdata += rhs; return *this; } /** * Sets current value stored in accumulator. * * @param rhs Value to set */ void operator=(const Ty rhs) { mdata.reset(); mdata += rhs; } /** * Sets current value stored in accumulator. * * @param rhs Value to set */ void set(const Ty rhs) { mdata.reset(); mdata += rhs; } /** * Read local accumulated value. * * @returns locally accumulated value */ Ty read_local() { if (local_mdata == 0) local_mdata = mdata.reduce(); return local_mdata; } /** * Read the value returned by the last reduce call. * Should call reduce before calling this function if an up to date * value is required * * @returns the value of the last reduce call */ Ty read() { return global_mdata; } /** * Reset the entire accumulator. 
* * @returns the value of the last reduce call */ Ty reset() { Ty retval = global_mdata; mdata.reset(); local_mdata = global_mdata = 0; return retval; } void initiate_snapshot() { #ifdef GALOIS_USE_LCI lc_ialreduce(&snapshot, &global_snapshot, sizeof(Ty), &galois::runtime::internal::ompi_op_max, lc_col_ep, &snapshot_request); #else MPI_Iallreduce(&snapshot, &global_snapshot, 1, MPI_UNSIGNED_LONG, MPI_MAX, MPI_COMM_WORLD, &snapshot_request); #endif } bool terminate() { bool active = (local_mdata != 0); if (!active) { active = net.anyPendingSends(); } int snapshot_ended = 0; if (!active) { #ifndef GALOIS_USE_LCI MPI_Test(&snapshot_request, &snapshot_ended, MPI_STATUS_IGNORE); #else lc_col_progress(&snapshot_request); snapshot_ended = snapshot_request.flag; #endif } if (!active) { // check pending receives after checking snapshot active = net.anyPendingReceives(); if (active) galois::gDebug("[", net.ID, "] pending receive"); } if (active) { work_done = true; } else { if (snapshot_ended != 0) { snapshot = global_snapshot; if (work_done) { work_done = false; prev_snapshot = snapshot; ++snapshot; galois::gDebug("[", net.ID, "] work done, taking snapshot ", snapshot); initiate_snapshot(); } else if (prev_snapshot != snapshot) { prev_snapshot = snapshot; galois::gDebug("[", net.ID, "] no work done, taking snapshot ", snapshot); initiate_snapshot(); } else { galois::gDebug("[", net.ID, "] terminating ", snapshot); // an explicit barrier may be required here // so that the next async phase begins on all hosts at the same time // however, this may add overheads when it is not required // (depending on when the next async phase actually begins), so // ASSUME: caller will call getHostBarrier().wait() if required reinitialize(); // for next async phase return true; } } } return false; } /** * Reduce data across all hosts, saves the value, and returns the * reduced value * * @param runID optional argument used to create a statistics timer * for later reporting * * @returns The 
reduced value */ Ty reduce(std::string runID = std::string()) { std::string timer_str("ReduceDGAccum_" + runID); galois::CondStatTimer reduceTimer(timer_str.c_str(), "DGReducible"); reduceTimer.start(); if (local_mdata == 0) local_mdata = mdata.reduce(); bool halt = terminate(); global_mdata = !halt; if (halt) { galois::runtime::evilPhase += 2; // one for reduce and one for broadcast if (galois::runtime::evilPhase >= static_cast( std::numeric_limits::max())) { // limit defined by MPI or // LCI galois::runtime::evilPhase = 1; } } reduceTimer.stop(); return global_mdata; } }; } // namespace galois #endif ================================================ FILE: libdist/include/galois/DistGalois.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DistGalois.h * * Contains the declaration of DistMemSys, a way to explicitly initiate the * Galois runtime. 
*/ #ifndef GALOIS_DIST_GALOIS_H #define GALOIS_DIST_GALOIS_H #include "galois/runtime/SharedMem.h" #include "galois/runtime/DistStats.h" #include #include #include namespace galois { /** * Explicit class to initialize the Galois Runtime. * The runtime is destroyed when this object is destroyed */ class DistMemSys : public runtime::SharedMem { public: explicit DistMemSys(); ~DistMemSys(); DistMemSys(const DistMemSys&) = delete; DistMemSys& operator=(const DistMemSys&) = delete; DistMemSys(DistMemSys&&) = delete; DistMemSys& operator=(DistMemSys&&) = delete; }; } // namespace galois #endif ================================================ FILE: libdist/include/galois/runtime/BareMPI.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /* */ /** * @file BareMPI.h * * Contains the BareMPI enum and the command line option that controls bare * MPI usage. 
*/ #pragma once #ifdef GALOIS_USE_BARE_MPI #include "mpi.h" //! Defines types of bare MPI to use enum BareMPI { noBareMPI, //!< do not use bare MPI; use our network layer nonBlockingBareMPI, //!< non blocking bare MPI oneSidedBareMPI //!< one sided bare MPI }; #endif ================================================ FILE: libdist/include/galois/runtime/DistStats.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DistStats.h * * Contains declaration of DistStatManager, which reports runtime statistics of * a distributed application in Galois. */ #ifndef GALOIS_RUNTIME_DIST_STATS_H #define GALOIS_RUNTIME_DIST_STATS_H //! Turn on if you want more distributed stats to be printed #ifndef MORE_DIST_STATS #define MORE_DIST_STATS 0 #endif //! 
Turn on if you want more communication statistics to be printed #ifndef GALOIS_COMM_STATS #define GALOIS_COMM_STATS 0 #endif //! Turn on if you want per-bulk-synchronous parallel timers to be printed //! (otherwise all rounds are under 1 timer) #ifndef GALOIS_PER_ROUND_STATS #define GALOIS_PER_ROUND_STATS 0 #endif #include "galois/runtime/Statistics.h" #include "galois/runtime/Network.h" #include namespace galois { namespace runtime { /** * Helper class for the DistStatManager that aids in receiving statistics */ class StatRecvHelper; /** * Class responsible for tracking all statistics of a running distributed * Galois program and reporting them at the end of program execution. */ class DistStatManager : public galois::runtime::StatManager { //! Friend class that helps with receiving stats friend class galois::runtime::StatRecvHelper; using Base = galois::runtime::StatManager; using Str = galois::gstl::Str; using Base::SEP; static constexpr const char* const HSTAT_SEP = Base::TSTAT_SEP; static constexpr const char* const HSTAT_NAME = "HostValues"; static constexpr const char* const HSTAT_ENV_VAR = "PRINT_PER_HOST_STATS"; static bool printingHostVals(void); template struct HostTotalTypesImpl { struct DummyStat { StatTotal::Type m_totalTy; explicit DummyStat(StatTotal::Type total) : m_totalTy(total) {} template void add(const _U&) const {} const StatTotal::Type& totalTy(void) const { return m_totalTy; } }; using TMap = internal::BasicStatMap; bool merged = false; substrate::PerThreadStorage perThrdMap; void addToStat(const Str& region, const Str& category, const StatTotal::Type& hTotalTy) { perThrdMap.getLocal()->addToStat(region, category, 0, hTotalTy); } void mergeStats(void) { if (merged) { return; } GALOIS_ASSERT(perThrdMap.getLocal() == perThrdMap.getRemote(0), "Must call from Thread 0"); auto* t0Map = perThrdMap.getRemote(0); for (unsigned t = 1; t < perThrdMap.size(); ++t) { const auto* manager = perThrdMap.getRemote(t); for (auto i = manager->cbegin(), end_i 
= manager->cend(); i != end_i; ++i) { t0Map->addToStat(manager->region(i), manager->category(i), 0, manager->stat(i).totalTy()); } } merged = true; } const TMap& mergedMap(void) const { assert(merged && "Must merge first"); return *perThrdMap.getRemote(0); } }; using HostTotalTypes = HostTotalTypesImpl<>; template using ThrdVals = galois::gstl::Vector; template using HostStatVal = std::tuple&>; template struct HostStat : public internal::VecStat { using Base = internal::VecStat; using ThrdStats = internal::VecStat; using PerHostThrdStats = galois::gstl::Map; PerHostThrdStats perHostThrdStats; explicit HostStat(const StatTotal::Type& hTotalTy) : Base(hTotalTy) {} void add(const HostStatVal& val) { const auto& hostID = std::get<0>(val); const auto& thrdTotal = std::get<1>(val); const auto& thrdTotalTy = std::get<2>(val); const auto& thrdVals = std::get<3>(val); Base::add(thrdTotal); auto p = perHostThrdStats.emplace(hostID, ThrdStats(thrdTotalTy)); auto& tstat = p.first->second; for (const auto& i : thrdVals) { tstat.add(i); } } void printHostVals(std::ostream& out, const Str& region, const Str& category) const { out << StatManager::statKind() << SEP << galois::runtime::getHostID() << SEP; out << region << SEP << category << SEP; out << HSTAT_NAME << SEP; const char* sep = ""; for (const auto& v : Base::values()) { out << sep << v; sep = HSTAT_SEP; } out << std::endl; } void printThreadVals(std::ostream& out, const Str& region, const Str& category) const { for (const auto& p : perHostThrdStats) { out << StatManager::statKind() << SEP << p.first << SEP; out << region << SEP << category << SEP; out << StatTotal::str(p.second.totalTy()) << SEP << p.second.total(); out << std::endl; out << StatManager::statKind() << SEP << p.first << SEP; out << region << SEP << category << SEP; out << StatManager::TSTAT_NAME << SEP; const char* sep = ""; for (const auto& v : p.second.values()) { out << sep << v; sep = StatManager::TSTAT_SEP; } out << std::endl; } } }; template struct 
DistStatCombiner : public internal::BasicStatMap> { using Base = internal::BasicStatMap>; #if __GNUC__ < 5 static const char* htotalName(const StatTotal::Type& type){ #else static constexpr const char* htotalName(const StatTotal::Type& type) { #endif switch (type) { case StatTotal::SINGLE : return "HOST_0"; case StatTotal::TSUM: return "HSUM"; case StatTotal::TAVG: return "HAVG"; case StatTotal::TMIN: return "HMIN"; case StatTotal::TMAX: return "HMAX"; default: std::abort(); return nullptr; } } void print(std::ostream& out) const { for (auto i = Base::cbegin(), end_i = Base::cend(); i != end_i; ++i) { out << StatManager::statKind() << SEP << galois::runtime::getHostID() << SEP; out << Base::region(i) << SEP << Base::category(i) << SEP; const HostStat& hs = Base::stat(i); out << htotalName(hs.totalTy()) << SEP << hs.total(); out << std::endl; if (DistStatManager::printingHostVals()) { hs.printHostVals(out, Base::region(i), Base::category(i)); } if (StatManager::printingThreadVals()) { hs.printThreadVals(out, Base::region(i), Base::category(i)); } } } }; // namespace runtime DistStatCombiner intDistStats; DistStatCombiner fpDistStats; DistStatCombiner strDistStats; HostTotalTypes hostTotalTypes; protected: /** * Merge all stats from each individual thread as well as each individual * host as prescribed the the reduction (Total) type specified for each * statistic. */ void mergeStats(void); /** * Print the header of the stats file output. * * @param out File to print header out to */ void printHeader(std::ostream& out) const; /** * Merge all stats. Host 0 will then print out all collected stats. */ virtual void printStats(std::ostream& out); public: //! Dist stat manager constructor DistStatManager(const std::string& outfile = ""); ~DistStatManager(); /** * Adds a statistic to the statistics manager. 
* * @param region Region name to give statistic * @param category Category of statistic * @param val Value of the statistic * @param thrdTotalTy The type of reduction used to combine thread statistics * of the same kind * @param hTotalTy The type of reduction used to combine host statistics * of the same kind */ template void addToStat(const Str& region, const Str& category, const T& val, const StatTotal::Type& thrdTotalTy, const StatTotal::Type& hTotalTy) { Base::addToStat(region, category, val, thrdTotalTy); hostTotalTypes.addToStat(region, category, hTotalTy); } private: void combineAtHost_0_helper(void); void combineAtHost_0_helper2(void); void receiveAtHost_0_helper(void); void receiveAtHost_0_helper2(void); void combineAtHost_0(void); StatTotal::Type findHostTotalTy(const Str& region, const Str& category, const StatTotal::Type& thrdTotalTy) const; void addRecvdHostTotalTy(const Str& region, const Str& category, const StatTotal::Type& totalTy); void addRecvdStat(unsigned hostID, const Str& region, const Str& category, int64_t thrdTotal, const StatTotal::Type& thrdTotalTy, const ThrdVals& thrdVals); void addRecvdStat(unsigned hostID, const Str& region, const Str& category, double thrdTotal, const StatTotal::Type& thrdTotalTy, const ThrdVals& thrdVals); void addRecvdParam(unsigned hostID, const Str& region, const Str& category, const Str& thrdTotal, const StatTotal::Type& thrdTotalTy, const ThrdVals& thrdVals); }; // namespace galois namespace internal { /** * Gets a pointer to the distributed stat manager. * * @returns Pointer to distributed statistics manager */ DistStatManager* distSysStatManager(void); } // namespace internal /** * Adds a statistic to the statistics manager. Calls addToStat in * DistStatManager. 
* * @param region Region name to give statistic * @param category Category of statistic * @param value Value of the statistic * @param thrdTotalTy The type of reduction used to combine thread statistics * of the same kind * @param hTotalTy The type of reduction used to combine host statistics * of the same kind */ template inline void reportDistStat(const S1& region, const S2& category, const T& value, const StatTotal::Type& thrdTotalTy, const StatTotal::Type& hTotalTy) { internal::distSysStatManager()->addToStat(gstl::makeStr(region), gstl::makeStr(category), value, thrdTotalTy, hTotalTy); } } // end namespace runtime } // end namespace galois #endif // GALOIS_RUNTIME_DIST_STATS_H ================================================ FILE: libdist/include/galois/runtime/LWCI.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
*/ /** * @file LWCI.h * * LWCI header that includes lc.h (LCI library) and internal helper functions * on arrays. */ #pragma once #ifdef GALOIS_USE_LCI GALOIS_IGNORE_UNUSED_PARAMETERS #include "lc.h" GALOIS_END_IGNORE_UNUSED_PARAMETERS extern lc_ep lc_col_ep; extern lc_ep lc_p2p_ep[3]; namespace galois { namespace runtime { namespace internal { /** * Element-wise sum of 2 arrays. * * @tparam Ty type of elements contained in the arrays * * @param dst destination array to write to * @param src source array to read from * @param count Size of array in bytes */ template void ompi_op_sum(void* dst, void* src, size_t count) { Ty* dst_ty = (Ty*)dst; Ty* src_ty = (Ty*)src; for (size_t i = 0; i < (count / sizeof(Ty)); ++i) { dst_ty[i] += src_ty[i]; } } /** * Element-wise max of 2 arrays. * * @tparam Ty type of elements contained in the arrays * * @param dst destination array to write to * @param src source array to read from * @param count Size of array in bytes */ template void ompi_op_max(void* dst, void* src, size_t count) { Ty* dst_ty = (Ty*)dst; Ty* src_ty = (Ty*)src; for (size_t i = 0; i < (count / sizeof(Ty)); ++i) { if (dst_ty[i] < src_ty[i]) { dst_ty[i] = src_ty[i]; } } } /** * Element-wise min of 2 arrays. * * @tparam Ty type of elements contained in the arrays * * @param dst destination array to write to * @param src source array to read from * @param count Size of array in bytes */ template void ompi_op_min(void* dst, void* src, size_t count) { Ty* dst_ty = (Ty*)dst; Ty* src_ty = (Ty*)src; for (size_t i = 0; i < (count / sizeof(Ty)); ++i) { if (dst_ty[i] > src_ty[i]) { dst_ty[i] = src_ty[i]; } } } } // namespace internal } // namespace runtime } // namespace galois #endif ================================================ FILE: libdist/include/galois/runtime/MemUsage.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /* */ /** * @file MemUsage.h * * Contains MemUsageTracker, a class that tracks memory usage throughout * runtime of a program of send/receive buffers. */ #pragma once #include namespace galois { namespace runtime { /** * Class that tracks memory usage (mainly of send and receive buffers). */ class MemUsageTracker { std::atomic currentMemUsage; //!< mem usage of send and receive buffers int64_t maxMemUsage; //!< max mem usage of send and receive buffers public: //! Default constructor initializes everything to 0. MemUsageTracker() : currentMemUsage(0), maxMemUsage(0) {} /** * Increment memory usage. * * @param size amount to increment mem usage by */ inline void incrementMemUsage(uint64_t size) { currentMemUsage += size; if (currentMemUsage > maxMemUsage) maxMemUsage = currentMemUsage; } /** * Decrement memory usage. 
* * @param size amount to decrement mem usage by */ inline void decrementMemUsage(uint64_t size) { currentMemUsage -= size; } /** * Reset mem usage and max mem usage to 0. */ inline void resetMemUsage() { currentMemUsage = 0; maxMemUsage = 0; } /** * Get max mem usage. * * @returns maximum memory usage tracked so far */ inline int64_t getMaxMemUsage() const { return maxMemUsage; } }; } // namespace runtime } // namespace galois ================================================ FILE: libdist/include/galois/runtime/Network.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file Network.h * * Contains the network interface class which is the base class for all * network layer implementations. 
*/ #ifndef GALOIS_RUNTIME_NETWORK_H #define GALOIS_RUNTIME_NETWORK_H #include "galois/runtime/Serialize.h" #include "galois/runtime/MemUsage.h" #include "galois/substrate/Barrier.h" #include #include #include #include namespace galois::runtime { //! typedef for buffer that stores data to be sent out using SendBuffer = SerializeBuffer; //! typedef for buffer that received data is saved into using RecvBuffer = DeSerializeBuffer; /** * A class that defines functions that a network interface in Galois should * have. How the sends/recvs/stat-collecting happens as well * as the network layer itself is up to the implemention of the class. */ class NetworkInterface { protected: //! Initialize the MPI system. Should only be called once per process. void initializeMPI(); //! Finalize the MPI system. Should only be called once per process. void finalizeMPI(); //! Memory usage tracker MemUsageTracker memUsageTracker; //! Number of inflight sends and receives std::atomic inflightSends; std::atomic inflightRecvs; #ifdef GALOIS_USE_BARE_MPI public: //! Wrapper that calls into increment mem usage on the memory usage tracker inline void incrementMemUsage(uint64_t size) { memUsageTracker.incrementMemUsage(size); } //! Wrapper that calls into decrement mem usage on the memory usage tracker inline void decrementMemUsage(uint64_t size) { memUsageTracker.decrementMemUsage(size); } #endif public: //! This machine's host ID static uint32_t ID; //! The total number of machines in the current program static uint32_t Num; /** * Constructor for interface. */ NetworkInterface(); /** * Destructor destroys MPI (if it exists). */ virtual ~NetworkInterface(); //! Send a message to a given (dest) host. A message is simply a //! landing pad (recv, funciton pointer) and some data (buf) //! on the receiver, recv(buf) will be called durring handleReceives() //! buf is invalidated by this operation void sendMsg(uint32_t dest, void (*recv)(uint32_t, RecvBuffer&), SendBuffer& buf); //! 
Send a message letting the network handle the serialization and //! deserialization slightly slower template void sendSimple(uint32_t dest, void (*recv)(uint32_t, Args...), Args... param); //! Send a message to a given (dest) host. A message is simply a //! tag (tag) and some data (buf) //! on the receiver, buf will be returned on a receiveTagged(tag) //! buf is invalidated by this operation virtual void sendTagged(uint32_t dest, uint32_t tag, SendBuffer& buf, int type = 0) = 0; //! Send a message to all hosts. A message is simply a //! landing pad (recv) and some data (buf) //! buf is invalidated by this operation void broadcast(void (*recv)(uint32_t, RecvBuffer&), SendBuffer& buf, bool self = false); //! Broadcast a message allowing the network to handle serialization and //! deserialization template void broadcastSimple(void (*recv)(uint32_t, Args...), Args... param); //! Receive and dispatch messages void handleReceives(); //! Wrapper to reset the mem usage tracker's stats inline void resetMemUsage() { memUsageTracker.resetMemUsage(); } //! Reports the memory usage tracker's statistics to the stat manager void reportMemUsage() const; //! Receive a tagged message virtual std::optional> recieveTagged(uint32_t tag, std::unique_lock* rlg, int type = 0) = 0; //! move send buffers out to network virtual void flush() = 0; //! @returns true if any send is in progress or is pending to be enqueued virtual bool anyPendingSends() = 0; //! @returns true if any receive is in progress or is pending to be dequeued virtual bool anyPendingReceives() = 0; //! Get how many bytes were sent //! @returns num bytes sent virtual unsigned long reportSendBytes() const = 0; //! Get how many messages were sent //! @returns num messages sent virtual unsigned long reportSendMsgs() const = 0; //! Get how many bytes were received //! @returns num bytes received virtual unsigned long reportRecvBytes() const = 0; //! Get how many messages were received //! 
@returns num messages received virtual unsigned long reportRecvMsgs() const = 0; //! Get any other extra statistics that might need to be reported; varies //! depending on implementation //! @returns vector of extra things to be reported virtual std::vector reportExtra() const = 0; //! Get the names of the extra things that are returned by reportExtra //! @returns vector of the names of the reported extra things virtual std::vector> reportExtraNamed() const = 0; }; //! Variable that keeps track of which network send/recv phase a program is //! currently on. Can be seen as a count of send/recv rounds that have occured. extern uint32_t evilPhase; //! Get the network interface //! @returns network interface NetworkInterface& getSystemNetworkInterface(); namespace internal { //! Deletes the system network interface (if it exists). void destroySystemNetworkInterface(); } // namespace internal //! Gets this host's ID //! @returns ID of this host uint32_t getHostID(); //! Returns a BufferedNetwork interface NetworkInterface& makeNetworkBuffered(); //! Returns a LCINetwork interface NetworkInterface& makeNetworkLCI(); //! Returns a host barrier, which is a regular MPI-Like Barrier for all hosts. //! @warning Should not be called within a parallel region; assumes only one //! thread is calling it substrate::Barrier& getHostBarrier(); //! Returns a fence that ensures all pending messages are delivered, acting //! like a memory-barrier substrate::Barrier& getHostFence(); //////////////////////////////////////////////////////////////////////////////// // Implementations //////////////////////////////////////////////////////////////////////////////// namespace { // anon template static void genericLandingPad(uint32_t src, RecvBuffer& buf) { void (*fp)(uint32_t, Args...); std::tuple args; gDeserialize(buf, fp, args); std::apply([fp, src](Args... 
params) { fp(src, params...); }, args); } } // namespace template void NetworkInterface::sendSimple(uint32_t dest, void (*recv)(uint32_t, Args...), Args... param) { SendBuffer buf; gSerialize(buf, (uintptr_t)recv, param..., (uintptr_t)genericLandingPad); sendTagged(dest, 0, buf); } template void NetworkInterface::broadcastSimple(void (*recv)(uint32_t, Args...), Args... param) { SendBuffer buf; gSerialize(buf, (uintptr_t)recv, param...); broadcast(genericLandingPad, buf, false); } } // namespace galois::runtime #endif ================================================ FILE: libdist/include/galois/runtime/NetworkIO.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file NetworkIO.h * * Contains NetworkIO, a base class that is inherited by classes that want to * implement the communication layer of Galois. (e.g. 
NetworkIOMPI and * NetworkIOLWCI) */ #ifndef GALOIS_RUNTIME_NETWORKTHREAD_H #define GALOIS_RUNTIME_NETWORKTHREAD_H #include #include #include #include #include #include #include #include #include #include #include #include "galois/runtime/MemUsage.h" #include "galois/PODResizeableArray.h" namespace galois { namespace runtime { /** * Class for the network IO layer which is responsible for doing sends/receives * of data. Used by the network interface to do the actual communication. */ class NetworkIO { protected: /** * Wrapper for dealing with MPI error codes. Program dies if the error code * isn't MPI_SUCCESS. * * @param rc Error code to check for success */ static void handleError(int rc) { if (rc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, rc); } } //! memory usage tracker MemUsageTracker& memUsageTracker; //! Number of inflight sends and receives std::atomic& inflightSends; std::atomic& inflightRecvs; // using vTy = std::vector; using vTy = galois::PODResizeableArray; public: /** * Message structure for sending data across the network. */ struct message { uint32_t host; //!< destination of this message uint32_t tag; //!< tag on message indicating distinct communication phases vTy data; //!< data portion of message //! Default constructor initializes host and tag to large numbers. message() : host(~0), tag(~0) {} //! @param h Host to send message to //! @param t Tag to associate with message //! @param d Data to save in message message(uint32_t h, uint32_t t, vTy&& d) : host(h), tag(t), data(std::move(d)) {} //! A message is valid if there is data to be sent //! @returns true if data is non-empty bool valid() const { return !data.empty(); } }; //! The default constructor takes a memory usage tracker and saves it //! @param tracker reference to a memory usage tracker used by the system //! @param sends //! 
@param recvs NetworkIO(MemUsageTracker& tracker, std::atomic& sends, std::atomic& recvs) : memUsageTracker(tracker), inflightSends(sends), inflightRecvs(recvs) {} //! Default destructor does nothing. virtual ~NetworkIO(); //! Queues a message for sending out. Takes ownership of data buffer. virtual void enqueue(message m) = 0; //! Checks to see if a message is here for this host to receive. If so, take //! and return it //! @returns an empty message if no message virtual message dequeue() = 0; //! Make progress. Other functions don't have to make progress. virtual void progress() = 0; }; /** * Creates/returns a network IO layer that uses MPI to do communication. * * @returns tuple with pointer to the MPI IO layer, this host's ID, and the * total number of hosts in the system */ std::tuple, uint32_t, uint32_t> makeNetworkIOMPI(galois::runtime::MemUsageTracker& tracker, std::atomic& sends, std::atomic& recvs); // #ifdef GALOIS_USE_LCI // /** // * Creates/returns a network IO layer that uses LWCI to do communication. // * // * @returns tuple with pointer to the LWCI IO layer, this host's ID, and the // * total number of hosts in the system // */ // std::tuple, uint32_t, uint32_t> // makeNetworkIOLWCI(galois::runtime::MemUsageTracker& tracker, // std::atomic& sends, std::atomic& recvs); // #endif } // namespace runtime } // namespace galois #endif ================================================ FILE: libdist/include/galois/runtime/Serialize.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. 
* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file Serialize.h * * Contains functions that serialize/deserialize data, mainly for sending * out serialized data over the network and deserializing it on the other end. */ #ifndef GALOIS_RUNTIME_SERIALIZE_H #define GALOIS_RUNTIME_SERIALIZE_H #include #include #include #include #include #include #include #include #include "galois/runtime/ExtraTraits.h" #include #include #include #include #include "galois/CopyableTuple.h" #include "galois/Bag.h" namespace galois { namespace runtime { class DeSerializeBuffer; // forward declaration for friend declaration /** * Buffer for serialization of data. Mainly used during network communication. */ class SerializeBuffer { //! Access to a deserialize buffer friend DeSerializeBuffer; //! type of data buffer // using vTy = std::vector; using vTy = galois::PODResizeableArray; //! the actual data stored in this buffer vTy bufdata; public: //! default constructor SerializeBuffer() = default; //! disabled copy constructor SerializeBuffer(SerializeBuffer&& rhs) = default; //! Creates a buffer from another buffer //! @param d buffer to create from //! 
@param len amount of copy from buffer d SerializeBuffer(const char* d, unsigned len) : bufdata(d, d + len) {} //! Push a character onto the serialize buffer inline void push(const char c) { bufdata.push_back(c); } //! Insert characters from a buffer into the serialize buffer void insert(const uint8_t* c, size_t bytes) { bufdata.insert(bufdata.end(), c, c + bytes); } //! Insert characters from a buffer into the serialize buffer at a particular //! offset void insertAt(const uint8_t* c, size_t bytes, size_t offset) { std::copy_n(c, bytes, bufdata.begin() + offset); } /** * Reserve space at the end for inserting new data into the serialize * buffer * * @param bytes number of bytes to reserve at the end * @returns offset to the end of the buffer before new space was reserved */ size_t encomber(size_t bytes) { size_t retval = bufdata.size(); bufdata.resize(retval + bytes); return retval; } void resize(size_t bytes) { bufdata.resize(bytes); } /** * Reserve more space in the serialize buffer. * * @param s extra space to reserve */ void reserve(size_t s) { bufdata.reserve(bufdata.size() + s); } //! Returns a pointer to the data stored in this serialize buffer const uint8_t* linearData() const { return bufdata.data(); } //! Returns vector of data stored in this serialize buffer vTy& getVec() { return bufdata; } //! Returns an iterator to the beginning of the data in this serialize buffer vTy::const_iterator begin() const { return bufdata.cbegin(); } //! Returns an iterator to the end of the data in this serialize buffer vTy::const_iterator end() const { return bufdata.cend(); } using size_type = vTy::size_type; //! Returns the size of the serialize buffer size_type size() const { return bufdata.size(); } //! Utility print function for the serialize buffer //! @param o stream to print to void print(std::ostream& o) const { o << "<{" << std::hex; for (auto& i : bufdata) o << (unsigned int)i << " "; o << std::dec << "}>"; } //! 
Operator that calls the print function of the serialize buffer friend std::ostream& operator<<(std::ostream& os, const SerializeBuffer& b) { b.print(os); return os; } }; /** * Buffer for deserialization of data. Mainly used during network * communication. */ class DeSerializeBuffer { //! Access to serialize buffer friend SerializeBuffer; //! type of data buffer // using vTy = std::vector; using vTy = galois::PODResizeableArray; //! the actual data stored in this buffer vTy bufdata; int offset; public: //! Constructor initializes offset into buffer to 0 DeSerializeBuffer() : offset(0) {} //! Disable copy constructor DeSerializeBuffer(DeSerializeBuffer&&) = default; //! Move constructor //! @param v vector to act as deserialize buffer //! @param start offset to start saving data into DeSerializeBuffer(vTy&& v, uint32_t start = 0) : bufdata(std::move(v)), offset(start) {} //! Constructor that takes an existing vector to use as the deserialize //! buffer explicit DeSerializeBuffer(vTy& data) { bufdata.swap(data); offset = 0; } /** * Initializes the deserialize buffer with a certain size * @param [in] count size to initialize buffer to */ explicit DeSerializeBuffer(int count) : bufdata(count), offset(0) {} /** * Initializes the deserialize buffer using vector initialization from * 2 iterators. */ template DeSerializeBuffer(Iter b, Iter e) : bufdata(b, e), offset{0} {} /** * Initialize a deserialize buffer from a serialize buffer */ explicit DeSerializeBuffer(SerializeBuffer&& buf) : offset(0) { bufdata.swap(buf.bufdata); } /** * Disable copy constructor */ DeSerializeBuffer& operator=(DeSerializeBuffer&& buf) = default; /** * Reset deserialize buffer * @param count new size of buffer */ void reset(int count) { offset = 0; bufdata.resize(count); } //! Gets the current offset into the deserialize buffer unsigned getOffset() const { return offset; } //! Sets the offset into the deserialize buffer void setOffset(unsigned off) { assert(off <= size()); offset = off; } //! 
Gets the size of the deserialize buffer unsigned size() const { return bufdata.size(); } //! Returns true if the deserialize buffer is empty //! @returns true if the deserialize buffer is empty bool empty() const { return bufdata.empty(); } //! Get the next character in the deserialize buffer unsigned char pop() { return bufdata.at(offset++); } //! Clears the last x bytes of the deserialize buffer, resizing it as well //! @param x How many bytes from the end to clear void pop_back(unsigned x) { bufdata.resize(bufdata.size() - x); } /** * Extracts a certain amount of data from the deserialize buffer * * @param dst buffer to copy data from deserialize buffer into * @param num Amount of data to get from deserialize buffer */ void extract(uint8_t* dst, size_t num) { if (num > 0) { memcpy(dst, &bufdata[offset], num); offset += num; } } //! Get the underlying vector storing the data of the deserialize //! buffer vTy& getVec() { return bufdata; } //! Get a pointer to the underlying data of the deserialize buffer void* linearData() { return &bufdata[0]; } //! Get a pointer to the remaining data of the deserialize buffer //! (as determined by offset) const uint8_t* r_linearData() const { return &bufdata[offset]; } //! Get the remaining size of the deserialize buffer (as determined //! by offset) size_t r_size() const { return bufdata.size() - offset; } //! Checks if the current location in the deserialize buffer is aligned //! to some size a bool atAlignment(size_t a) { return (uintptr_t)r_linearData() % a == 0; } //! Utility print of deserialize buffer //! @param o stream to print to void print(std::ostream& o) const { o << "<{(" << offset << ") " << std::hex; for (auto ii = bufdata.begin(), ee = bufdata.end(); ii != ee; ++ii) o << (unsigned int)*ii << " "; o << std::dec << "}>"; } //! 
Operator for printing deserialize buffer friend std::ostream& operator<<(std::ostream& os, const DeSerializeBuffer& buf) { buf.print(os); return os; } }; namespace internal { /** * Returns the size necessary for an object in a buffer. * This version runs if the data is memory copyable; uses sizeof. * * @tparam T type of datato get size of */ template __attribute__((always_inline)) constexpr size_t gSizedObj(const T&, typename std::enable_if::value>::type* = 0) { return sizeof(T); } /** * Returns the size necessary for an object in a buffer. * This version runs if the data is not memory copyable but is serializable. * It returns the size of a uintptr_t. * * @tparam T type of datato get size of * @returns size of uintptr_t */ template __attribute__((always_inline)) constexpr size_t gSizedObj(const T&, typename std::enable_if::value>::type* = 0, typename std::enable_if::value>::type* = 0) { return sizeof(uintptr_t); } /** * Returns the size necessary for storing 2 elements of a pair into a * serialize buffer. * * @param data pair of 2 elements */ template inline size_t gSizedObj(const std::pair& data) { return gSizedObj(data.first) + gSizedObj(data.second); } /** * Returns the size necessary to store a sequence in a serialize buffer. * This depends on if the sequence is memory copyable. */ template size_t gSizedSeq(const Seq& seq) { typename Seq::size_type size = seq.size(); typedef typename Seq::value_type T; size_t tsize = std::conditional< is_memory_copyable::value, std::integral_constant, std::integral_constant>::type::value; return sizeof(size) + tsize * size; } /** * Returns the size needed to store the elements a vector in a serialize * buffer. * * @returns size needed to store a vector into a serialize buffer */ template inline size_t gSizedObj(const std::vector& data) { return gSizedSeq(data); } /** * Returns the size needed to store the elements a PODResizeableArray in a * serialize buffer. 
* * @returns size needed to store a PODResizeableArray into a serialize buffer */ template inline size_t gSizedObj(const galois::PODResizeableArray& data) { return gSizedSeq(data); } /** * Returns the size needed to store the elements a deque into a serialize * buffer. * * @returns size needed to store a deque into a serialize buffer */ template inline size_t gSerializeObj(const std::deque& data) { return gSizedSeq(data); } /** * Returns the size needed to store the elements a Galois deque into a serialize * buffer. * * @returns size needed to store a Galois deque into a serialize buffer */ template inline size_t gSizedObj(const galois::gdeque& data) { return gSizedSeq(data); } /** * Returns the size needed to store a string into a serialize * buffer. * * @returns size needed to store a string into a serialize buffer */ template inline size_t gSizedObj(const std::basic_string, A>& data) { return data.length() + 1; } /** * Returns the size of the passed in serialize buffer * * @returns size of the serialize buffer passed into it */ inline size_t gSizedObj(const SerializeBuffer& data) { return data.size(); } /** * Returns the size of the passed in deserialize buffer * * @returns size of the deserialize buffer passed into it */ inline size_t gSizedObj(const DeSerializeBuffer& rbuf) { return rbuf.r_size(); } /** * Returns the size of the passed in insert bag. * * @returns size of the insert bag passed into it */ template inline size_t gSizedObj(const galois::InsertBag& bag) { return bag.size(); } /** * Returns 0. * @returns 0 */ inline size_t adder() { return 0; } /** * Returns the passed in argument. * @param a a number * @returns a */ inline size_t adder(size_t a) { return a; } /** * Returns the sum of all passed in arguments. * @returns sum of all arguments */ template inline size_t adder(size_t a, size_t b, Args&&... 
args) { return a + b + adder(args...); } } // namespace internal /** * Gets the total size necessary for storing all of the passed in arguments into * a serialize buffer. * * @returns size necessary for storing all arguments into a serialize buffer */ template static inline size_t gSized(Args&&... args) { return internal::adder(internal::gSizedObj(args)...); } //////////////////////////////////////////////////////////////////////////////// // Serialize support //////////////////////////////////////////////////////////////////////////////// namespace internal { /** * Serialize a memory copyable object into a serialize buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data Data to serialize */ template inline void gSerializeObj( SerializeBuffer& buf, const T& data, typename std::enable_if::value>::type* = 0) { uint8_t* pdata = (uint8_t*)&data; buf.insert(pdata, sizeof(T)); } /** * Serialize a non-memory copyable but serializable object into a serialize * buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data Data to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const T& data, typename std::enable_if::value>::type* = 0, typename std::enable_if::value>::type* = 0) { data.serialize(buf); } /** * Serialize a pair into a serialize buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data Pair to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const std::pair& data) { gSerialize(buf, data.first, data.second); } /** * Serialize a pair. Either memcpys entire struct or serializes * each element individually. 
* * @param [in,out] buf Serialize buffer to serialize into * @param [in] data Pair to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const galois::Pair& data) { if (is_memory_copyable::value && is_memory_copyable::value) { // do memcpy buf.insert((uint8_t*)&data, sizeof(data)); } else { // serialize each individually gSerialize(buf, data.first, data.second); } } /** * Serialize a tuple of 3. Either memcpys entire struct or serializes * each element individually. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data Tuple of 3 to serialize * @todo This specialization isn't being used as expected. Figure out why. */ template inline void gSerializeObj(SerializeBuffer& buf, const galois::TupleOfThree& data) { if (is_memory_copyable::value && is_memory_copyable::value && is_memory_copyable::value) { // do memcpy buf.insert((uint8_t*)&data, sizeof(data)); } else { // serialize each individually gSerialize(buf, data.first, data.second, data.third); } } /** * Serialize a copyable atomic: load atomic data as a plain old * datatype (POD) and mem copy it to the buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data copyable atomic to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const galois::CopyableAtomic& data) { T temp = data.load(); buf.insert((uint8_t*)(&temp), sizeof(T)); } /** * Serialize a string into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data String */ template inline void gSerializeObj(SerializeBuffer& buf, const std::basic_string, A>& data) { buf.insert((uint8_t*)data.data(), data.length() + 1); } // Forward declaration of vector serialize template inline void gSerializeObj(SerializeBuffer& buf, const std::vector& data); /** * Serialize a sequence type into a buffer. 
* * @param [in,out] buf Serialize buffer to serialize into * @param [in] seq sequence to serialize * @todo specialize for Sequences with consecutive PODS */ template void gSerializeSeq(SerializeBuffer& buf, const Seq& seq) { typename Seq::size_type size = seq.size(); gSerializeObj(buf, size); for (auto& o : seq) gSerializeObj(buf, o); } /** * Serialize a linear sequence type (i.e. memcopyable) into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] seq sequence to serialize */ template void gSerializeLinearSeq(SerializeBuffer& buf, const Seq& seq) { typename Seq::size_type size = seq.size(); typedef typename Seq::value_type T; size_t tsize = sizeof(T); // buf.reserve(size * tsize + sizeof(size)); gSerializeObj(buf, size); buf.insert((uint8_t*)seq.data(), size * tsize); } /** * Serialize a vector into a buffer, choosing to do a memcopy or * to serialize each element individually depending on data. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data vector to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const std::vector& data) { if (is_memory_copyable::value) gSerializeLinearSeq(buf, data); else gSerializeSeq(buf, data); } /** * Serialize a PODResizeableArray into a buffer, choosing to do a memcopy or * to serialize each element individually depending on data. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data PODResizeableArray to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const galois::PODResizeableArray& data) { gSerializeLinearSeq(buf, data); } /** * Serialize a deque into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data deque to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const std::deque& data) { gSerializeSeq(buf, data); } /** * Serialize a Galois deque into a buffer. 
* * @param [in,out] buf Serialize buffer to serialize into * @param [in] data deque to serialize */ template inline void gSerializeObj(SerializeBuffer& buf, const galois::gdeque& data) { gSerializeSeq(buf, data); } /** * Serialize data in another serialize buffer into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data serialize buffer to get data from */ inline void gSerializeObj(SerializeBuffer& buf, const SerializeBuffer& data) { buf.insert(data.linearData(), data.size()); } /** * Serialize data in a deserialize buffer into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] rbuf deserialize buffer to get data from */ inline void gSerializeObj(SerializeBuffer& buf, const DeSerializeBuffer& rbuf) { // buf.reserve(rbuf.r_size()); buf.insert(rbuf.r_linearData(), rbuf.r_size()); } /** * Serialize a dynamic bitset into a buffer. * * @param [in,out] buf Serialize buffer to serialize into * @param [in] data dynamic bitset to serialize */ inline void gSerializeObj(SerializeBuffer& buf, const galois::DynamicBitSet& data) { gSerializeObj(buf, data.size()); gSerializeObj(buf, data.get_vec()); } // we removed the functions in Bag.h that this function requires, so this // won't work #if 0 /** * For serializing insertBag. * Insert contigous memory chunks for each thread * and clear it. * Can not be const. * Implemention below makes sure that it can be deserialized * into a linear sequence like vector or deque. 
*/ template inline void gSerializeObj(SerializeBuffer& buf, galois::InsertBag& bag){ gSerializeObj(buf, bag.size()); auto headerVec = bag.getHeads(); size_t totalSize = 0; for(auto h : headerVec){ size_t localSize = (h->dend - h->dbegin); buf.insert((uint8_t*)h->dbegin, localSize*sizeof(T)); totalSize += (h->dend - h->dbegin); } assert(totalSize == bag.size()); bag.clear(); } #endif } // namespace internal /** * LazyRef structure; used to store both a type and an offset to begin * saving data into */ template struct LazyRef { size_t off; }; /** * Lazy serialize: doesn't actually serialize the data itself, but only * reserves space for it in the serialize buffer + serializes the * passed in num. */ template static inline LazyRef gSerializeLazySeq(SerializeBuffer& buf, unsigned num, Seq*) { static_assert(is_memory_copyable::value, "Not POD Sequence"); typename Seq::size_type size = num; internal::gSerializeObj(buf, size); size_t tsize = sizeof(typename Seq::value_type); return LazyRef{buf.encomber(tsize * num)}; } /** * Lazy serialize: given an offset and type through a LazyRef object, * serializes a certain amount from the passed in data array. * * @param buf Buffer to serialize into * @param r struct with info on where to start saving data and the type * of the data that needs to be saved * @param item Number of items that need to be serialized * @param data Data array containing data that needs to be serialized */ template static inline void gSerializeLazy(SerializeBuffer& buf, LazyRef r, unsigned item, Ty&& data) { size_t off = r.off + sizeof(Ty) * item; uint8_t* pdata = (uint8_t*)&data; buf.insertAt(pdata, sizeof(Ty), off); } /** * Serialize an entire series of datatypes into a provided serialize buffer */ template static inline void gSerialize(SerializeBuffer& buf, T1&& t1, Args&&... args) { buf.reserve(gSized(t1, args...)); internal::gSerializeObj(buf, std::forward(t1)); gSerialize(buf, std::forward(args)...); } /** * No-op function. 
"Base case" for recursive gSerialize function. */ static inline void gSerialize(SerializeBuffer&) {} //////////////////////////////////////////////////////////////////////////////// // Deserialize support //////////////////////////////////////////////////////////////////////////////// namespace internal { /** * Deserialize a memcopyable object from a buffer. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] Data to deserialize into */ template void gDeserializeObj( DeSerializeBuffer& buf, T& data, typename std::enable_if::value>::type* = 0) { uint8_t* pdata = (uint8_t*)&data; buf.extract(pdata, sizeof(T)); } /** * Deserialize a non-memcopyable but seralizable object from a buffer. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] Data to deserialize into */ template void gDeserializeObj( DeSerializeBuffer& buf, T& data, typename std::enable_if::value>::type* = 0, typename std::enable_if::value>::type* = 0) { data.deserialize(buf); } /** * Deserialize a pair from a buffer. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] pair to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, std::pair& data) { gDeserialize(buf, data.first, data.second); } /** * Deserialize into a pair. Either memcpys from buffer or deserializes * each element individually. * * @param [in,out] buf Buffer to deserialize from * @param [in] data Pair to deserialize into */ template inline void gDeserializeObj(DeSerializeBuffer& buf, galois::Pair& data) { if (is_memory_copyable::value && is_memory_copyable::value) { // do memcpy buf.extract((uint8_t*)&data, sizeof(data)); } else { // deserialize each individually gDeserialize(buf, data.first, data.second); } } /** * Deserialize into a tuple of 3. Either memcpys from buffer or deserializes * each element individually. 
* * @param buf [in,out] Buffer to deserialize from * @param data [in,out] triple to deserialize into * @todo This specialization isn't being used as expected. Figure out why. */ template inline void gDeserializeObj(DeSerializeBuffer& buf, galois::TupleOfThree& data) { if (is_memory_copyable::value && is_memory_copyable::value && is_memory_copyable::value) { // do memcpy straight to data buf.extract((uint8_t*)&data, sizeof(data)); } else { // deserialize each individually gDeserialize(buf, data.first, data.second, data.third); } } /** * Deserialize into a CopyableAtomic. Loads the POD from the DeserializeBuffer * then stores it into the atomic. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] copyable atomic to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, galois::CopyableAtomic& data) { T tempData; uint8_t* pointerToTemp = (uint8_t*)&tempData; buf.extract(pointerToTemp, sizeof(T)); data.store(tempData); } namespace { template struct seq {}; template struct gens : gens {}; template struct gens<0, S...> { typedef seq type; }; } // namespace /** * Deserialize into a tuple. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] tuple to serialize into */ template void gDeserializeTuple(DeSerializeBuffer& buf, std::tuple& data, seq) { gDeserialize(buf, std::get(data)...); } /** * Wrapper for deserialization into a tuple. * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] tuple to serialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, std::tuple& data) { return gDeserializeTuple(buf, data, typename gens::type()); } /** * Deserialize into a string. 
* * @param buf [in,out] Buffer to deserialize from * @param data [in,out] string to serialize into */ template inline void gDeserializeObj(DeSerializeBuffer& buf, std::basic_string, A>& data) { char c = buf.pop(); while (c != '\0') { data.push_back(c); c = buf.pop(); }; } // Forward declaration of vector deserialize template void gDeserializeObj(DeSerializeBuffer& buf, std::vector& data); /** * Deserialize into a sequence object * * @param buf [in,out] Buffer to deserialize from * @param seq [in,out] sequence to deserialize into */ template void gDeserializeSeq(DeSerializeBuffer& buf, Seq& seq) { seq.clear(); typename Seq::size_type size; gDeserializeObj(buf, size); while (size--) { typename Seq::value_type v; gDeserializeObj(buf, v); seq.push_back(v); } } /** * Deserialize into a linear sequence object (i.e. one that is mem-copyable) * * @param buf [in,out] Buffer to deserialize from * @param seq [in,out] sequence to deserialize into */ template void gDeserializeLinearSeq(DeSerializeBuffer& buf, Seq& seq) { typedef typename Seq::value_type T; // seq.clear(); typename Seq::size_type size; gDeserializeObj(buf, size); // If the alignment is right, cast to a T array and insert if (buf.atAlignment(alignof(T))) { T* src = (T*)buf.r_linearData(); seq.assign(src, &src[size]); buf.setOffset(buf.getOffset() + size * sizeof(T)); } else { seq.resize(size); buf.extract((uint8_t*)seq.data(), size * sizeof(T)); } } /** * Deserialize into a deque * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] deque to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, std::deque& data) { gDeserializeSeq(buf, data); } /** * Deserialize into a vector; implementation depends on whether or not data in * vector is mem-copyable * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] vector to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, std::vector& data) { if (is_memory_copyable::value) 
gDeserializeLinearSeq(buf, data); else gDeserializeSeq(buf, data); } /** * Deserialize into a PODResizeableArray * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] PODResizeableArray to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, galois::PODResizeableArray& data) { gDeserializeLinearSeq(buf, data); } /** * Deserialize into a galois deque * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] galois deque to deserialize into */ template void gDeserializeObj(DeSerializeBuffer& buf, galois::gdeque& data) { gDeserializeSeq(buf, data); } /** * Deserialize into a dynamic bitset * * @param buf [in,out] Buffer to deserialize from * @param data [in,out] bitset to deserialize into */ inline void gDeserializeObj(DeSerializeBuffer& buf, galois::DynamicBitSet& data) { size_t size = 0; gDeserializeObj(buf, size); data.resize(size); gDeserializeObj(buf, data.get_vec()); } } // namespace internal /** * Deserialize data in a buffer into a series of objects */ template void gDeserialize(DeSerializeBuffer& buf, T1&& t1, Args&&... args) { internal::gDeserializeObj(buf, std::forward(t1)); gDeserialize(buf, std::forward(args)...); } /** * Base case for regular gDeserialize recursive call. */ inline void gDeserialize(DeSerializeBuffer&) {} /** * "Deserialize" data in an iterator type into a data object. 
* * @tparam Iter iterator type that has objects of type T * @tparam T type of data to deserialize into * @param iter Iterator containing data that we want to save into the passed in * data reference * @param data Object to save data in the iterator type into */ template auto gDeserializeRaw(Iter iter, T& data) -> decltype( std::declval::value>::type>(), Iter()) { unsigned char* pdata = (unsigned char*)&data; for (size_t i = 0; i < sizeof(T); ++i) pdata[i] = *iter++; return iter; } } // namespace runtime } // namespace galois #endif // SERIALIZE DEF end ================================================ FILE: libdist/src/Barrier.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file libdist/src/Barrier.cpp * * Contains implementation of HostFence and HostBarrier as well as functions * that get static singletons of the 2. 
* * A fence flushes out and receives all messages in the network while a barrier * simply acts as a barrier in the code for all hosts. */ #include "galois/substrate/PerThreadStorage.h" #include "galois/runtime/Substrate.h" #include "galois/substrate/CompilerSpecific.h" #include "galois/runtime/Network.h" #include "galois/runtime/LWCI.h" #include #include #include #include #include "galois/runtime/BareMPI.h" namespace { class HostFence : public galois::substrate::Barrier { public: virtual const char* name() const { return "HostFence"; } virtual void reinit(unsigned) {} //! control-flow barrier across distributed hosts //! acts as a distributed-memory fence as well (flushes send and receives) virtual void wait() { auto& net = galois::runtime::getSystemNetworkInterface(); if (galois::runtime::evilPhase == 0) { galois::gWarn("evilPhase is 0, implying loop-around or no use: fence " "may not work correctly!"); } for (unsigned h = 0; h < net.Num; ++h) { if (h == net.ID) continue; galois::runtime::SendBuffer b; galois::runtime::gSerialize(b, net.ID + 1); // non-zero message net.sendTagged(h, galois::runtime::evilPhase, b); } net.flush(); // flush all sends unsigned received = 1; // self while (received < net.Num) { decltype(net.recieveTagged(galois::runtime::evilPhase, nullptr)) p; do { net.handleReceives(); // flush all receives from net.sendMsg() or // net.sendSimple() p = net.recieveTagged(galois::runtime::evilPhase, nullptr); } while (!p); assert(p->first != net.ID); // ignore received data ++received; } ++galois::runtime::evilPhase; if (galois::runtime::evilPhase >= static_cast( std::numeric_limits::max())) { // limit defined by MPI or // LCI galois::runtime::evilPhase = 1; } } }; class HostBarrier : public galois::substrate::Barrier { public: virtual const char* name() const { return "HostBarrier"; } virtual void reinit(unsigned) {} //! 
Control-flow barrier across distributed hosts virtual void wait() { #ifdef GALOIS_USE_LCI lc_barrier(lc_col_ep); #else MPI_Barrier(MPI_COMM_WORLD); // assumes MPI_THREAD_MULTIPLE #endif } }; } // end anonymous namespace galois::substrate::Barrier& galois::runtime::getHostBarrier() { static HostBarrier b; return b; } galois::substrate::Barrier& galois::runtime::getHostFence() { static HostFence b; return b; } ================================================ FILE: libdist/src/DistGalois.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DistGalois.cpp * * Includes the definitions for DistMemSys's constructor and destructor */ #include "galois/DistGalois.h" #include "galois/runtime/Network.h" //! DistMemSys constructor which calls the shared memory runtime constructor //! 
with the distributed stats manager galois::DistMemSys::DistMemSys() : galois::runtime::SharedMem() {} //! DistMemSys destructor which reports memory usage from the network galois::DistMemSys::~DistMemSys() { if (MORE_DIST_STATS) { auto& net = galois::runtime::getSystemNetworkInterface(); net.reportMemUsage(); } } ================================================ FILE: libdist/src/DistStats.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file DistStats.cpp * * Contains implementations for DistStats.h. 
*/ #include "galois/runtime/DistStats.h" #include "galois/runtime/Serialize.h" #include "galois/DTerminationDetector.h" using namespace galois::runtime; DistStatManager* internal::distSysStatManager(void) { galois::runtime::StatManager* sm = internal::sysStatManager(); assert(sm && "StatManager not initialized"); DistStatManager* d = dynamic_cast(sm); assert(d && "dynamic_cast failed"); return d; } inline static DistStatManager* dsm(void) { return internal::distSysStatManager(); } DistStatManager::DistStatManager(const std::string& outfile) : StatManager(outfile) {} DistStatManager::~DistStatManager() { galois::runtime::internal::destroySystemNetworkInterface(); } class galois::runtime::StatRecvHelper { public: static void recvAtHost_0_hostTotalTy(galois::gstl::Str region, galois::gstl::Str category, StatTotal::Type totalTy) { dsm()->addRecvdHostTotalTy(region, category, totalTy); } static void recvAtHost_0_int(uint32_t hostID, galois::gstl::Str region, galois::gstl::Str category, int64_t thrdTotal, StatTotal::Type totalTy, const galois::gstl::Vector thrdVals) { dsm()->addRecvdStat(hostID, region, category, thrdTotal, totalTy, thrdVals); } static void recvAtHost_0_fp(uint32_t hostID, galois::gstl::Str region, galois::gstl::Str category, double thrdTotal, StatTotal::Type totalTy, const galois::gstl::Vector thrdVals) { dsm()->addRecvdStat(hostID, region, category, thrdTotal, totalTy, thrdVals); } static void recvAtHost_0_str(uint32_t hostID, galois::gstl::Str region, galois::gstl::Str category, galois::gstl::Str thrdTotal, StatTotal::Type totalTy, const galois::gstl::Vector thrdVals) { dsm()->addRecvdParam(hostID, region, category, thrdTotal, totalTy, thrdVals); } }; void DistStatManager::mergeStats(void) { Base::mergeStats(); hostTotalTypes.mergeStats(); combineAtHost_0(); } void DistStatManager::combineAtHost_0_helper(void) { const bool IS_HOST0 = getHostID() == 0; const auto& hTotalMap = hostTotalTypes.mergedMap(); size_t syncTypePhase = 0; if (!IS_HOST0) { for 
(auto i = hTotalMap.cbegin(), end_i = hTotalMap.cend(); i != end_i; ++i) { SendBuffer b; gSerialize(b, hTotalMap.region(i), hTotalMap.category(i), hTotalMap.stat(i).totalTy()); getSystemNetworkInterface().sendTagged(0, galois::runtime::evilPhase, b, syncTypePhase); } } ++syncTypePhase; for (auto i = Base::intBegin(), end_i = Base::intEnd(); i != end_i; ++i) { Str ln; Str cat; int64_t thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; Base::readIntStat(i, ln, cat, thrdTotal, totalTy, thrdVals); if (IS_HOST0) { addRecvdStat(0, ln, cat, thrdTotal, totalTy, thrdVals); } else { SendBuffer b; gSerialize(b, ln, cat, thrdTotal, totalTy, thrdVals); getSystemNetworkInterface().sendTagged(0, galois::runtime::evilPhase, b, syncTypePhase); } } } void DistStatManager::combineAtHost_0_helper2(void) { const bool IS_HOST0 = getHostID() == 0; size_t syncTypePhase = 0; for (auto i = Base::fpBegin(), end_i = Base::fpEnd(); i != end_i; ++i) { Str ln; Str cat; double thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; Base::readFPstat(i, ln, cat, thrdTotal, totalTy, thrdVals); if (IS_HOST0) { addRecvdStat(0, ln, cat, thrdTotal, totalTy, thrdVals); } else { SendBuffer b; gSerialize(b, ln, cat, thrdTotal, totalTy, thrdVals); getSystemNetworkInterface().sendTagged(0, galois::runtime::evilPhase, b, syncTypePhase); } } ++syncTypePhase; for (auto i = Base::paramBegin(), end_i = Base::paramEnd(); i != end_i; ++i) { Str ln; Str cat; Str thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; Base::readParam(i, ln, cat, thrdTotal, totalTy, thrdVals); if (IS_HOST0) { addRecvdParam(0, ln, cat, thrdTotal, totalTy, thrdVals); } else { SendBuffer b; gSerialize(b, ln, cat, thrdTotal, totalTy, thrdVals); getSystemNetworkInterface().sendTagged(0, galois::runtime::evilPhase, b, syncTypePhase); } } } void DistStatManager::receiveAtHost_0_helper(void) { size_t syncTypePhase = 0; { decltype(getSystemNetworkInterface().recieveTagged( galois::runtime::evilPhase, 
nullptr, syncTypePhase)) p; do { p = getSystemNetworkInterface().recieveTagged(galois::runtime::evilPhase, nullptr, syncTypePhase); if (p) { RecvBuffer& b = p->second; galois::gstl::Str region; galois::gstl::Str category; StatTotal::Type totalTy; gDeserialize(b, region, category, totalTy); StatRecvHelper::recvAtHost_0_hostTotalTy(region, category, totalTy); } } while (p); } ++syncTypePhase; { decltype(getSystemNetworkInterface().recieveTagged( galois::runtime::evilPhase, nullptr, syncTypePhase)) p; do { p = getSystemNetworkInterface().recieveTagged(galois::runtime::evilPhase, nullptr, syncTypePhase); if (p) { uint32_t hostID = p->first; RecvBuffer& b = p->second; Str ln; Str cat; int64_t thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; gDeserialize(b, ln, cat, thrdTotal, totalTy, thrdVals); StatRecvHelper::recvAtHost_0_int(hostID, ln, cat, thrdTotal, totalTy, thrdVals); } } while (p); } } void DistStatManager::receiveAtHost_0_helper2(void) { size_t syncTypePhase = 0; { decltype(getSystemNetworkInterface().recieveTagged( galois::runtime::evilPhase, nullptr, syncTypePhase)) p; do { p = getSystemNetworkInterface().recieveTagged(galois::runtime::evilPhase, nullptr, syncTypePhase); if (p) { uint32_t hostID = p->first; RecvBuffer& b = p->second; Str ln; Str cat; double thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; gDeserialize(b, ln, cat, thrdTotal, totalTy, thrdVals); StatRecvHelper::recvAtHost_0_fp(hostID, ln, cat, thrdTotal, totalTy, thrdVals); } } while (p); } ++syncTypePhase; { decltype(getSystemNetworkInterface().recieveTagged( galois::runtime::evilPhase, nullptr, syncTypePhase)) p; do { p = getSystemNetworkInterface().recieveTagged(galois::runtime::evilPhase, nullptr, syncTypePhase); if (p) { uint32_t hostID = p->first; RecvBuffer& b = p->second; Str ln; Str cat; Str thrdTotal; StatTotal::Type totalTy; galois::gstl::Vector thrdVals; gDeserialize(b, ln, cat, thrdTotal, totalTy, thrdVals); 
StatRecvHelper::recvAtHost_0_str(hostID, ln, cat, thrdTotal, totalTy, thrdVals); } } while (p); } } void DistStatManager::combineAtHost_0(void) { galois::DGTerminator td; // host 0 reads stats from Base class // other hosts send stats to host 0 combineAtHost_0_helper(); getSystemNetworkInterface().flush(); // work done before check td += 1; // barrier while (td.reduce()) { td.reset(); if (getHostID() == 0) { // receive from other hosts receiveAtHost_0_helper(); } } // explicit barrier after logical barrier is required // as next async phase begins immediately getHostBarrier().wait(); // host 0 reads stats from Base class // other hosts send stats to host 0 combineAtHost_0_helper2(); getSystemNetworkInterface().flush(); td += 1; // barrier while (td.reduce()) { td.reset(); if (getHostID() == 0) { // receive from other hosts receiveAtHost_0_helper2(); } } // explicit barrier after logical barrier is required // as next async phase begins immediately getHostBarrier().wait(); } bool DistStatManager::printingHostVals(void) { return galois::substrate::EnvCheck(DistStatManager::HSTAT_ENV_VAR); } StatTotal::Type DistStatManager::findHostTotalTy(const Str& region, const Str& category, const StatTotal::Type& thrdTotalTy) const { StatTotal::Type hostTotalTy = thrdTotalTy; auto& mrgMap = hostTotalTypes.mergedMap(); auto i = mrgMap.findStat(region, category); if (i != mrgMap.cend()) { hostTotalTy = mrgMap.stat(i).totalTy(); } return hostTotalTy; } void DistStatManager::addRecvdHostTotalTy(const Str& region, const Str& category, const StatTotal::Type& totalTy) { hostTotalTypes.addToStat(region, category, totalTy); } void DistStatManager::addRecvdStat( unsigned hostID, const Str& region, const Str& category, int64_t thrdTotal, const StatTotal::Type& thrdTotalTy, const DistStatManager::ThrdVals& thrdVals) { intDistStats.addToStat( region, category, std::make_tuple(hostID, thrdTotal, thrdTotalTy, thrdVals), findHostTotalTy(region, category, thrdTotalTy)); } void 
DistStatManager::addRecvdStat( unsigned hostID, const Str& region, const Str& category, double thrdTotal, const StatTotal::Type& thrdTotalTy, const DistStatManager::ThrdVals& thrdVals) { fpDistStats.addToStat( region, category, std::make_tuple(hostID, thrdTotal, thrdTotalTy, thrdVals), findHostTotalTy(region, category, thrdTotalTy)); } void DistStatManager::addRecvdParam( unsigned hostID, const Str& region, const Str& category, const Str& thrdTotal, const StatTotal::Type& thrdTotalTy, const DistStatManager::ThrdVals& thrdVals) { strDistStats.addToStat( region, category, std::make_tuple(hostID, thrdTotal, thrdTotalTy, thrdVals), findHostTotalTy(region, category, thrdTotalTy)); } void DistStatManager::printHeader(std::ostream& out) const { out << "STAT_TYPE" << SEP; out << "HOST_ID" << SEP; out << "REGION" << SEP << "CATEGORY" << SEP; out << "TOTAL_TYPE" << SEP << "TOTAL"; out << std::endl; } void DistStatManager::printStats(std::ostream& out) { mergeStats(); galois::DGTerminator td; if (getHostID() == 0) { printHeader(out); intDistStats.print(out); fpDistStats.print(out); strDistStats.print(out); } // all hosts must wait for host 0 to finish printing stats while (td.reduce()) { }; } ================================================ FILE: libdist/src/Network.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. 
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file Network.cpp * * Contains implementations for basic NetworkInterface functions and * initializations of some NetworkInterface variables. */ #include "galois/runtime/Tracer.h" #include "galois/runtime/Network.h" #include "galois/runtime/NetworkIO.h" #include #include using namespace galois::runtime; uint32_t galois::runtime::evilPhase = 1; uint32_t galois::runtime::NetworkInterface::ID = 0; uint32_t galois::runtime::NetworkInterface::Num = 1; uint32_t galois::runtime::getHostID() { return NetworkInterface::ID; } galois::runtime::NetworkIO::~NetworkIO() {} void NetworkInterface::initializeMPI() { int supportProvided; int initSuccess = MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &supportProvided); if (initSuccess != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, initSuccess); } if (supportProvided != MPI_THREAD_MULTIPLE) { GALOIS_DIE("MPI_THREAD_MULTIPLE not supported."); } } void NetworkInterface::finalizeMPI() { int finalizeSuccess = MPI_Finalize(); if (finalizeSuccess != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, finalizeSuccess); } galois::gDebug("[", NetworkInterface::ID, "] MPI finalized"); } NetworkInterface::NetworkInterface() {} NetworkInterface::~NetworkInterface() {} void NetworkInterface::reportMemUsage() const { std::string str("CommunicationMemUsage"); galois::runtime::reportStat_Tmin("dGraph", str + "Min", memUsageTracker.getMaxMemUsage()); galois::runtime::reportStat_Tmax("dGraph", str + "Max", memUsageTracker.getMaxMemUsage()); } // forward decl //! 
Receive broadcasted messages over the network static void bcastLandingPad(uint32_t src, ::RecvBuffer& buf); static void bcastLandingPad(uint32_t src, RecvBuffer& buf) { uintptr_t fp; gDeserialize(buf, fp); auto recv = (void (*)(uint32_t, RecvBuffer&))fp; trace("NetworkInterface::bcastLandingPad", (void*)recv); recv(src, buf); } void NetworkInterface::sendMsg(uint32_t dest, void (*recv)(uint32_t, RecvBuffer&), SendBuffer& buf) { gSerialize(buf, recv); sendTagged(dest, 0, buf); } void NetworkInterface::broadcast(void (*recv)(uint32_t, RecvBuffer&), SendBuffer& buf, bool self) { trace("NetworkInterface::broadcast", (void*)recv); auto fp = (uintptr_t)recv; for (unsigned x = 0; x < Num; ++x) { if (x != ID) { SendBuffer b; gSerialize(b, fp, buf, (uintptr_t)&bcastLandingPad); sendTagged(x, 0, b); } else if (self) { RecvBuffer rb(buf.begin(), buf.end()); recv(ID, rb); } } } void NetworkInterface::handleReceives() { std::unique_lock lg; auto opt = recieveTagged(0, &lg); while (opt) { uint32_t src = std::get<0>(*opt); RecvBuffer& buf = std::get<1>(*opt); uintptr_t fp = 0; gDeserializeRaw(buf.r_linearData() + buf.r_size() - sizeof(uintptr_t), fp); buf.pop_back(sizeof(uintptr_t)); assert(fp); auto f = (void (*)(uint32_t, RecvBuffer&))fp; f(src, buf); opt = recieveTagged(0, &lg); } } NetworkInterface& galois::runtime::getSystemNetworkInterface() { #ifndef GALOIS_USE_LCI return makeNetworkBuffered(); #else return makeNetworkLCI(); #endif } void galois::runtime::internal::destroySystemNetworkInterface() { // get net interface, then delete it NetworkInterface& netInterface = getSystemNetworkInterface(); delete &netInterface; } ================================================ FILE: libdist/src/NetworkBuffered.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file NetworkBuffered.cpp * * Contains NetworkInterfaceBuffered, an implementation of a network interface * that buffers messages before sending them out. * * @todo document this file more */ #include "galois/runtime/Network.h" #include "galois/runtime/NetworkIO.h" #include "galois/runtime/Tracer.h" #ifdef GALOIS_USE_LCI #define NO_AGG #endif #include #include #include #include using namespace galois::runtime; using namespace galois::substrate; namespace { /** * @class NetworkInterfaceBuffered * * Buffered network interface: messages are buffered before they are sent out. * A single worker thread is initialized to send/receive messages from/to * buffers. */ class NetworkInterfaceBuffered : public NetworkInterface { static const int COMM_MIN = 1400; //! bytes (sligtly smaller than an ethernet packet) static const int COMM_DELAY = 100; //! 
microseconds delay unsigned long statSendNum; unsigned long statSendBytes; unsigned long statSendEnqueued; unsigned long statRecvNum; unsigned long statRecvBytes; unsigned long statRecvDequeued; bool anyReceivedMessages; // using vTy = std::vector; using vTy = galois::PODResizeableArray; /** * Receive buffers for the buffered network interface */ class recvBuffer { std::deque data; size_t frontOffset; SimpleLock qlock; // tag of head of queue std::atomic dataPresent; bool sizeAtLeast(size_t n, uint32_t tag) { size_t tot = -frontOffset; for (auto& v : data) { if (v.tag == tag) { tot += v.data.size(); if (tot >= n) return true; } else { return false; } } return false; } template void copyOut(IterTy it, size_t n) { // assert(sizeAtLeast(n)); // fast path is first buffer { // limit scope auto& f0data = data[0].data; for (int k = frontOffset, ke = f0data.size(); k < ke && n; ++k, --n) *it++ = f0data[k]; } if (n) { // more data (slow path) for (int j = 1, je = data.size(); j < je && n; ++j) { auto& vdata = data[j].data; for (int k = 0, ke = vdata.size(); k < ke && n; ++k, --n) { *it++ = vdata[k]; } } } } /** * Return a (moved) vector if the len bytes requested are the last len * bytes of the front of the buffer queue */ std::optional popVec(uint32_t len, std::atomic& inflightRecvs) { if (data[0].data.size() == frontOffset + len) { vTy retval(std::move(data[0].data)); data.pop_front(); --inflightRecvs; frontOffset = 0; if (data.size()) { dataPresent = data.front().tag; } else { dataPresent = ~0; } return std::optional(std::move(retval)); } else { return std::optional(); } } void erase(size_t n, std::atomic& inflightRecvs) { frontOffset += n; while (frontOffset && frontOffset >= data.front().data.size()) { frontOffset -= data.front().data.size(); data.pop_front(); --inflightRecvs; } if (data.size()) { dataPresent = data.front().tag; } else { dataPresent = ~0; } } uint32_t getLenFromFront(uint32_t tag) { if (sizeAtLeast(sizeof(uint32_t), tag)) { union { uint8_t 
a[sizeof(uint32_t)]; uint32_t b; } c; copyOut(&c.a[0], sizeof(uint32_t)); return c.b; } else { return ~0; } } public: std::optional popMsg(uint32_t tag, std::atomic& inflightRecvs) { std::lock_guard lg(qlock); #ifndef NO_AGG uint32_t len = getLenFromFront(tag); // assert(len); if (len == ~0U || len == 0) return std::optional(); if (!sizeAtLeast(sizeof(uint32_t) + len, tag)) return std::optional(); erase(4, inflightRecvs); // Try just using the buffer if (auto r = popVec(len, inflightRecvs)) { auto start = r->size() - len; // std::cerr << "FP " << r->size() << " " << len << " " << start // << "\n"; return std::optional(RecvBuffer(std::move(*r), start)); } RecvBuffer buf(len); // FIXME: This is slows things down 25% copyOut((char*)buf.linearData(), len); erase(len, inflightRecvs); // std::cerr << "p " << tag << " " << len << "\n"; return std::optional(std::move(buf)); #else if (data.empty() || data.front().tag != tag) return std::optional(); vTy vec(std::move(data.front().data)); data.pop_front(); --inflightRecvs; if (!data.empty()) { dataPresent = data.front().tag; } else { dataPresent = ~0; } return std::optional(RecvBuffer(std::move(vec), 0)); #endif } // Worker thread interface void add(NetworkIO::message m) { std::lock_guard lg(qlock); if (data.empty()) { galois::runtime::trace("ADD LATEST ", m.tag); dataPresent = m.tag; } // std::cerr << m.data.size() << " " << // std::count(m.data.begin(), m.data.end(), 0) << "\n"; // for (auto x : m.data) { // std::cerr << (int) x << " "; // } // std::cerr << "\n"; // std::cerr << "A " << m.host << " " << m.tag << " " << m.data.size() << // "\n"; data.push_back(std::move(m)); assert(data.back().data.size() != (unsigned int)std::count(data.back().data.begin(), data.back().data.end(), 0)); } bool hasData(uint32_t tag) { return dataPresent == tag; } size_t size() { return data.size(); } uint32_t getPresentTag() { return dataPresent; } }; // end recv buffer class std::vector recvData; std::vector recvLock; /** * Send buffers for 
the buffered network interface */ class sendBuffer { struct msg { uint32_t tag; vTy data; msg(uint32_t t, vTy& _data) : tag(t), data(std::move(_data)) {} }; std::deque messages; std::atomic numBytes; std::atomic urgent; //! @todo FIXME track time since some epoch in an atomic. std::chrono::high_resolution_clock::time_point time; SimpleLock lock, timelock; public: unsigned long statSendTimeout; unsigned long statSendOverflow; unsigned long statSendUrgent; size_t size() { return messages.size(); } void markUrgent() { if (numBytes) { std::lock_guard lg(lock); urgent = messages.size(); } } bool ready() { #ifndef NO_AGG if (numBytes == 0) return false; if (urgent) { ++statSendUrgent; return true; } if (numBytes > COMM_MIN) { ++statSendOverflow; return true; } auto n = std::chrono::high_resolution_clock::now(); decltype(n) mytime; { std::lock_guard lg(timelock); mytime = time; } auto elapsed = std::chrono::duration_cast(n - mytime); if (elapsed.count() > COMM_DELAY) { ++statSendTimeout; return true; } return false; #else return messages.size() > 0; #endif } std::pair assemble(std::atomic& GALOIS_UNUSED(inflightSends)) { std::unique_lock lg(lock); if (messages.empty()) return std::make_pair(~0, vTy()); #ifndef NO_AGG // compute message size uint32_t len = 0; int num = 0; uint32_t tag = messages.front().tag; for (auto& m : messages) { if (m.tag != tag) { break; } else { // do not let it go over the integer limit because MPI_Isend cannot // deal with it if ((m.data.size() + sizeof(uint32_t) + len + num) > static_cast(std::numeric_limits::max())) { break; } len += m.data.size(); num += sizeof(uint32_t); } } lg.unlock(); // construct message vTy vec; vec.reserve(len + num); // go out of our way to avoid locking out senders when making messages lg.lock(); do { auto& m = messages.front(); lg.unlock(); union { uint32_t a; uint8_t b[sizeof(uint32_t)]; } foo; foo.a = m.data.size(); vec.insert(vec.end(), &foo.b[0], &foo.b[sizeof(uint32_t)]); vec.insert(vec.end(), m.data.begin(), 
m.data.end()); if (urgent) --urgent; lg.lock(); messages.pop_front(); --inflightSends; } while (vec.size() < len + num); ++inflightSends; numBytes -= len; #else uint32_t tag = messages.front().tag; vTy vec(std::move(messages.front().data)); messages.pop_front(); #endif return std::make_pair(tag, std::move(vec)); } void add(uint32_t tag, vTy& b) { std::lock_guard lg(lock); if (messages.empty()) { std::lock_guard lg(timelock); time = std::chrono::high_resolution_clock::now(); } unsigned oldNumBytes = numBytes; numBytes += b.size(); galois::runtime::trace("BufferedAdd", oldNumBytes, numBytes, tag, galois::runtime::printVec(b)); messages.emplace_back(tag, b); } }; // end send buffer class std::vector sendData; void workerThread() { initializeMPI(); int rank; int hostSize; int rankSuccess = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (rankSuccess != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, rankSuccess); } int sizeSuccess = MPI_Comm_size(MPI_COMM_WORLD, &hostSize); if (sizeSuccess != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD, sizeSuccess); } galois::gDebug("[", NetworkInterface::ID, "] MPI initialized"); std::tie(netio, ID, Num) = makeNetworkIOMPI(memUsageTracker, inflightSends, inflightRecvs); assert(ID == (unsigned)rank); assert(Num == (unsigned)hostSize); ready = 1; while (ready < 2) { /*fprintf(stderr, "[WaitOnReady-2]");*/ }; while (ready != 3) { for (unsigned i = 0; i < sendData.size(); ++i) { netio->progress(); // handle send queue i auto& sd = sendData[i]; if (sd.ready()) { NetworkIO::message msg; msg.host = i; std::tie(msg.tag, msg.data) = sd.assemble(inflightSends); galois::runtime::trace("BufferedSending", msg.host, msg.tag, galois::runtime::printVec(msg.data)); ++statSendEnqueued; netio->enqueue(std::move(msg)); } // handle receive NetworkIO::message rdata = netio->dequeue(); if (rdata.data.size()) { ++statRecvDequeued; assert(rdata.data.size() != (unsigned int)std::count(rdata.data.begin(), rdata.data.end(), 0)); galois::runtime::trace("BufferedRecieving", 
rdata.host, rdata.tag, galois::runtime::printVec(rdata.data)); recvData[rdata.host].add(std::move(rdata)); } } } finalizeMPI(); } std::thread worker; std::atomic ready; public: using NetworkInterface::ID; using NetworkInterface::Num; NetworkInterfaceBuffered() { inflightSends = 0; inflightRecvs = 0; ready = 0; anyReceivedMessages = false; worker = std::thread(&NetworkInterfaceBuffered::workerThread, this); while (ready != 1) { }; recvData = decltype(recvData)(Num); recvLock.resize(Num); sendData = decltype(sendData)(Num); ready = 2; } virtual ~NetworkInterfaceBuffered() { ready = 3; worker.join(); } std::unique_ptr netio; virtual void sendTagged(uint32_t dest, uint32_t tag, SendBuffer& buf, int phase) { ++inflightSends; tag += phase; statSendNum += 1; statSendBytes += buf.size(); galois::runtime::trace("sendTagged", dest, tag, galois::runtime::printVec(buf.getVec())); auto& sd = sendData[dest]; sd.add(tag, buf.getVec()); } virtual std::optional> recieveTagged(uint32_t tag, std::unique_lock* rlg, int phase) { tag += phase; for (unsigned h = 0; h < recvData.size(); ++h) { auto& rq = recvData[h]; if (rq.hasData(tag)) { if (recvLock[h].try_lock()) { std::unique_lock lg(recvLock[h], std::adopt_lock); auto buf = rq.popMsg(tag, inflightRecvs); if (buf) { ++statRecvNum; statRecvBytes += buf->size(); memUsageTracker.decrementMemUsage(buf->size()); if (rlg) *rlg = std::move(lg); galois::runtime::trace("recvTagged", h, tag, galois::runtime::printVec(buf->getVec())); anyReceivedMessages = true; return std::optional>( std::make_pair(h, std::move(*buf))); } } } galois::runtime::trace("recvTagged BLOCKED this by that", tag, rq.getPresentTag()); } return std::optional>(); } virtual void flush() { for (auto& sd : sendData) sd.markUrgent(); } virtual bool anyPendingSends() { return (inflightSends > 0); } virtual bool anyPendingReceives() { if (anyReceivedMessages) { // might not be acted on by the computation yet anyReceivedMessages = false; // galois::gDebug("[", ID, "] receive out 
of buffer \n"); return true; } // if (inflightRecvs > 0) { // galois::gDebug("[", ID, "] inflight receive: ", inflightRecvs, " \n"); // } return (inflightRecvs > 0); } virtual unsigned long reportSendBytes() const { return statSendBytes; } virtual unsigned long reportSendMsgs() const { return statSendNum; } virtual unsigned long reportRecvBytes() const { return statRecvBytes; } virtual unsigned long reportRecvMsgs() const { return statRecvNum; } virtual std::vector reportExtra() const { std::vector retval(5); for (auto& sd : sendData) { retval[0] += sd.statSendTimeout; retval[1] += sd.statSendOverflow; retval[2] += sd.statSendUrgent; } retval[3] = statSendEnqueued; retval[4] = statRecvDequeued; return retval; } virtual std::vector> reportExtraNamed() const { std::vector> retval(5); retval[0].first = "SendTimeout"; retval[1].first = "SendOverflow"; retval[2].first = "SendUrgent"; retval[3].first = "SendEnqueued"; retval[4].first = "RecvDequeued"; for (auto& sd : sendData) { retval[0].second += sd.statSendTimeout; retval[1].second += sd.statSendOverflow; retval[2].second += sd.statSendUrgent; } retval[3].second = statSendEnqueued; retval[4].second = statRecvDequeued; return retval; } }; } // namespace /** * Create a buffered network interface, or return one if already * created. */ NetworkInterface& galois::runtime::makeNetworkBuffered() { static std::atomic net; static substrate::SimpleLock m_mutex; // create the interface if it doesn't yet exist in the static variable auto* tmp = net.load(); if (tmp == nullptr) { std::lock_guard lock(m_mutex); tmp = net.load(); if (tmp == nullptr) { tmp = new NetworkInterfaceBuffered(); net.store(tmp); } } return *tmp; } ================================================ FILE: libdist/src/NetworkIOMPI.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file NetworkIOMPI.cpp * * Contains an implementation of network IO that uses MPI. */ #include "galois/runtime/NetworkIO.h" #include "galois/runtime/Tracer.h" #include "galois/substrate/SimpleLock.h" /** * MPI implementation of network IO. ASSUMES THAT MPI IS INITIALIZED * UPON CREATION OF THIS OBJECT. */ class NetworkIOMPI : public galois::runtime::NetworkIO { private: /** * Get the host id of the caller. * * @returns host id of the caller with regard to the MPI setup */ static int getID() { int taskRank; handleError(MPI_Comm_rank(MPI_COMM_WORLD, &taskRank)); return taskRank; } /** * Get the total number of hosts in the system. * * @returns number of hosts with regard to the MPI setup */ static int getNum() { int numTasks; handleError(MPI_Comm_size(MPI_COMM_WORLD, &numTasks)); return numTasks; } /** * Get both the ID of the caller + number of hosts. 
*/ std::pair getIDAndHostNum() { return std::make_pair(getID(), getNum()); } /** * Message type to send/recv in this network IO layer. */ struct mpiMessage { uint32_t host; uint32_t tag; vTy data; MPI_Request req; // mpiMessage(message&& _m, MPI_Request _req) : m(std::move(_m)), req(_req) // {} mpiMessage(uint32_t host, uint32_t tag, vTy&& data) : host(host), tag(tag), data(std::move(data)) {} mpiMessage(uint32_t host, uint32_t tag, size_t len) : host(host), tag(tag), data(len) {} }; /** * Send queue structure. */ struct sendQueueTy { std::deque inflight; galois::runtime::MemUsageTracker& memUsageTracker; std::atomic& inflightSends; sendQueueTy(galois::runtime::MemUsageTracker& tracker, std::atomic& sends) : memUsageTracker(tracker), inflightSends(sends) {} void complete() { while (!inflight.empty()) { int flag = 0; MPI_Status status; auto& f = inflight.front(); int rv = MPI_Test(&f.req, &flag, &status); handleError(rv); if (flag) { memUsageTracker.decrementMemUsage(f.data.size()); inflight.pop_front(); --inflightSends; } else break; } } void send(message m) { inflight.emplace_back(m.host, m.tag, std::move(m.data)); auto& f = inflight.back(); galois::runtime::trace("MPI SEND", f.host, f.tag, f.data.size(), galois::runtime::printVec(f.data)); #ifdef GALOIS_SUPPORT_ASYNC int rv = MPI_Issend(f.data.data(), f.data.size(), MPI_BYTE, f.host, f.tag, MPI_COMM_WORLD, &f.req); #else int rv = MPI_Isend(f.data.data(), f.data.size(), MPI_BYTE, f.host, f.tag, MPI_COMM_WORLD, &f.req); #endif handleError(rv); } }; /** * Receive queue structure */ struct recvQueueTy { std::deque done; std::deque inflight; galois::runtime::MemUsageTracker& memUsageTracker; std::atomic& inflightRecvs; recvQueueTy(galois::runtime::MemUsageTracker& tracker, std::atomic& recvs) : memUsageTracker(tracker), inflightRecvs(recvs) {} // FIXME: Does synchronous recieves overly halt forward progress? 
void probe() { int flag = 0; MPI_Status status; // check for new messages int rv = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); handleError(rv); if (flag) { #ifdef GALOIS_USE_BARE_MPI assert(status.MPI_TAG <= 32767); if (status.MPI_TAG != 32767) { #endif ++inflightRecvs; int nbytes; rv = MPI_Get_count(&status, MPI_BYTE, &nbytes); handleError(rv); inflight.emplace_back(status.MPI_SOURCE, status.MPI_TAG, nbytes); auto& m = inflight.back(); memUsageTracker.incrementMemUsage(m.data.size()); rv = MPI_Irecv(m.data.data(), nbytes, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &m.req); handleError(rv); galois::runtime::trace("MPI IRECV", status.MPI_SOURCE, status.MPI_TAG, m.data.size()); #ifdef GALOIS_USE_BARE_MPI } #endif } // complete messages if (!inflight.empty()) { auto& m = inflight.front(); int flag = 0; rv = MPI_Test(&m.req, &flag, MPI_STATUS_IGNORE); handleError(rv); if (flag) { done.emplace_back(m.host, m.tag, std::move(m.data)); inflight.pop_front(); } } } }; sendQueueTy sendQueue; recvQueueTy recvQueue; public: /** * Constructor. * * @param tracker memory usage tracker * @param sends * @param recvs * @param [out] ID this machine's host id * @param [out] NUM total number of hosts in the system */ NetworkIOMPI(galois::runtime::MemUsageTracker& tracker, std::atomic& sends, std::atomic& recvs, uint32_t& ID, uint32_t& NUM) : NetworkIO(tracker, sends, recvs), sendQueue(tracker, inflightSends), recvQueue(tracker, inflightRecvs) { auto p = getIDAndHostNum(); ID = p.first; NUM = p.second; } /** * Adds a message to the send queue */ virtual void enqueue(message m) { memUsageTracker.incrementMemUsage(m.data.size()); sendQueue.send(std::move(m)); } /** * Attempts to get a message from the recv queue. */ virtual message dequeue() { if (!recvQueue.done.empty()) { auto msg = std::move(recvQueue.done.front()); recvQueue.done.pop_front(); return msg; } return message{~0U, 0, vTy()}; } /** * Push progress forward in the system. 
*/ virtual void progress() { sendQueue.complete(); recvQueue.probe(); } }; // end NetworkIOMPI class std::tuple, uint32_t, uint32_t> galois::runtime::makeNetworkIOMPI(galois::runtime::MemUsageTracker& tracker, std::atomic& sends, std::atomic& recvs) { uint32_t ID, NUM; std::unique_ptr n{ new NetworkIOMPI(tracker, sends, recvs, ID, NUM)}; return std::make_tuple(std::move(n), ID, NUM); } ================================================ FILE: libdist/src/NetworkLCI.cpp ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file NetworkBuffered.cpp * * Contains NetworkInterfaceLCI, an implementation of a network interface * that buffers messages before sending them out. 
* * @todo document this file more */ #ifdef GALOIS_USE_LCI #include "galois/runtime/Network.h" #include "galois/runtime/NetworkIO.h" #include "galois/runtime/Tracer.h" #include "galois/runtime/LWCI.h" using vTy = galois::PODResizeableArray; #include #include #include #include #include #include using namespace galois::runtime; using namespace galois::substrate; /* CRC-32C (iSCSI) polynomial in reversed bit order. */ #define POLY 0x82f63b78 inline uint32_t crc32c(char* buf, size_t len) { uint32_t crc = 0; int k; crc = ~crc; while (len--) { crc ^= *buf++; for (k = 0; k < 8; k++) crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; } return ~crc; } lc_ep lc_p2p_ep[3]; lc_ep lc_col_ep; struct pendingReq { uint32_t dest; uint32_t tag; int phase; vTy buf; lc_req req; std::atomic& inflight; pendingReq(uint32_t _d, uint32_t _t, int _p, vTy& _buf, std::atomic& s) : dest(_d), tag(_t), phase(_p), buf(std::move(_buf)), inflight(s) { s++; } ~pendingReq() { inflight--; } }; static void* alloc_req(size_t size, void** ctx) { vTy** vector = (vTy**)ctx; *vector = new vTy(size); return (*vector)->data(); } static void free_req(void* ctx) { pendingReq* req = (pendingReq*)ctx; delete req; } namespace { /** * @class NetworkInterfaceLCI * * Buffered network interface: messages are buffered before they are sent out. * A single worker thread is initialized to send/receive messages from/to * buffers. 
*/ class NetworkInterfaceLCI : public NetworkInterface { unsigned long statSendNum; unsigned long statSendBytes; unsigned long statSendEnqueued; unsigned long statRecvNum; unsigned long statRecvBytes; unsigned long statRecvDequeued; bool anyReceivedMessages; // using vTy = std::vector; using vTy = galois::PODResizeableArray; public: void workerThread() { // Initialize LWCI // makeNetworkIOLWCI(memUsageTracker, inflightSends, inflightRecvs); if (ID == 0) fprintf(stderr, "**Using LWCI Communication layer**\n"); ready = 1; while (ready < 2) { /*fprintf(stderr, "[WaitOnReady-2]");*/ }; while (ready != 3) { lc_progress(0); lc_req* req_ptr; for (int phase = 0; phase < 3; phase++) { if (lc_cq_pop(lc_p2p_ep[phase], &req_ptr) == LC_OK) { int bin = ((req_ptr->meta % 3) * 3) + phase; bufferedRecv[bin].push(convertReq(req_ptr, phase)); } } sched_yield(); } } std::thread worker; std::atomic ready; public: using NetworkInterface::ID; using NetworkInterface::Num; NetworkInterfaceLCI() { lc_init(1, &lc_col_ep); lc_opt opt; opt.dev = 0; opt.desc = LC_DYN_CQ; opt.alloc = alloc_req; lc_ep_dup(&opt, lc_col_ep, &lc_p2p_ep[0]); lc_ep_dup(&opt, lc_col_ep, &lc_p2p_ep[1]); lc_ep_dup(&opt, lc_col_ep, &lc_p2p_ep[2]); lc_get_proc_num((int*)&ID); lc_get_num_proc((int*)&Num); inflightSends = 0; inflightRecvs = 0; ready = 0; anyReceivedMessages = false; worker = std::thread(&NetworkInterfaceLCI::workerThread, this); while (ready != 1) { }; ready = 2; } virtual ~NetworkInterfaceLCI() { ready = 3; worker.join(); } boost::lockfree::queue bufferedRecv[9]; // [0, 1, 2] [0, 1, 2] 0: normal, 1: reduce, 2: AM virtual void sendTagged(uint32_t dest, uint32_t tag, SendBuffer& buf, int phase) { if (tag == 0) phase = 2; statSendNum += 1; statSendBytes += buf.size(); // int count = 0; #ifndef GALOIS_SUPPORT_ASYNC if (buf.getVec().size() < 8192) { while (lc_sendm(buf.getVec().data(), buf.getVec().size(), dest, tag, lc_p2p_ep[phase]) != LC_OK) { sched_yield(); } } else #endif { pendingReq* msg = new 
pendingReq(dest, tag, phase, buf.getVec(), inflightSends); while (lc_sendl(msg->buf.data(), msg->buf.size(), dest, tag, lc_p2p_ep[phase], free_req, msg) != LC_OK) { sched_yield(); } } } inline pendingReq* convertReq(lc_req* req_ptr, int phase) { // Need to drain LCI queue to allow more injection. // Convert internal LCI request to a Galois pending request. vTy buf = std::move(*((vTy*)(req_ptr->ctx))); int rank = req_ptr->rank; int meta = req_ptr->meta; delete (vTy*)req_ptr->ctx; lc_cq_reqfree(lc_p2p_ep[phase], req_ptr); return new pendingReq(rank, meta, phase, buf, inflightRecvs); } virtual std::optional> recieveTagged(uint32_t tag, std::unique_lock* /*rlg*/, int phase) { if (tag == 0) phase = 2; // static int count = 0; pendingReq* req; int bin = ((tag % 3) * 3) + phase; if (!bufferedRecv[bin].pop(req)) { // if (count ++ == 10000) { // printf("[%d] WARNING possible lock out on RECV %d\n", ID, tag); // } return std::optional>(); } if (req->tag == tag) { vTy buf = std::move(req->buf); int dest = req->dest; delete req; return std::optional>( std::make_pair(dest, std::move(buf))); } else { printf("[%d] WARNING possible lock out, wrong tag %d/%d.\n", ID, req->tag, tag); return std::optional>(); } } virtual void flush() {} virtual bool anyPendingSends() { // static int count = 0; // if (count++ == 10000) // printf("[%d] WARNING possible lock out terminate %d %d\n", ID, // inflightSends.load(), inflightRecvs.load()); return (inflightSends > 0); } virtual bool anyPendingReceives() { if (anyReceivedMessages) { // might not be acted on by the computation yet anyReceivedMessages = false; // galois::gDebug("[", ID, "] receive out of buffer \n"); return true; } // if (inflightRecvs > 0) { // galois::gDebug("[", ID, "] inflight receive: ", inflightRecvs, " \n"); // } return (inflightRecvs > 0); } virtual unsigned long reportSendBytes() const { return statSendBytes; } virtual unsigned long reportSendMsgs() const { return statSendNum; } virtual unsigned long reportRecvBytes() 
const { return statRecvBytes; } virtual unsigned long reportRecvMsgs() const { return statRecvNum; } virtual std::vector reportExtra() const { std::vector retval(5); return retval; } virtual std::vector> reportExtraNamed() const { std::vector> retval(5); retval[0].first = "SendTimeout"; retval[1].first = "SendOverflow"; retval[2].first = "SendUrgent"; retval[3].first = "SendEnqueued"; retval[4].first = "RecvDequeued"; retval[3].second = statSendEnqueued; retval[4].second = statRecvDequeued; return retval; } }; } // namespace /** * Create a buffered network interface, or return one if already * created. */ NetworkInterface& galois::runtime::makeNetworkLCI() { static std::atomic net; static substrate::SimpleLock m_mutex; // create the interface if it doesn't yet exist in the static variable auto* tmp = net.load(); if (tmp == nullptr) { std::lock_guard lock(m_mutex); tmp = net.load(); if (tmp == nullptr) { tmp = new NetworkInterfaceLCI(); net.store(tmp); } } return *tmp; } #endif ================================================ FILE: libgalois/CMakeLists.txt ================================================ add_library(galois_shmem) add_library(Galois::shmem ALIAS galois_shmem) set_target_properties(galois_shmem PROPERTIES EXPORT_NAME shmem) add_dependencies(lib galois_shmem) configure_file(src/Version.cpp.in Version.cpp @ONLY) configure_file(include/galois/config.h.in include/galois/config.h) set(sources "${CMAKE_CURRENT_BINARY_DIR}/Version.cpp" src/Barrier_Counting.cpp src/Barrier.cpp src/Barrier_Dissemination.cpp src/Barrier_MCS.cpp src/Barrier_Pthread.cpp src/Barrier_Simple.cpp src/Barrier_Topo.cpp src/Context.cpp src/Deterministic.cpp src/DynamicBitset.cpp src/EnvCheck.cpp src/FileGraph.cpp src/FileGraphParallel.cpp src/gIO.cpp src/GraphHelpers.cpp src/HWTopo.cpp src/Mem.cpp src/NumaMem.cpp src/OCFileGraph.cpp src/PageAlloc.cpp src/PagePool.cpp src/PagePool.cpp src/ParaMeter.cpp src/PerThreadStorage.cpp src/PreAlloc.cpp src/Profile.cpp src/PtrLock.cpp 
src/SharedMem.cpp src/SharedMemSys.cpp src/SimpleLock.cpp src/Statistics.cpp src/Substrate.cpp src/Support.cpp src/Termination.cpp src/ThreadPool.cpp src/Threads.cpp src/ThreadTimer.cpp src/Timer.cpp src/Tracer.cpp ) if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") list(APPEND sources src/HWTopoDarwin.cpp) else() include(CheckSchedSetAffinity) if (NOT SCHED_SETAFFINITY_FOUND) if (GALOIS_STRICT_CONFIG) message(FATAL_ERROR "Need sched_setaffinity") endif() endif() list(APPEND sources src/HWTopoLinux.cpp) endif() target_sources(galois_shmem PRIVATE ${sources}) target_include_directories(galois_shmem PUBLIC $ $ $ ) if (TARGET Boost::Boost) # Autogenerated conan module doesn't provide header-only target. Extract one # manually. get_target_property(include_dirs Boost::Boost INTERFACE_INCLUDE_DIRECTORIES) target_include_directories(galois_shmem PUBLIC ${include_dirs}) else() # Standard CMake Boost module target_link_libraries(galois_shmem PUBLIC Boost::boost) endif() if (SCHED_SETAFFINITY_FOUND) target_compile_definitions(galois_shmem PRIVATE GALOIS_USE_SCHED_SETAFFINITY) target_link_libraries(galois_shmem PRIVATE ${SCHED_SETAFFINITY_LIBRARIES}) endif() target_link_libraries(galois_shmem INTERFACE pygalois) target_link_libraries(galois_shmem PRIVATE Threads::Threads) if (CMAKE_HAVE_PTHREAD_H) target_compile_definitions(galois_shmem PRIVATE GALOIS_HAVE_PTHREAD) endif() find_package(NUMA) if (NUMA_FOUND) target_compile_definitions(galois_shmem PRIVATE GALOIS_USE_NUMA) target_link_libraries(galois_shmem PRIVATE ${NUMA_LIBRARY}) else() message(WARNING "No NUMA Support. 
Likely poor performance for multi-socket systems.") endif() if (VTune_FOUND) target_link_libraries(galois_shmem PRIVATE ${VTune_LIBRARIES}) endif() add_subdirectory(test) install( DIRECTORY include/ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT dev FILES_MATCHING PATTERN "*.h" ) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT dev FILES_MATCHING PATTERN "*.h" ) install( TARGETS galois_shmem EXPORT GaloisTargets LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT shlib ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" ) ================================================ FILE: libgalois/include/galois/ArrayWrapper.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
*/ /** * @file ArrayWrapper.h * * Defines the CopyableArray subclass used to make arrays trivially copyable if * possible. */ #ifndef _ARRAY_WRAPPER_H_ #define _ARRAY_WRAPPER_H_ #include #include "galois/config.h" #include "galois/runtime/ExtraTraits.h" namespace galois { /** * A subclass of std::array that is marked trivially copyable if the type is * also memory copyable. Useful when you need a trivially copyable type for * serialization. * * @tparam T type of the items to be stored in the array * @tparam N total number of items in the array */ template class CopyableArray : public std::array { public: //! Only typedef tt_is_copyable if T is trivially copyable. //! Allows the use of memcopy in serialize/deserialize. using tt_is_copyable = typename std::enable_if::value, int>::type; }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/AtomicHelpers.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #pragma once #include #include #include #include "galois/config.h" namespace galois { /** galois::atomicMax + non-atomic max calls **/ template const Ty atomicMax(std::atomic& a, const Ty b) { Ty old_a = a.load(std::memory_order_relaxed); // if old value is less than new value, atomically exchange while (old_a < b && !a.compare_exchange_weak(old_a, b, std::memory_order_relaxed)) ; return old_a; } template const Ty max(std::atomic& a, const Ty& b) { Ty old_a = a.load(std::memory_order_relaxed); if (a < b) { a.store(b, std::memory_order_relaxed); } return old_a; } template const Ty max(Ty& a, const Ty& b) { Ty old_a = a; if (a < b) { a = b; } return old_a; } /** galois::atomicMin **/ template const Ty atomicMin(std::atomic& a, const Ty b) { Ty old_a = a.load(std::memory_order_relaxed); while (old_a > b && !a.compare_exchange_weak(old_a, b, std::memory_order_relaxed)) ; return old_a; } template const Ty min(std::atomic& a, const Ty& b) { Ty old_a = a.load(std::memory_order_relaxed); if (a > b) { a.store(b, std::memory_order_relaxed); } return old_a; } template const Ty min(Ty& a, const Ty& b) { Ty old_a = a; if (a > b) { a = b; } return old_a; } /** galois::atomicAdd **/ template const Ty atomicAdd(std::atomic& val, Ty delta) { Ty old_val = val.load(std::memory_order_relaxed); while (!val.compare_exchange_weak(old_val, old_val + delta, std::memory_order_relaxed)) ; return old_val; } template const Ty add(std::atomic& a, const Ty& b) { Ty old_a = a.load(std::memory_order_relaxed); a.store(a + b, std::memory_order_relaxed); return old_a; } template const Ty add(Ty& a, std::atomic& b) { Ty 
old_a = a; a = a + b.load(std::memory_order_relaxed); return old_a; } template const Ty add(Ty& a, const Ty& b) { Ty old_a = a; a += b; return old_a; } /** * atomic subtraction of delta (because atomicAdd with negative numbers implies * a signed integer cast) */ template const Ty atomicSubtract(std::atomic& val, Ty delta) { Ty old_val = val.load(std::memory_order_relaxed); while (!val.compare_exchange_weak(old_val, old_val - delta, std::memory_order_relaxed)) ; return old_val; } template const Ty set(Ty& a, const Ty& b) { a = b; return a; } template const Ty set(std::atomic& a, const Ty& b) { a.store(b, std::memory_order_relaxed); return a; } /** Pair Wise Average function **/ template const Ty pairWiseAvg(Ty a, Ty b) { return (a + b) / 2.0; } template void pairWiseAvg_vec(std::vector& a_vec, std::vector& b_vec) { for (unsigned i = 0; i < a_vec.size(); ++i) { a_vec[i] = (a_vec[i] + b_vec[i]) / 2.0; } } template void resetVec(Ty& a_arr) { // std::for_each(a_arr.begin(), a_arr.end(),[](Ty &ele){ele = 0;} ); std::fill(a_arr.begin(), a_arr.end(), 0); } template void pairWiseAvg_vec(Ty& a_arr, Ty& b_arr) { for (unsigned i = 0; i < a_arr.size(); ++i) { a_arr[i] = (a_arr[i] + b_arr[i]) / 2.0; } } template void addArray(Ty& a_arr, Ty& b_arr) { for (unsigned i = 0; i < a_arr.size(); ++i) { a_arr[i] = (a_arr[i] + b_arr[i]); } } template void resetVec(std::vector& a_vec) { std::for_each(a_vec.begin(), a_vec.end(), [](Ty& ele) { ele = 0; }); } // like std::inner_product template Ty innerProduct(ItrTy a_begin, ItrTy a_end, ItrTy b_begin, Ty init_value) { auto jj = b_begin; for (auto ii = a_begin; ii != a_end; ++ii, ++jj) { init_value += (*ii) * (*jj); } return init_value; } // like std::inner_product template Ty innerProduct(ItrTy& a_arr, ItrTy& b_arr, Ty init_value) { auto jj = b_arr.begin(); for (auto ii = a_arr.begin(); ii != a_arr.end(); ++ii, ++jj) { init_value += (*ii) * (*jj); } return init_value; } template void reset(Ty& var, Ty val) { var = val; } template void 
reset(std::atomic& var, Ty val) { var.store(val, std::memory_order_relaxed); } } // end namespace galois ================================================ FILE: libgalois/include/galois/AtomicWrapper.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file AtomicWrapper.h * * Contains a copyable atomics class. */ #ifndef _ATOMIC_WRAPPER_H_ #define _ATOMIC_WRAPPER_H_ #include #include "galois/config.h" namespace galois { /** * Class that inherits from std::atomic to make it copyable by defining a copy * constructor. * * @tparam T type of the atomic */ template class CopyableAtomic : public std::atomic { public: //! Default constructor CopyableAtomic() : std::atomic(T{}) {} //! Constructor initializing atomic to passed in data constexpr CopyableAtomic(T desired) : std::atomic(desired) {} //! 
Copy constructor constexpr CopyableAtomic(const CopyableAtomic& other) : CopyableAtomic(other.load(std::memory_order_relaxed)) {} //! Copy constructor operator CopyableAtomic& operator=(const CopyableAtomic& other) { this->store(other.load(std::memory_order_relaxed), std::memory_order_relaxed); return *this; } }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/Bag.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_BAG_H #define GALOIS_BAG_H #include #include #include #include "galois/config.h" #include "galois/gstl.h" #include "galois/runtime/Executor_OnEach.h" #include "galois/substrate/PerThreadStorage.h" #include "galois/gIO.h" #include "galois/runtime/Mem.h" namespace galois { /** * Unordered collection of elements. 
This data structure supports scalable * concurrent pushes but reading the bag can only be done serially. */ template class InsertBag { struct header { header* next; T* dbegin; // start of interesting data T* dend; // end of valid data T* dlast; // end of storage }; typedef std::pair PerThread; public: template class Iterator : public boost::iterator_facade, U, boost::forward_traversal_tag> { friend class boost::iterator_core_access; galois::substrate::PerThreadStorage>* hd; unsigned int thr; header* p; U* v; bool init_thread() { p = thr < hd->size() ? hd->getRemote(thr)->first : 0; v = p ? p->dbegin : 0; return p; } bool advance_local() { if (p) { ++v; return v != p->dend; } return false; } bool advance_chunk() { if (p) { p = p->next; v = p ? p->dbegin : 0; } return p; } void advance_thread() { while (thr < hd->size()) { ++thr; if (init_thread()) return; } } void increment() { if (advance_local()) return; if (advance_chunk()) return; advance_thread(); } template bool equal(const Iterator& o) const { return hd == o.hd && thr == o.thr && p == o.p && v == o.v; } U& dereference() const { return *v; } public: Iterator() : hd(0), thr(0), p(0), v(0) {} template Iterator(const Iterator& o) : hd(o.hd), thr(o.thr), p(o.p), v(o.v) {} Iterator( galois::substrate::PerThreadStorage>* h, unsigned t) : hd(h), thr(t), p(0), v(0) { // find first valid item if (!init_thread()) advance_thread(); } }; private: galois::runtime::FixedSizeHeap heap; galois::substrate::PerThreadStorage heads; void insHeader(header* h) { PerThread& hpair = *heads.getLocal(); if (hpair.second) { hpair.second->next = h; hpair.second = h; } else { hpair.first = hpair.second = h; } } header* newHeaderFromHeap(void* m, unsigned size) { header* H = new (m) header(); int offset = 1; if (sizeof(T) < sizeof(header)) offset += sizeof(header) / sizeof(T); T* a = reinterpret_cast(m); H->dbegin = &a[offset]; H->dend = H->dbegin; H->dlast = &a[(size / sizeof(T))]; H->next = 0; return H; } header* newHeader() { if 
(BlockSize) { return newHeaderFromHeap(heap.allocate(BlockSize), BlockSize); } else { return newHeaderFromHeap(galois::runtime::pagePoolAlloc(), galois::runtime::pagePoolSize()); } } void destruct_serial() { for (unsigned x = 0; x < heads.size(); ++x) { PerThread& hpair = *heads.getRemote(x); header*& h = hpair.first; while (h) { uninitialized_destroy(h->dbegin, h->dend); header* h2 = h; h = h->next; if (BlockSize) heap.deallocate(h2); else galois::runtime::pagePoolFree(h2); } hpair.second = 0; } } void destruct_parallel(void) { galois::runtime::on_each_gen( [this](const unsigned int tid, const unsigned int) { PerThread& hpair = *heads.getLocal(tid); header*& h = hpair.first; while (h) { uninitialized_destroy(h->dbegin, h->dend); header* h2 = h; h = h->next; if (BlockSize) heap.deallocate(h2); else galois::runtime::pagePoolFree(h2); } hpair.second = 0; }, std::make_tuple(galois::no_stats())); } public: // static_assert(BlockSize == 0 || BlockSize >= (2 * sizeof(T) + // sizeof(header)), // "BlockSize should larger than sizeof(T) + O(1)"); InsertBag() : heap(BlockSize) {} InsertBag(InsertBag&& o) : heap(BlockSize) { std::swap(heap, o.heap); std::swap(heads, o.heads); } InsertBag& operator=(InsertBag&& o) { std::swap(heap, o.heap); std::swap(heads, o.heads); return *this; } InsertBag(const InsertBag&) = delete; InsertBag& operator=(const InsertBag&) = delete; ~InsertBag() { destruct_parallel(); } void clear() { destruct_parallel(); } void clear_serial() { destruct_serial(); } void swap(InsertBag& o) { std::swap(heap, o.heap); std::swap(heads, o.heads); } typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef const T& const_reference; typedef T& reference; typedef Iterator iterator; typedef Iterator const_iterator; typedef iterator local_iterator; iterator begin() { return iterator(&heads, 0); } iterator end() { return iterator(&heads, heads.size()); } const_iterator begin() const { return const_iterator(&heads, 0); } const_iterator end() 
const { return const_iterator(&heads, heads.size()); } local_iterator local_begin() { return local_iterator(&heads, galois::substrate::ThreadPool::getTID()); } local_iterator local_end() { return local_iterator(&heads, galois::substrate::ThreadPool::getTID() + 1); } bool empty() const { for (unsigned x = 0; x < heads.size(); ++x) { header* h = heads.getRemote(x)->first; if (h) return false; } return true; } //! Thread safe bag insertion template reference emplace(Args&&... args) { header* H = heads.getLocal()->second; T* rv; if (!H || H->dend == H->dlast) { H = newHeader(); insHeader(H); } rv = new (H->dend) T(std::forward(args)...); ++H->dend; return *rv; } template reference emplace_back(Args&&... args) { return emplace(std::forward(args)...); } /** * Pop the last element pushed by this thread. The number of consecutive * pops supported without intevening pushes is implementation dependent. */ void pop() { header* H = heads.getLocal()->second; if (H->dbegin == H->dend) { throw std::out_of_range("InsertBag::pop"); } uninitialized_destroy(H->dend - 1, H->dend); --H->dend; } //! Thread safe bag insertion template reference push(ItemTy&& val) { return emplace(std::forward(val)); } //! Thread safe bag insertion template reference push_back(ItemTy&& val) { return emplace(std::forward(val)); } }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/CheckedObject.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. 
* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_CHECKEDOBJECT_H #define GALOIS_CHECKEDOBJECT_H #include "galois/config.h" #include "galois/runtime/Context.h" namespace galois { /** * Conflict-checking wrapper for any type. Performs global conflict detection * on the enclosed object. This enables arbitrary types to be managed by the * Galois runtime. */ template class GChecked : public galois::runtime::Lockable { T val; public: template GChecked(Args&&... args) : val(std::forward(args)...) {} T& get(galois::MethodFlag m = MethodFlag::WRITE) { galois::runtime::acquire(this, m); return val; } const T& get(galois::MethodFlag m = MethodFlag::WRITE) const { galois::runtime::acquire(const_cast(this), m); return val; } }; template <> class GChecked : public galois::runtime::Lockable { public: void get(galois::MethodFlag m = MethodFlag::WRITE) const { galois::runtime::acquire(const_cast(this), m); } }; } // namespace galois #endif // _GALOIS_CHECKEDOBJECT_H ================================================ FILE: libgalois/include/galois/CopyableTuple.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file CopyableTuple.h * * Contains copyable tuple classes whose elements are contiguous in memory */ #pragma once #include "galois/config.h" namespace galois { /** * Struct that contains 2 elements. Used over std::pair as std::pair memory * layout isn't guaranteed. * * @tparam T1 type of first element * @tparam T2 type of second element */ template struct Pair { //! first element T1 first; //! second element T2 second; //! empty constructor Pair() {} //! Constructor that initializes 2 fields Pair(T1 one, T2 two) { first = one; second = two; } }; /** * Struct that contains 3 elements. Used over std::tuple as std::tuple memory * layout isn't guaranteed. * * @tparam T1 type of first element * @tparam T2 type of second element * @tparam T3 type of third element */ template struct TupleOfThree { //! first element T1 first; //! second element T2 second; //! third element T3 third; //! empty constructor TupleOfThree() {} //! 
Constructor that initializes 3 fields TupleOfThree(T1 one, T2 two, T3 three) { first = one; second = two; third = three; } }; } // namespace galois ================================================ FILE: libgalois/include/galois/DynamicBitset.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2019, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ /** * @file galois/DynamicBitset.h * * Contains the DynamicBitSet class and most of its implementation. 
*/ #ifndef _GALOIS_DYNAMIC_BIT_SET_ #define _GALOIS_DYNAMIC_BIT_SET_ #include #include #include #include #include #include "galois/config.h" #include "galois/AtomicWrapper.h" #include "galois/PODResizeableArray.h" #include "galois/GaloisForwardDecl.h" #include "galois/Traits.h" #include "galois/Galois.h" namespace galois { /** * Concurrent dynamically allocated bitset **/ class DynamicBitSet { protected: galois::PODResizeableArray> bitvec; size_t num_bits; static constexpr uint32_t bits_uint64 = sizeof(uint64_t) * CHAR_BIT; public: //! Constructor which initializes to an empty bitset. DynamicBitSet() : num_bits(0) {} /** * Returns the underlying bitset representation to the user * * @returns constant reference vector of copyable atomics that represents * the bitset */ const auto& get_vec() const { return bitvec; } /** * Returns the underlying bitset representation to the user * * @returns reference to vector of copyable atomics that represents the * bitset */ auto& get_vec() { return bitvec; } /** * Resizes the bitset. * * @param n Size to change the bitset to */ void resize(uint64_t n) { assert(bits_uint64 == 64); // compatibility with other devices num_bits = n; bitvec.resize((n + bits_uint64 - 1) / bits_uint64); reset(); } /** * Reserves capacity for the bitset. * * @param n Size to reserve the capacity of the bitset to */ void reserve(uint64_t n) { assert(bits_uint64 == 64); // compatibility with other devices bitvec.reserve((n + bits_uint64 - 1) / bits_uint64); } /** * Gets the size of the bitset * @returns The number of bits held by the bitset */ size_t size() const { return num_bits; } /** * Gets the space taken by the bitset * @returns the space in bytes taken by this bitset */ // size_t alloc_size() const { return bitvec.size() * sizeof(uint64_t); } /** * Unset every bit in the bitset. 
*/ void reset() { std::fill(bitvec.begin(), bitvec.end(), 0); } /** * Unset a range of bits given an inclusive range * * @param begin first bit in range to reset * @param end last bit in range to reset */ void reset(size_t begin, size_t end) { if (num_bits == 0) return; assert(begin <= (num_bits - 1)); assert(end <= (num_bits - 1)); // 100% safe implementation, but slow // for (unsigned long i = begin; i <= end; i++) { // size_t bit_index = i / bits_uint64; // uint64_t bit_offset = 1; // bit_offset <<= (i % bits_uint64); // uint64_t mask = ~bit_offset; // bitvec[bit_index] &= mask; //} // block which you are safe to clear size_t vec_begin = (begin + bits_uint64 - 1) / bits_uint64; size_t vec_end; if (end == (num_bits - 1)) vec_end = bitvec.size(); else vec_end = (end + 1) / bits_uint64; // floor if (vec_begin < vec_end) { std::fill(bitvec.begin() + vec_begin, bitvec.begin() + vec_end, 0); } vec_begin *= bits_uint64; vec_end *= bits_uint64; // at this point vec_begin -> vec_end-1 has been reset if (vec_begin > vec_end) { // no fill happened if (begin < vec_begin) { size_t diff = vec_begin - begin; assert(diff < 64); uint64_t mask = ((uint64_t)1 << (64 - diff)) - 1; size_t end_diff = end - vec_end + 1; uint64_t or_mask = ((uint64_t)1 << end_diff) - 1; mask |= ~or_mask; size_t bit_index = begin / bits_uint64; bitvec[bit_index] &= mask; } } else { if (begin < vec_begin) { size_t diff = vec_begin - begin; assert(diff < 64); uint64_t mask = ((uint64_t)1 << (64 - diff)) - 1; size_t bit_index = begin / bits_uint64; bitvec[bit_index] &= mask; } if (end >= vec_end) { size_t diff = end - vec_end + 1; assert(diff < 64); uint64_t mask = ((uint64_t)1 << diff) - 1; size_t bit_index = end / bits_uint64; bitvec[bit_index] &= ~mask; } } } /** * Check a bit to see if it is currently set. 
* Using this is recommeneded only if set() and reset() * are not being used in that parallel section/phase * * @param index Bit to check to see if set * @returns true if index is set */ bool test(size_t index) const { size_t bit_index = index / bits_uint64; uint64_t bit_offset = 1; bit_offset <<= (index % bits_uint64); return ((bitvec[bit_index].load(std::memory_order_relaxed) & bit_offset) != 0); } /** * Set a bit in the bitset. * * @param index Bit to set * @returns the old value */ bool set(size_t index) { size_t bit_index = index / bits_uint64; uint64_t bit_offset = 1; bit_offset <<= (index % bits_uint64); uint64_t old_val = bitvec[bit_index]; // test and set // if old_bit is 0, then atomically set it while (((old_val & bit_offset) == 0) && !bitvec[bit_index].compare_exchange_weak( old_val, old_val | bit_offset, std::memory_order_relaxed)) ; return (old_val & bit_offset); } /** * Reset a bit in the bitset. * * @param index Bit to reset * @returns the old value */ bool reset(size_t index) { size_t bit_index = index / bits_uint64; uint64_t bit_offset = 1; bit_offset <<= (index % bits_uint64); uint64_t old_val = bitvec[bit_index]; // test and reset // if old_bit is 1, then atomically reset it while (((old_val & bit_offset) != 0) && !bitvec[bit_index].compare_exchange_weak( old_val, old_val & ~bit_offset, std::memory_order_relaxed)) ; return (old_val & bit_offset); } // assumes bit_vector is not updated (set) in parallel void bitwise_or(const DynamicBitSet& other) { assert(size() == other.size()); auto& other_bitvec = other.get_vec(); galois::do_all( galois::iterate(size_t{0}, bitvec.size()), [&](size_t i) { bitvec[i] |= other_bitvec[i]; }, galois::no_stats()); } // assumes bit_vector is not updated (set) in parallel /** * Does an IN-PLACE bitwise and of this bitset and another bitset * * @param other Other bitset to do bitwise and with */ void bitwise_and(const DynamicBitSet& other) { assert(size() == other.size()); auto& other_bitvec = other.get_vec(); 
galois::do_all( galois::iterate(size_t{0}, bitvec.size()), [&](size_t i) { bitvec[i] &= other_bitvec[i]; }, galois::no_stats()); } /** * Does an IN-PLACE bitwise and of 2 passed in bitsets and saves to this * bitset * * @param other1 Bitset to and with other 2 * @param other2 Bitset to and with other 1 */ void bitwise_and(const DynamicBitSet& other1, const DynamicBitSet& other2) { assert(size() == other1.size()); assert(size() == other2.size()); auto& other_bitvec1 = other1.get_vec(); auto& other_bitvec2 = other2.get_vec(); galois::do_all( galois::iterate(size_t{0}, bitvec.size()), [&](size_t i) { bitvec[i] = other_bitvec1[i] & other_bitvec2[i]; }, galois::no_stats()); } /** * Does an IN-PLACE bitwise xor of this bitset and another bitset * * @param other Other bitset to do bitwise xor with */ void bitwise_xor(const DynamicBitSet& other) { assert(size() == other.size()); auto& other_bitvec = other.get_vec(); galois::do_all( galois::iterate(size_t{0}, bitvec.size()), [&](size_t i) { bitvec[i] ^= other_bitvec[i]; }, galois::no_stats()); } /** * Does an IN-PLACE bitwise and of 2 passed in bitsets and saves to this * bitset * * @param other1 Bitset to xor with other 2 * @param other2 Bitset to xor with other 1 */ void bitwise_xor(const DynamicBitSet& other1, const DynamicBitSet& other2) { assert(size() == other1.size()); assert(size() == other2.size()); auto& other_bitvec1 = other1.get_vec(); auto& other_bitvec2 = other2.get_vec(); galois::do_all( galois::iterate(size_t{0}, bitvec.size()), [&](size_t i) { bitvec[i] = other_bitvec1[i] ^ other_bitvec2[i]; }, galois::no_stats()); } /** * Count how many bits are set in the bitset * * @returns number of set bits in the bitset */ uint64_t count() const { galois::GAccumulator ret; galois::do_all( galois::iterate(bitvec.begin(), bitvec.end()), [&](uint64_t n) { #ifdef __GNUC__ ret += __builtin_popcountll(n); #else n = n - ((n >> 1) & 0x5555555555555555UL); n = (n & 0x3333333333333333UL) + ((n >> 2) & 0x3333333333333333UL); ret 
+= (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FUL) * 0x101010101010101UL) >> 56; #endif }, galois::no_stats()); return ret.reduce(); } /** * Returns a vector containing the set bits in this bitset in order * from left to right. * Do NOT call in a parallel region as it uses galois::on_each. * * @returns vector with offsets into set bits */ // TODO uint32_t is somewhat dangerous; change in the future std::vector getOffsets() const { uint32_t activeThreads = galois::getActiveThreads(); std::vector tPrefixBitCounts(activeThreads); // count how many bits are set on each thread galois::on_each([&](unsigned tid, unsigned nthreads) { size_t start; size_t end; std::tie(start, end) = galois::block_range((size_t)0, this->size(), tid, nthreads); unsigned int count = 0; for (unsigned int i = start; i < end; ++i) { if (this->test(i)) ++count; } tPrefixBitCounts[tid] = count; }); // calculate prefix sum of bits per thread for (unsigned int i = 1; i < activeThreads; ++i) { tPrefixBitCounts[i] += tPrefixBitCounts[i - 1]; } // total num of set bits uint64_t bitsetCount = tPrefixBitCounts[activeThreads - 1]; std::vector offsets; // calculate the indices of the set bits and save them to the offset // vector if (bitsetCount > 0) { offsets.resize(bitsetCount); galois::on_each([&](unsigned tid, unsigned nthreads) { size_t start; size_t end; std::tie(start, end) = galois::block_range((size_t)0, this->size(), tid, nthreads); unsigned int count = 0; unsigned int tPrefixBitCount; if (tid == 0) { tPrefixBitCount = 0; } else { tPrefixBitCount = tPrefixBitCounts[tid - 1]; } for (unsigned int i = start; i < end; ++i) { if (this->test(i)) { offsets[tPrefixBitCount + count] = i; ++count; } } }); } return offsets; } //! this is defined to using tt_is_copyable = int; }; //! An empty bitset object; used mainly by InvalidBitsetFnTy static galois::DynamicBitSet EmptyBitset; //! A structure representing an empty bitset. struct InvalidBitsetFnTy { //! 
Returns false as this is an empty bitset static constexpr bool is_vector_bitset() { return false; } //! Returns false as this is an empty bitset (invalid) static constexpr bool is_valid() { return false; } //! Returns the empty bitset static galois::DynamicBitSet& get() { return EmptyBitset; } //! No-op since it's an empty bitset static void reset_range(size_t, size_t) {} }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/Endian.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
*/ #ifndef GALOIS_ENDIAN_H #define GALOIS_ENDIAN_H #include #include "galois/config.h" namespace galois { static inline uint32_t bswap32(uint32_t x) { #if defined(__GNUC__) || defined(__clang__) return __builtin_bswap32(x); #else return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); #endif } static inline uint64_t bswap64(uint64_t x) { #if defined(__GNUC__) || defined(__clang__) return __builtin_bswap64(x); #else return ((x << 56) & 0xff00000000000000UL) | ((x << 40) & 0x00ff000000000000UL) | ((x << 24) & 0x0000ff0000000000UL) | ((x << 8) & 0x000000ff00000000UL) | ((x >> 8) & 0x00000000ff000000UL) | ((x >> 24) & 0x0000000000ff0000UL) | ((x >> 40) & 0x000000000000ff00UL) | ((x >> 56) & 0x00000000000000ffUL); #endif } // NB: Wrap these standard functions with different names because // sometimes le64toh and such are implemented as macros and we don't // want any nasty surprises. static inline uint64_t convert_le64toh(uint64_t x) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return x; #else return bswap64(x); #endif } static inline uint32_t convert_le32toh(uint32_t x) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return x; #else return bswap32(x); #endif } static inline uint64_t convert_htobe64(uint64_t x) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return x; #else return bswap64(x); #endif } static inline uint32_t convert_htobe32(uint32_t x) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return x; #else return bswap32(x); #endif } static inline uint64_t convert_htole64(uint64_t x) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return x; #else return bswap64(x); #endif } static inline uint32_t convert_htole32(uint32_t x) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return x; #else return bswap32(x); #endif } } // namespace galois #endif ================================================ FILE: libgalois/include/galois/FixedSizeRing.h ================================================ /* * This file belongs to 
the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_FIXEDSIZERING_H #define GALOIS_FIXEDSIZERING_H #include #include #include #include #include #include "galois/config.h" #include "galois/optional.h" #include "galois/LazyArray.h" namespace galois { //! 
Unordered collection of bounded size template class FixedSizeBagBase { LazyArray datac; typedef typename boost::mpl::if_c, unsigned>::type Count; Count count; T* at(unsigned i) { return &datac[i]; } const T* at(unsigned i) const { return &datac[i]; } bool precondition() const { return count <= ChunkSize; } public: typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef boost::reverse_iterator iterator; typedef boost::reverse_iterator const_iterator; typedef pointer reverse_iterator; typedef const_pointer const_reverse_iterator; FixedSizeBagBase() : count(0) {} template FixedSizeBagBase(InputIterator first, InputIterator last) : count(0) { while (first != last) { assert(count < ChunkSize); datac.emplace(count++, *first++); } } FixedSizeBagBase(const FixedSizeBagBase& o) = delete; FixedSizeBagBase& operator=(const FixedSizeBagBase& o) = delete; ~FixedSizeBagBase() { clear(); } unsigned size() const { assert(precondition()); return count; } bool empty() const { assert(precondition()); return count == 0; } bool full() const { assert(precondition()); return count == ChunkSize; } void clear() { assert(precondition()); for (unsigned x = 0; x < count; ++x) datac.destroy(x); count = 0; } template pointer push_back(U&& val) { return push_front(std::forward(val)); } template pointer emplace_back(Args&&... 
args) { return emplace_front(std::forward(args)...); } template auto push_front(U&& val) -> typename std::enable_if::type { return emplace_front(std::forward(val)); } template auto push_front(const value_type& val) -> typename std::enable_if::type { unsigned top; do { top = count.load(std::memory_order_relaxed); if (top >= ChunkSize) return nullptr; } while (!count.compare_exchange_weak(top, top + 1)); return datac.emplace(top, val); } /** * emplace_front is not available for concurrent versions because it is not * possible for clients to know in advance whether insertion will succeed, * which will leave xvalue arguments in indeterminate state. */ template auto emplace_front(Args&&... args) -> typename std::enable_if::type { if (full()) return 0; unsigned top = count++; return datac.emplace(top, std::forward(args)...); } reference back() { return front(); } const_reference back() const { return front(); } galois::optional extract_back() { return extract_front(); } bool pop_back() { return pop_front(); } reference front() { assert(precondition()); assert(!empty()); return *at(count - 1); } const_reference front() const { return *at(count - 1); } template auto extract_front() -> typename std::enable_if>::type { if (!empty()) { galois::optional retval(back()); pop_back(); return retval; } return galois::optional(); } //! returns true if something was popped template auto pop_front() -> typename std::enable_if::type { unsigned top; do { top = count.load(std::memory_order_relaxed); if (top == 0) return false; } while (!count.compare_exchange_weak(top, top - 1)); datac.destroy(top); return true; } //! 
returns true if something was popped template auto pop_front() -> typename std::enable_if::type { if (count == 0) return false; datac.destroy(--count); return true; } reverse_iterator rbegin() { return &datac[0]; } reverse_iterator rend() { return &datac[count]; } const_reverse_iterator rbegin() const { return &datac[0]; } const_reverse_iterator rend() const { return &datac[count]; } iterator begin() { return iterator(rend()); } iterator end() { return iterator(rbegin()); } const_iterator begin() const { return const_iterator(rend()); } const_iterator end() const { return const_iterator(rbegin()); } }; //! Unordered collection of bounded size template using FixedSizeBag = FixedSizeBagBase; //! Unordered collection of bounded size with concurrent insertion or deletion //! but not both simultaneously template using ConcurrentFixedSizeBag = FixedSizeBagBase; //! Ordered collection of bounded size template class FixedSizeRing { LazyArray datac; unsigned start; unsigned count; T* at(unsigned i) { return &datac[i]; } const T* at(unsigned i) const { return &datac[i]; } bool precondition() const { return count <= ChunkSize && start <= ChunkSize; } template class Iterator : public boost::iterator_facade, U, boost::random_access_traversal_tag> { friend class boost::iterator_core_access; U* base; unsigned cur; unsigned count; template bool equal(const Iterator& o) const { assert(base && o.base); return &base[cur] == &o.base[o.cur] && count == o.count; } U& dereference() const { return base[cur]; } void increment() { assert(base && count != 0); count -= 1; cur = (cur + 1) % ChunkSize; } void decrement() { assert(base && count < ChunkSize); count += 1; cur = (cur + ChunkSize - 1) % ChunkSize; } void advance(ptrdiff_t x) { count -= x; cur = (cur + ChunkSize + x) % ChunkSize; } ptrdiff_t distance_to(const Iterator& o) const { ptrdiff_t c = count; ptrdiff_t oc = o.count; return c - oc; } public: Iterator() : base(0), cur(0), count(0) {} template Iterator(const Iterator& o) : 
base(o.base), cur(o.cur), count(o.count) {} Iterator(U* b, unsigned c, unsigned co) : base(b), cur(c), count(co) {} }; public: typedef T value_type; typedef T* pointer; typedef T& reference; typedef const T& const_reference; typedef Iterator iterator; typedef Iterator const_iterator; typedef boost::reverse_iterator> reverse_iterator; typedef boost::reverse_iterator> const_reverse_iterator; FixedSizeRing() : start(0), count(0) {} template FixedSizeRing(InputIterator first, InputIterator last) : start(0), count(0) { while (first != last) { assert(count < ChunkSize); datac.emplace(count++, *first++); } } FixedSizeRing(const FixedSizeRing& o) = delete; FixedSizeRing& operator=(const FixedSizeRing& o) = delete; ~FixedSizeRing() { clear(); } unsigned size() const { assert(precondition()); return count; } bool empty() const { assert(precondition()); return count == 0; } bool full() const { assert(precondition()); return count == ChunkSize; } reference getAt(unsigned x) { assert(precondition()); assert(!empty()); return *at((start + x) % ChunkSize); } const_reference getAt(unsigned x) const { assert(precondition()); assert(!empty()); return *at((start + x) % ChunkSize); } void clear() { assert(precondition()); for (unsigned x = 0; x < count; ++x) datac.destroy((start + x) % ChunkSize); count = 0; start = 0; } // NB(ddn): Keeping emplace_front/_back code paths separate to improve // branch prediction etc template pointer emplace(iterator pos, Args&&... args) { if (full()) return 0; unsigned i; if (pos == begin()) { i = start = (start + ChunkSize - 1) % ChunkSize; ++count; } else if (pos == end()) { i = (start + count) % ChunkSize; ++count; } else { auto d = std::distance(begin(), pos); i = (start + d) % ChunkSize; emplace_back(); std::move_backward(begin() + d, end() - 1, end()); datac.destroy(i); } return datac.emplace(i, std::forward(args)...); } template pointer push_front(U&& val) { return emplace_front(std::forward(val)); } template pointer emplace_front(Args&&... 
args) { if (full()) return 0; start = (start + ChunkSize - 1) % ChunkSize; ++count; return datac.emplace(start, std::forward(args)...); } template pointer push_back(U&& val) { return emplace_back(std::forward(val)); } template pointer emplace_back(Args&&... args) { if (full()) return 0; unsigned end = (start + count) % ChunkSize; ++count; return datac.emplace(end, std::forward(args)...); } reference front() { assert(precondition()); assert(!empty()); return *at(start); } const_reference front() const { assert(precondition()); assert(!empty()); return *at(start); } galois::optional extract_front() { if (!empty()) { galois::optional retval(front()); pop_front(); return retval; } return galois::optional(); } void pop_front() { assert(precondition()); assert(!empty()); datac.destroy(start); start = (start + 1) % ChunkSize; --count; } reference back() { assert(precondition()); assert(!empty()); return *at((start + count - 1) % ChunkSize); } const_reference back() const { assert(precondition()); assert(!empty()); return *at((start + count - 1) % ChunkSize); } galois::optional extract_back() { if (!empty()) { galois::optional retval(back()); pop_back(); return retval; } return galois::optional(); } void pop_back() { assert(precondition()); assert(!empty()); datac.destroy((start + count - 1) % ChunkSize); --count; } iterator begin() { return iterator(at(0), start, count); } iterator end() { return iterator(at(0), (start + count) % ChunkSize, 0); } const_iterator begin() const { return const_iterator(at(0), start, count); } const_iterator end() const { return const_iterator(at(0), (start + count) % ChunkSize, 0); } reverse_iterator rbegin() { return reverse_iterator(end()); } reverse_iterator rend() { return reverse_iterator(begin()); } const_iterator rbegin() const { const_reverse_iterator(this->end()); } const_iterator rend() const { const_reverse_iterator(this->begin()); } }; } // namespace galois #endif ================================================ FILE: 
libgalois/include/galois/FlatMap.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_FLATMAP_H #define GALOIS_FLATMAP_H #include #include #include #include #include "galois/config.h" namespace galois { //! Simple map data structure, based off a single array. template , class _Alloc = std::allocator>, class _Store = std::vector, _Alloc>> class flat_map { public: typedef _Key key_type; typedef _Tp mapped_type; typedef std::pair<_Key, _Tp> value_type; typedef _Compare key_compare; typedef _Alloc allocator_type; class value_compare { friend class flat_map<_Key, _Tp, _Compare, _Alloc, _Store>; protected: _Compare comp; value_compare(_Compare __c) : comp(__c) {} public: bool operator()(const value_type& __x, const value_type& __y) const { return comp(__x.first, __y.first); } }; private: /// This turns... 
typedef typename _Alloc::template rebind::other _Pair_alloc_type; typedef _Store _VectTy; _VectTy _data; _Compare _comp; class value_key_compare { friend class flat_map<_Key, _Tp, _Compare, _Alloc, _Store>; protected: _Compare comp; value_key_compare(_Compare __c) : comp(__c) {} public: bool operator()(const value_type& __x, const key_type& __y) const { return comp(__x.first, __y); } }; value_key_compare value_key_comp() const { return value_key_compare(key_comp()); } bool key_eq(const key_type& k1, const key_type& k2) const { return !key_comp()(k1, k2) && !key_comp()(k2, k1); } void resort() { std::sort(_data.begin(), _data.end(), value_comp()); } public: typedef typename _Pair_alloc_type::pointer pointer; typedef typename _Pair_alloc_type::const_pointer const_pointer; typedef typename _Pair_alloc_type::reference reference; typedef typename _Pair_alloc_type::const_reference const_reference; typedef typename _VectTy::iterator iterator; typedef typename _VectTy::const_iterator const_iterator; typedef typename _VectTy::size_type size_type; typedef typename _VectTy::difference_type difference_type; typedef typename _VectTy::reverse_iterator reverse_iterator; typedef typename _VectTy::const_reverse_iterator const_reverse_iterator; flat_map() : _data(), _comp() {} explicit flat_map(const _Compare& __comp, const allocator_type& = allocator_type()) // XXX :_data(_Pair_alloc_type(__a)), _comp(__comp) {} : _data(), _comp(__comp) {} flat_map(const flat_map& __x) : _data(__x._data), _comp(__x._comp) {} flat_map(flat_map&& __x) /* noexcept(std::is_nothrow_copy_constructible<_Compare>::value) */ : _data(std::move(__x._data)), _comp(std::move(__x._comp)) {} /* flat_map(std::initializer_list __l, const _Compare& __comp = _Compare(), const allocator_type& __a = allocator_type()) : _data(__l, _Pair_alloc_type(__a)), _comp(__comp) { resort(); } */ template flat_map(_InputIterator __first, _InputIterator __last) : _data(__first, __last), _comp() { resort(); } template 
flat_map(_InputIterator __first, _InputIterator __last, const _Compare&, const allocator_type& __a = allocator_type()) : _data(__first, __last, _Pair_alloc_type(__a)) { resort(); } flat_map& operator=(const flat_map& __x) { _data = __x._data; _comp = __x._comp; return *this; } flat_map& operator=(flat_map&& __x) { clear(); swap(__x); return *this; } /* flat_map& operator=(std::initializer_list __l) { clear(); insert(__l.begin(), __l.end()); return *this; } */ allocator_type get_allocator() const /* noexcept */ { return allocator_type(_data.get_allocator()); } // iterators iterator begin() /* noexcept */ { return _data.begin(); } const_iterator begin() const /* noexcept */ { return _data.begin(); } iterator end() /* noexcept */ { return _data.end(); } const_iterator end() const /* noexcept */ { return _data.end(); } reverse_iterator rbegin() /* noexcept */ { return _data.rbegin(); } const_reverse_iterator rbegin() const /* noexcept */ { return _data.rbegin(); } reverse_iterator rend() /* noexcept */ { return _data.rend(); } const_reverse_iterator rend() const /* noexcept */ { return _data.rend(); } const_iterator cbegin() const /* noexcept */ { return _data.begin(); } const_iterator cend() const /* noexcept */ { return _data.end(); } const_reverse_iterator crbegin() const /* noexcept */ { return _data.rbegin(); } const_reverse_iterator crend() const /* noexcept */ { return _data.rend(); } bool empty() const /* noexcept */ { return _data.empty(); } size_type size() const /* noexcept */ { return _data.size(); } size_type max_size() const /* noexcept */ { return _data.max_size(); } template std::pair emplace(Args&&... 
args) { // assert(std::adjacent_find(_data.begin(), _data.end(), [&](const // value_type& a, const value_type& b) { // return key_comp()(b.first, a.first); //}) == _data.end()); _data.emplace_back(std::forward(args)...); value_type& v = _data.back(); auto ee = _data.end(); --ee; auto __i = std::lower_bound(_data.begin(), ee, v.first, value_key_comp()); // key < __i->first bool retval = __i == ee || key_comp()(v.first, (*__i).first); if (retval) { if (__i != ee) { value_type tmp = std::move(v); __i = _data.emplace(__i, std::move(tmp)); _data.pop_back(); } } else { // key == __i->first _data.pop_back(); } return std::make_pair(__i, retval); } mapped_type& operator[](const key_type& __k) { iterator __i = lower_bound(__k); // __i->first is greater than or equivalent to __k. if (__i == end() || key_comp()(__k, (*__i).first)) __i = _data.emplace(__i, std::piecewise_construct, std::forward_as_tuple(__k), std::tuple<>()); return (*__i).second; } mapped_type& operator[](key_type&& __k) { iterator __i = lower_bound(__k); // __i->first is greater than or equivalent to __k. 
if (__i == end() || key_comp()(__k, (*__i).first)) __i = _data.emplace(__i, std::piecewise_construct, std::forward_as_tuple(std::move(__k)), std::tuple<>()); return (*__i).second; } mapped_type& at(const key_type& __k) { iterator __i = lower_bound(__k); if (__i == end() || key_comp()(__k, (*__i).first)) throw std::out_of_range("flat_map::at"); return (*__i).second; } const mapped_type& at(const key_type& __k) const { const_iterator __i = lower_bound(__k); if (__i == end() || key_comp()(__k, (*__i).first)) throw std::out_of_range("flat_map::at"); return (*__i).second; } template ::value>::type> std::pair insert(PairTy&& __x) { return emplace(std::forward(__x)); } /* void insert(std::initializer_list __list) { insert(__list.begin(), __list.end()); } */ template void insert(_InputIterator __first, _InputIterator __last) { while (__first != __last) insert(*__first++); } iterator erase(const_iterator __position) { return _data.erase(__position); } iterator erase(iterator __position) { return _data.erase(__position); } size_type erase(const key_type& __x) { auto i = find(__x); if (i != end()) { _data.erase(i); return 1; } return 0; } iterator erase(const_iterator __first, const_iterator __last) { return _data.erase(__first, __last); } void swap(flat_map& __x) { _data.swap(__x._data); std::swap(_comp, __x._comp); } void clear() /* noexcept */ { _data.clear(); } key_compare key_comp() const { return _comp; } value_compare value_comp() const { return value_compare(key_comp()); } iterator find(const key_type& __x) { auto i = lower_bound(__x); if (i != end() && key_eq(i->first, __x)) return i; return end(); } const_iterator find(const key_type& __x) const { auto i = lower_bound(__x); if (i != end() && key_eq(i->first, __x)) return i; return end(); } size_type count(const key_type& __x) const { return find(__x) == end() ? 
0 : 1; } iterator lower_bound(const key_type& __x) { return std::lower_bound(_data.begin(), _data.end(), __x, value_key_comp()); } const_iterator lower_bound(const key_type& __x) const { return std::lower_bound(_data.begin(), _data.end(), __x, value_key_comp()); } iterator upper_bound(const key_type& __x) { return std::upper_bound(_data.begin(), _data.end(), __x, value_key_comp()); } const_iterator upper_bound(const key_type& __x) const { return std::upper_bound(_data.begin(), _data.end(), __x, value_key_comp()); } std::pair equal_range(const key_type& __x) { return std::make_pair(lower_bound(__x), upper_bound(__x)); } std::pair equal_range(const key_type& __x) const { return std::make_pair(lower_bound(__x), upper_bound(__x)); } }; template inline bool operator==(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return __x._data == __y._data; } template inline bool operator<(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return __x._data < __y._data; } /// Based on operator== template inline bool operator!=(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return !(__x == __y); } /// Based on operator< template inline bool operator>(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return __y < __x; } /// Based on operator< template inline bool operator<=(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return !(__y < __x); } /// Based on operator< template inline bool operator>=(const flat_map<_Key, _Tp, _Compare, _Alloc>& __x, const flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { return !(__x < __y); } } // namespace galois namespace std { /// See galois::flat_map::swap(). 
template inline void swap(galois::flat_map<_Key, _Tp, _Compare, _Alloc>& __x, galois::flat_map<_Key, _Tp, _Compare, _Alloc>& __y) { __x.swap(__y); } } // namespace std #endif ================================================ FILE: libgalois/include/galois/Galois.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_GALOIS_H #define GALOIS_GALOIS_H #include "galois/config.h" #include "galois/Loops.h" #include "galois/SharedMemSys.h" #include "galois/runtime/Mem.h" #endif ================================================ FILE: libgalois/include/galois/GaloisForwardDecl.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). 
* * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #include "galois/config.h" namespace galois { template void for_each(const RangeFunc& rangeMaker, FunctionTy&& fn, const Args&... args); template void do_all(const RangeFunc& rangeMaker, FunctionTy&& fn, const Args&... args); template void on_each(FunctionTy&& fn, const Args&... args); } // end namespace galois ================================================ FILE: libgalois/include/galois/LargeArray.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. 
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_LARGEARRAY_H #define GALOIS_LARGEARRAY_H #include #include #include #include #include #include #include #include #include "galois/config.h" #include "galois/Galois.h" #include "galois/gIO.h" #include "galois/ParallelSTL.h" #include "galois/runtime/Mem.h" #include "galois/substrate/NumaMem.h" namespace galois { namespace runtime { extern unsigned activeThreads; } // end namespace runtime /** * Large array of objects with proper specialization for void type and * supporting various allocation and construction policies. 
* * @tparam T value type of container */ template class LargeArray { substrate::LAptr m_realdata; T* m_data; size_t m_size; public: typedef T raw_value_type; typedef T value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* pointer; typedef const value_type* const_pointer; typedef pointer iterator; typedef const_pointer const_iterator; const static bool has_value = true; // Extra indirection to support incomplete T's struct size_of { const static size_t value = sizeof(T); }; protected: enum AllocType { Blocked, Local, Interleaved, Floating }; void allocate(size_type n, AllocType t) { assert(!m_data); m_size = n; switch (t) { case Blocked: galois::gDebug("Block-alloc'd"); m_realdata = substrate::largeMallocBlocked(n * sizeof(T), runtime::activeThreads); break; case Interleaved: galois::gDebug("Interleave-alloc'd"); m_realdata = substrate::largeMallocInterleaved(n * sizeof(T), runtime::activeThreads); break; case Local: galois::gDebug("Local-allocd"); m_realdata = substrate::largeMallocLocal(n * sizeof(T)); break; case Floating: galois::gDebug("Floating-alloc'd"); m_realdata = substrate::largeMallocFloating(n * sizeof(T)); break; }; m_data = reinterpret_cast(m_realdata.get()); } private: /* * To support boost serialization */ friend class boost::serialization::access; template void save(Archive& ar, const unsigned int) const { // TODO DON'T USE CERR // std::cerr << "save m_size : " << m_size << " Threads : " << // runtime::activeThreads << "\n"; ar << m_size; // for(size_t i = 0; i < m_size; ++i){ // ar << m_data[i]; //} ar << boost::serialization::make_binary_object(m_data, m_size * sizeof(T)); /* * Cas use make_array too as shown below * IMPORTANT: Use make_array as temp fix for benchmarks using non-trivial * structures in nodeData (Eg. SGD) This also requires changes in * libgalois/include/galois/graphs/Details.h (specified in the file). 
*/ // ar << boost::serialization::make_array(m_data, m_size); } template void load(Archive& ar, const unsigned int) { ar >> m_size; // TODO DON'T USE CERR // std::cerr << "load m_size : " << m_size << " Threads : " << // runtime::activeThreads << "\n"; // TODO: For now, always use allocateInterleaved // Allocates and sets m_data pointer if (!m_data) allocateInterleaved(m_size); // for(size_t i = 0; i < m_size; ++i){ // ar >> m_data[i]; //} ar >> boost::serialization::make_binary_object(m_data, m_size * sizeof(T)); /* * Cas use make_array too as shown below * IMPORTANT: Use make_array as temp fix for SGD * This also requires changes in * libgalois/include/galois/graphs/Details.h (specified in the file). */ // ar >> boost::serialization::make_array(m_data, m_size); } // The macro BOOST_SERIALIZATION_SPLIT_MEMBER() generates code which invokes // the save or load depending on whether the archive is used for saving or // loading BOOST_SERIALIZATION_SPLIT_MEMBER() public: /** * Wraps existing buffer in LargeArray interface. 
*/ LargeArray(void* d, size_t s) : m_data(reinterpret_cast(d)), m_size(s) {} LargeArray() : m_data(0), m_size(0) {} LargeArray(LargeArray&& o) : m_data(0), m_size(0) { std::swap(this->m_realdata, o.m_realdata); std::swap(this->m_data, o.m_data); std::swap(this->m_size, o.m_size); } LargeArray& operator=(LargeArray&& o) { std::swap(this->m_realdata, o.m_realdata); std::swap(this->m_data, o.m_data); std::swap(this->m_size, o.m_size); return *this; } LargeArray(const LargeArray&) = delete; LargeArray& operator=(const LargeArray&) = delete; ~LargeArray() { destroy(); deallocate(); } friend void swap(LargeArray& lhs, LargeArray& rhs) { std::swap(lhs.m_realdata, rhs.m_realdata); std::swap(lhs.m_data, rhs.m_data); std::swap(lhs.m_size, rhs.m_size); } const_reference at(difference_type x) const { return m_data[x]; } reference at(difference_type x) { return m_data[x]; } const_reference operator[](size_type x) const { return m_data[x]; } reference operator[](size_type x) { return m_data[x]; } void set(difference_type x, const_reference v) { m_data[x] = v; } size_type size() const { return m_size; } iterator begin() { return m_data; } const_iterator begin() const { return m_data; } iterator end() { return m_data + m_size; } const_iterator end() const { return m_data + m_size; } //! [allocatefunctions] //! Allocates interleaved across NUMA (memory) nodes. 
void allocateInterleaved(size_type n) { allocate(n, Interleaved); } /** * Allocates using blocked memory policy * * @param n number of elements to allocate */ void allocateBlocked(size_type n) { allocate(n, Blocked); } /** * Allocates using Thread Local memory policy * * @param n number of elements to allocate */ void allocateLocal(size_type n) { allocate(n, Local); } /** * Allocates using no memory policy (no pre alloc) * * @param n number of elements to allocate */ void allocateFloating(size_type n) { allocate(n, Floating); } /** * Allocate memory to threads based on a provided array specifying which * threads receive which elements of data. * * @tparam RangeArrayTy The type of the threadRanges array; should either * be uint32_t* or uint64_t* * @param numberOfElements Number of elements to allocate space for * @param threadRanges An array specifying how elements should be split * among threads */ template void allocateSpecified(size_type numberOfElements, RangeArrayTy& threadRanges) { assert(!m_data); m_realdata = substrate::largeMallocSpecified(numberOfElements * sizeof(T), runtime::activeThreads, threadRanges, sizeof(T)); m_size = numberOfElements; m_data = reinterpret_cast(m_realdata.get()); } //! [allocatefunctions] template void construct(Args&&... args) { for (T *ii = m_data, *ei = m_data + m_size; ii != ei; ++ii) new (ii) T(std::forward(args)...); } template void constructAt(size_type n, Args&&... args) { new (&m_data[n]) T(std::forward(args)...); } //! Allocate and construct template void create(size_type n, Args&&... 
args) { allocateInterleaved(n); construct(std::forward(args)...); } void deallocate() { m_realdata.reset(); m_data = 0; m_size = 0; } void destroy() { if (!m_data) return; galois::ParallelSTL::destroy(m_data, m_data + m_size); } template std::enable_if_t::value> destroyAt(size_type n) { (&m_data[n])->~T(); } template std::enable_if_t::value> destroyAt(size_type) {} // The following methods are not shared with void specialization const_pointer data() const { return m_data; } pointer data() { return m_data; } }; //! Void specialization template <> class LargeArray { private: /* * To support boost serialization * Can use single function serialize instead of save and load, since both save * and load have identical code. */ friend class boost::serialization::access; template void serialize(Archive&, const unsigned int) const {} public: LargeArray(void*, size_t) {} LargeArray() = default; LargeArray(const LargeArray&) = delete; LargeArray& operator=(const LargeArray&) = delete; friend void swap(LargeArray&, LargeArray&) {} typedef void raw_value_type; typedef void* value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef value_type reference; typedef value_type const_reference; typedef value_type* pointer; typedef value_type* const_pointer; typedef pointer iterator; typedef const_pointer const_iterator; const static bool has_value = false; struct size_of { const static size_t value = 0; }; const_reference at(difference_type) const { return 0; } reference at(difference_type) { return 0; } const_reference operator[](size_type) const { return 0; } template void set(difference_type, AnyTy) {} size_type size() const { return 0; } iterator begin() { return 0; } const_iterator begin() const { return 0; } iterator end() { return 0; } const_iterator end() const { return 0; } void allocateInterleaved(size_type) {} void allocateBlocked(size_type) {} void allocateLocal(size_type, bool = true) {} void allocateFloating(size_type) {} template void 
allocateSpecified(size_type, RangeArrayTy) {} template void construct(Args&&...) {} template void constructAt(size_type, Args&&...) {} template void create(size_type, Args&&...) {} void deallocate() {} void destroy() {} void destroyAt(size_type) {} const_pointer data() const { return 0; } pointer data() { return 0; } }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/LazyArray.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_LAZYARRAY_H #define GALOIS_LAZYARRAY_H #include #include #include #include #include #include #include "galois/config.h" #include "galois/LazyObject.h" namespace galois { /** * This is a container that encapsulates space for a constant size array. The * initialization and destruction of items is explicitly under the control of * the user. 
*/ template class LazyArray { typedef typename std::aligned_storage< sizeof(_Tp), std::alignment_of<_Tp>::value>::type CharData; LazyObject<_Tp> data_[(_Size > 0 ? _Size : 1)]; _Tp* get(size_t __n) { return &data_[__n].get(); } const _Tp* get(size_t __n) const { return &data_[__n].get(); } public: typedef _Tp value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* pointer; typedef const value_type* const_pointer; typedef pointer iterator; typedef const_pointer const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; // iterators: iterator begin() { return iterator(get(0)); } const_iterator begin() const { return const_iterator(get(0)); } iterator end() { return iterator(get(_Size)); } const_iterator end() const { return const_iterator(get(_Size)); } reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } const_iterator cbegin() const { return begin(); } const_iterator cend() const { return end(); } const_reverse_iterator crbegin() const { return rbegin(); } const_reverse_iterator crend() const { return rend(); } // capacity: size_type size() const { return _Size; } size_type max_size() const { return _Size; } bool empty() const { return _Size == 0; } // element access: reference operator[](size_type __n) { return *get(__n); } const_reference operator[](size_type __n) const { return *get(__n); } reference at(size_type __n) { if (__n >= _Size) throw std::out_of_range("lazyArray::at"); return get(__n); } const_reference at(size_type __n) const { if (__n >= _Size) throw std::out_of_range("lazyArray::at"); return get(__n); } reference front() { return *get(0); } const_reference 
front() const { return *get(0); } reference back() { return *get(_Size > 0 ? _Size - 1 : 0); } const_reference back() const { return *get(_Size > 0 ? _Size - 1 : 0); } pointer data() { return get(0); } const_pointer data() const { return get(0); } // missing: fill swap template pointer emplace(size_type __n, Args&&... args) { return new (get(__n)) _Tp(std::forward(args)...); } pointer construct(size_type __n, const _Tp& val) { return emplace(__n, val); } pointer construct(size_type __n, _Tp&& val) { return emplace(__n, std::move(val)); } void destroy(size_type __n) { (get(__n))->~_Tp(); } }; } // namespace galois #endif // GALOIS_LAZYARRAY_H ================================================ FILE: libgalois/include/galois/LazyObject.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
/* (closes the license comment continued from the previous source line) */

#ifndef GALOIS_LAZYOBJECT_H
#define GALOIS_LAZYOBJECT_H

#include <cstddef>
#include <new>
#include <type_traits>
#include <utility>

#if __has_include("galois/config.h")
#include "galois/config.h"
#include "galois/gIO.h"
#endif

namespace galois {

/**
 * Single object with specialization for void type. To take advantage of empty
 * member optimization, users should subclass this class, otherwise the
 * compiler will insert non-zero padding for fields (even when empty).
 */
template <typename T>
class StrictObject {
  T data;

public:
  typedef T value_type;
  typedef T& reference;
  typedef const T& const_reference;
  const static bool has_value = true;

  StrictObject() {}
  StrictObject(const_reference t) : data(t) {}

  const_reference get() const { return data; }
  reference get() { return data; }
};

//! Void specialization: no storage; get() yields a null void*.
template <>
struct StrictObject<void> {
  typedef void* value_type;
  typedef void* reference;
  typedef void* const_reference;
  const static bool has_value = false;

  StrictObject() {}
  StrictObject(const_reference) {}
  reference get() const { return 0; }
};

/**
 * Single (uninitialized) object with specialization for void type. To take
 * advantage of empty member optimization, users should subclass this class,
 * otherwise the compiler will insert non-zero padding for fields (even when
 * empty).
 *
 * Lifetime is fully manual: construct() before get(), destroy() when done.
 */
template <typename T>
class LazyObject {
  typedef typename std::aligned_storage<sizeof(T),
                                        std::alignment_of<T>::value>::type
      CharData;

  // Union gives correctly aligned raw storage without running T's
  // constructor/destructor automatically.
  union Data {
    CharData buf;
    T value_;

    // Declare constructor explicitly because Data must be default
    // constructable regardless of the constructability of T.
    Data() {}  // NOLINT(modernize-use-equals-default)
    ~Data() {} // NOLINT(modernize-use-equals-default)

    T& value() { return value_; }
    const T& value() const { return value_; }
  };

  Data data_;

  T* cast() { return &data_.value(); }
  const T* cast() const { return &data_.value(); }

public:
  typedef T value_type;
  typedef T& reference;
  typedef const T& const_reference;
  const static bool has_value = true;

  // Can't support incomplete T's but provide same interface as
  // {@link galois::LargeArray} for consistency
  struct size_of {
    const static size_t value = sizeof(T);
  };

  //! Runs T's destructor; only valid after construct().
  void destroy() { cast()->~T(); }

  void construct(const_reference x) { new (cast()) T(x); }

  template <typename... Args>
  void construct(Args&&... args) {
    new (cast()) T(std::forward<Args>(args)...);
  }

  const_reference get() const { return *cast(); }
  reference get() { return *cast(); }
};

//! Void specialization: all operations are no-ops.
template <>
struct LazyObject<void> {
  typedef void* value_type;
  typedef void* reference;
  typedef void* const_reference;
  const static bool has_value = false;

  struct size_of {
    const static size_t value = 0;
  };

  void destroy() {}
  void construct(const_reference) {}
  template <typename... Args>
  void construct(Args&&...) {}
  const_reference get() const { return 0; }
};

} // namespace galois
#endif
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_LOOPS_H #define GALOIS_LOOPS_H #include "galois/config.h" #include "galois/runtime/Executor_Deterministic.h" #include "galois/runtime/Executor_DoAll.h" #include "galois/runtime/Executor_ForEach.h" #include "galois/runtime/Executor_OnEach.h" #include "galois/runtime/Executor_Ordered.h" #include "galois/runtime/Executor_ParaMeter.h" #include "galois/worklists/WorkList.h" namespace galois { //////////////////////////////////////////////////////////////////////////////// // Foreach //////////////////////////////////////////////////////////////////////////////// /** * Galois unordered set iterator. * Operator should conform to fn(item, UserContext&) where item * is a value from the iteration range and T is the type of item. * * @param rangeMaker an iterate range maker typically returned by * galois::iterate(...) * (@see galois::iterate()). rangeMaker is a functor which when called returns a * range object * @param fn operator * @param args optional arguments to loop, e.g., {@see loopname}, {@see wl} */ template void for_each(const RangeFunc& rangeMaker, FunctionTy&& fn, const Args&... args) { auto tpl = std::make_tuple(args...); runtime::for_each_gen(rangeMaker(tpl), std::forward(fn), tpl); } /** * Standard do-all loop. All iterations should be independent. * Operator should conform to fn(item) where item is a value from * the iteration range. * * @param rangeMaker an iterate range maker typically returned by * galois::iterate(...) * (@see galois::iterate()). 
rangeMaker is a functor which when called returns a * range object * @param fn operator * @param args optional arguments to loop */ template void do_all(const RangeFunc& rangeMaker, FunctionTy&& fn, const Args&... args) { auto tpl = std::make_tuple(args...); runtime::do_all_gen(rangeMaker(tpl), std::forward(fn), tpl); } /** * Low-level parallel loop. Operator is applied for each running thread. * Operator should confirm to fn(tid, numThreads) where tid is * the id of the current thread and numThreads is the total number of running * threads. * * @param fn operator, which is never copied * @param args optional arguments to loop */ template void on_each(FunctionTy&& fn, const Args&... args) { runtime::on_each_gen(std::forward(fn), std::make_tuple(args...)); } /** * Preallocates hugepages on each thread. * * @param num number of pages to allocate of size {@link * galois::runtime::MM::hugePageSize} */ static inline void preAlloc(int num) { static const bool DISABLE_PREALLOC = false; if (DISABLE_PREALLOC) { galois::gWarn("preAlloc disabled"); } else { runtime::preAlloc_impl(num); } } /** * Reports number of hugepages allocated by the Galois system so far. The value * is printing using the statistics infrastructure. * * @param label Label to associated with report at this program point */ static inline void reportPageAlloc(const char* label) { runtime::reportPageAlloc(label); } /** * Galois ordered set iterator for stable source algorithms. * * Operator should conform to fn(item, UserContext&) where item * is a value from the iteration range and T is the type of item. Comparison * function should conform to bool r = cmp(item1, item2) where r is * true if item1 is less than or equal to item2. Neighborhood function should * conform to nhFunc(item) and should visit every element in the * neighborhood of active element item. 
* * @param b begining of range of initial items * @param e end of range of initial items * @param cmp comparison function * @param nhFunc neighborhood function * @param fn operator * @param loopname string to identity loop in statistics output */ template void for_each_ordered(Iter b, Iter e, const Cmp& cmp, const NhFunc& nhFunc, const OpFunc& fn, const char* loopname = 0) { runtime::for_each_ordered_impl(b, e, cmp, nhFunc, fn, loopname); } /** * Galois ordered set iterator for unstable source algorithms. * * Operator should conform to fn(item, UserContext&) where item * is a value from the iteration range and T is the type of item. Comparison * function should conform to bool r = cmp(item1, item2) where r is * true if item1 is less than or equal to item2. Neighborhood function should * conform to nhFunc(item) and should visit every element in the * neighborhood of active element item. The stability test should conform to * bool r = stabilityTest(item) where r is true if item is a stable * source. * * @param b begining of range of initial items * @param e end of range of initial items * @param cmp comparison function * @param nhFunc neighborhood function * @param fn operator * @param stabilityTest stability test * @param loopname string to identity loop in statistics output */ template void for_each_ordered(Iter b, Iter e, const Cmp& cmp, const NhFunc& nhFunc, const OpFunc& fn, const StableTest& stabilityTest, const char* loopname = 0) { runtime::for_each_ordered_impl(b, e, cmp, nhFunc, fn, stabilityTest, loopname); } /** * Helper functor class to invoke galois::do_all on provided args * Can be used to choose between galois::do_all and other equivalents such as * std::for_each */ struct DoAll { template void operator()(const RangeFunc& rangeMaker, const F& f, Args&&... 
args) const { galois::do_all(rangeMaker, f, std::forward(args)...); } }; /** * Helper functor to invoke std::for_each with the same interface as * galois::do_all */ struct StdForEach { template void operator()(const RangeFunc& rangeMaker, const F& f, Args&&... args) const { auto range = rangeMaker(std::make_tuple(args...)); std::for_each(range.begin(), range.end(), f); } }; struct ForEach { template void operator()(const RangeFunc& rangeMaker, const F& f, Args&&... args) const { galois::for_each(rangeMaker, f, std::forward(args)...); } }; template struct WhileQ { Q m_q; WhileQ(Q&& q = Q()) : m_q(std::move(q)) {} template void operator()(const RangeFunc& rangeMaker, const F& f, Args&&... args) { auto range = rangeMaker(std::make_tuple(args...)); m_q.push(range.begin(), range.end()); while (!m_q.empty()) { auto val = m_q.pop(); f(val, m_q); } } }; } // namespace galois #endif // GALOIS_LOOPS_H ================================================ FILE: libgalois/include/galois/Mem.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_MEM_H #define GALOIS_MEM_H #include "galois/config.h" #include "galois/runtime/Mem.h" namespace galois { //! [PerIterAllocTy example] //! Base allocator for per-iteration allocator typedef galois::runtime::BumpWithMallocHeap< galois::runtime::FreeListHeap> IterAllocBaseTy; //! Per-iteration allocator that conforms to STL allocator interface typedef galois::runtime::ExternalHeapAllocator PerIterAllocTy; //! [PerIterAllocTy example] //! Scalable fixed-sized allocator for T that conforms to STL allocator //! interface but does not support variable sized allocations template using FixedSizeAllocator = galois::runtime::FixedSizeAllocator; //! Scalable variable-sized allocator for T that allocates blocks of sizes in //! powers of 2 Useful for small and medium sized allocations, e.g. small or //! medium vectors, strings, deques template using Pow_2_VarSizeAlloc = typename runtime::Pow_2_BlockAllocator; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/MethodFlags.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. 
* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_METHODFLAGS_H #define GALOIS_METHODFLAGS_H #include "galois/config.h" namespace galois { /** * What should the runtime do when executing a method. * * Various methods take an optional parameter indicating what actions * the runtime should do on the user's behalf: (1) checking for conflicts, * and/or (2) saving undo information. By default, both are performed (ALL). */ enum class MethodFlag : char { UNPROTECTED = 0, WRITE = 1, READ = 2, INTERNAL_MASK = 3, PREVIOUS = 4, }; //! Bitwise & for method flags inline MethodFlag operator&(MethodFlag x, MethodFlag y) { return (MethodFlag)(((int)x) & ((int)y)); } //! Bitwise | for method flags inline MethodFlag operator|(MethodFlag x, MethodFlag y) { return (MethodFlag)(((int)x) | ((int)y)); } } // namespace galois #endif // GALOIS_METHODFLAGS_H ================================================ FILE: libgalois/include/galois/NoDerefIterator.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). 
* * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_NODEREFITERATOR_H #define GALOIS_NODEREFITERATOR_H #include "boost/iterator/iterator_adaptor.hpp" #include "galois/config.h" namespace galois { //! Modify an iterator so that *it == it template struct NoDerefIterator : public boost::iterator_adaptor, Iterator, Iterator, boost::use_default, const Iterator&> { NoDerefIterator() : NoDerefIterator::iterator_adaptor_() {} explicit NoDerefIterator(Iterator it) : NoDerefIterator::iterator_adaptor_(it) {} const Iterator& dereference() const { return NoDerefIterator::iterator_adaptor_::base_reference(); } Iterator& dereference() { return NoDerefIterator::iterator_adaptor_::base_reference(); } }; //! Convenience function to create {@link NoDerefIterator}. template NoDerefIterator make_no_deref_iterator(Iterator it) { return NoDerefIterator(it); } } // namespace galois #endif ================================================ FILE: libgalois/include/galois/PODResizeableArray.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_PODRESIZEABLEARRAY_H #define GALOIS_PODRESIZEABLEARRAY_H #include #include #include #include #include #include #include "galois/config.h" namespace galois { /** * This is a container that encapsulates a resizeable array * of plain-old-datatype (POD) elements. * There is no initialization or destruction of elements. 
*/ template class PODResizeableArray { _Tp* data_; size_t capacity_; size_t size_; public: typedef _Tp value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* pointer; typedef const value_type* const_pointer; typedef pointer iterator; typedef const_pointer const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; PODResizeableArray() : data_(NULL), capacity_(0), size_(0) {} template PODResizeableArray(InputIterator first, InputIterator last) : data_(NULL), capacity_(0), size_(0) { size_t to_add = last - first; resize(to_add); std::copy_n(first, to_add, begin()); } PODResizeableArray(size_t n) : data_(NULL), capacity_(0), size_(0) { resize(n); } //! disabled (shallow) copy constructor PODResizeableArray(const PODResizeableArray&) = delete; //! move constructor PODResizeableArray(PODResizeableArray&& v) : data_(v.data_), capacity_(v.capacity_), size_(v.size_) { v.data_ = NULL; v.capacity_ = 0; v.size_ = 0; } //! disabled (shallow) copy assignment operator PODResizeableArray& operator=(const PODResizeableArray&) = delete; //! 
move assignment operator PODResizeableArray& operator=(PODResizeableArray&& v) { if (data_ != NULL) free(data_); data_ = v.data_; capacity_ = v.capacity_; size_ = v.size_; v.data_ = NULL; v.capacity_ = 0; v.size_ = 0; return *this; } ~PODResizeableArray() { if (data_ != NULL) free(data_); } // iterators: iterator begin() { return iterator(&data_[0]); } const_iterator begin() const { return const_iterator(&data_[0]); } iterator end() { return iterator(&data_[size_]); } const_iterator end() const { return const_iterator(&data_[size_]); } reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } const_iterator cbegin() const { return begin(); } const_iterator cend() const { return end(); } const_reverse_iterator crbegin() const { return rbegin(); } const_reverse_iterator crend() const { return rend(); } // size: size_type size() const { return size_; } size_type max_size() const { return capacity_; } bool empty() const { return size_ == 0; } void reserve(size_t n) { if (n > capacity_) { if (capacity_ == 0) { capacity_ = 1; } while (capacity_ < n) { capacity_ <<= 1; } data_ = static_cast<_Tp*>( realloc(reinterpret_cast(data_), capacity_ * sizeof(_Tp))); } } void resize(size_t n) { reserve(n); size_ = n; } void clear() { size_ = 0; } // element access: reference operator[](size_type __n) { return data_[__n]; } const_reference operator[](size_type __n) const { return data_[__n]; } reference at(size_type __n) { if (__n >= size_) throw std::out_of_range("PODResizeableArray::at"); return data_[__n]; } const_reference at(size_type __n) const { if (__n >= size_) throw std::out_of_range("PODResizeableArray::at"); return data_[__n]; } void assign(iterator first, iterator last) { size_t n = last - first; resize(n); memcpy(reinterpret_cast(data_), first, n * 
sizeof(_Tp)); } reference front() { return data_[0]; } const_reference front() const { return data_[0]; } reference back() { return data_[size_ - 1]; } const_reference back() const { return data_[size_ - 1]; } pointer data() { return data_; } const_pointer data() const { return data_; } void push_back(const _Tp& value) { resize(size_ + 1); data_[size_ - 1] = value; } template void insert(iterator GALOIS_USED_ONLY_IN_DEBUG(position), InputIterator first, InputIterator last) { assert(position == end()); size_t old_size = size_; size_t to_add = last - first; resize(old_size + to_add); std::copy_n(first, to_add, begin() + old_size); } void swap(PODResizeableArray& v) { std::swap(data_, v.data_); std::swap(size_, v.size_); std::swap(capacity_, v.capacity_); } }; } // namespace galois #endif // GALOIS_PODRESIZEABLEARRAY_H ================================================ FILE: libgalois/include/galois/ParallelSTL.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_PARALLELSTL_H #define GALOIS_PARALLELSTL_H #include "galois/config.h" #include "galois/GaloisForwardDecl.h" #include "galois/NoDerefIterator.h" #include "galois/runtime/Range.h" #include "galois/Reduction.h" #include "galois/Traits.h" #include "galois/UserContext.h" #include "galois/Threads.h" #include "galois/worklists/Chunk.h" namespace galois { //! Parallel versions of STL library algorithms. // TODO: rename to gstl? namespace ParallelSTL { template size_t count_if(InputIterator first, InputIterator last, Predicate pred) { galois::GAccumulator count; galois::do_all(galois::iterate(first, last), [&](const auto& v) { if (pred(v)) { count += 1; } }); return count.reduce(); } template struct find_if_helper { typedef galois::optional ElementTy; typedef substrate::PerThreadStorage AccumulatorTy; AccumulatorTy& accum; Predicate& f; find_if_helper(AccumulatorTy& a, Predicate& p) : accum(a), f(p) {} void operator()(const InputIterator& v, UserContext& ctx) { if (f(*v)) { *accum.getLocal() = v; ctx.breakLoop(); } } }; template InputIterator find_if(InputIterator first, InputIterator last, Predicate pred) { typedef find_if_helper HelperTy; typedef typename HelperTy::AccumulatorTy AccumulatorTy; typedef galois::worklists::PerSocketChunkFIFO<256> WL; AccumulatorTy accum; HelperTy helper(accum, pred); for_each(galois::iterate(make_no_deref_iterator(first), make_no_deref_iterator(last)), helper, galois::disable_conflict_detection(), galois::no_pushes(), galois::parallel_break(), galois::wl()); for (unsigned i = 0; i < accum.size(); ++i) { if (*accum.getRemote(i)) return 
**accum.getRemote(i); } return last; } template Iterator choose_rand(Iterator first, Iterator last) { size_t dist = std::distance(first, last); if (dist) std::advance(first, rand() % dist); return first; } template struct sort_helper { Compare comp; //! Not equal in terms of less-than template struct neq_to { Compare comp; neq_to(Compare c) : comp(c) {} bool operator()(const value_type& a, const value_type& b) const { return comp(a, b) || comp(b, a); } }; sort_helper(Compare c) : comp(c) {} template void operator()(std::pair bounds, Context& ctx) { if (std::distance(bounds.first, bounds.second) <= 1024) { std::sort(bounds.first, bounds.second, comp); } else { typedef typename std::iterator_traits::value_type VT; RandomAccessIterator pivot = choose_rand(bounds.first, bounds.second); VT pv = *pivot; pivot = std::partition(bounds.first, bounds.second, std::bind(comp, std::placeholders::_1, pv)); // push the lower bit if (bounds.first != pivot) ctx.push(std::make_pair(bounds.first, pivot)); // adjust the upper bit pivot = std::find_if(pivot, bounds.second, std::bind(neq_to(comp), std::placeholders::_1, pv)); // push the upper bit if (bounds.second != pivot) ctx.push(std::make_pair(pivot, bounds.second)); } } }; template std::pair dual_partition(RandomAccessIterator first1, RandomAccessIterator last1, RandomAccessIterator first2, RandomAccessIterator last2, Predicate pred) { typedef std::reverse_iterator RI; RI first3(last2), last3(first2); while (true) { while (first1 != last1 && pred(*first1)) ++first1; if (first1 == last1) break; while (first3 != last3 && !pred(*first3)) ++first3; if (first3 == last3) break; std::swap(*first1++, *first3++); } return std::make_pair(first1, first3.base()); } template struct partition_helper { typedef std::pair RP; struct partition_helper_state { RandomAccessIterator first, last; RandomAccessIterator rfirst, rlast; substrate::SimpleLock Lock; Predicate pred; typename std::iterator_traits::difference_type BlockSize() { return 1024; } 
partition_helper_state(RandomAccessIterator f, RandomAccessIterator l, Predicate p) : first(f), last(l), rfirst(l), rlast(f), pred(p) {} RP takeHigh() { Lock.lock(); unsigned BS = std::min(BlockSize(), std::distance(first, last)); last -= BS; RandomAccessIterator rv = last; Lock.unlock(); return std::make_pair(rv, rv + BS); } RP takeLow() { Lock.lock(); unsigned BS = std::min(BlockSize(), std::distance(first, last)); RandomAccessIterator rv = first; first += BS; Lock.unlock(); return std::make_pair(rv, rv + BS); } void update(RP low, RP high) { Lock.lock(); if (low.first != low.second) { rfirst = std::min(rfirst, low.first); rlast = std::max(rlast, low.second); } if (high.first != high.second) { rfirst = std::min(rfirst, high.first); rlast = std::max(rlast, high.second); } Lock.unlock(); } }; partition_helper(partition_helper_state* s) : state(s) {} partition_helper_state* state; void operator()(unsigned, unsigned) { RP high, low; do { RP parts = dual_partition(low.first, low.second, high.first, high.second, state->pred); low.first = parts.first; high.second = parts.second; if (low.first == low.second) low = state->takeLow(); if (high.first == high.second) high = state->takeHigh(); } while (low.first != low.second && high.first != high.second); state->update(low, high); } }; template RandomAccessIterator partition(RandomAccessIterator first, RandomAccessIterator last, Predicate pred) { if (std::distance(first, last) <= 1024) return std::partition(first, last, pred); typedef partition_helper P; typename P::partition_helper_state s(first, last, pred); on_each(P(&s)); if (s.rfirst == first && s.rlast == last) { // perfect ! 
// abort(); return s.first; } return std::partition(s.rfirst, s.rlast, pred); } struct pair_dist { template bool operator()(const RP& x, const RP& y) { return std::distance(x.first, x.second) > std::distance(y.first, y.second); } }; template void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) { if (std::distance(first, last) <= 1024) { std::sort(first, last, comp); return; } typedef galois::worklists::PerSocketChunkFIFO<1> WL; for_each(galois::iterate({std::make_pair(first, last)}), sort_helper(comp), galois::disable_conflict_detection(), galois::wl()); } template void sort(RandomAccessIterator first, RandomAccessIterator last) { galois::ParallelSTL::sort( first, last, std::less< typename std::iterator_traits::value_type>()); } template T accumulate(InputIterator first, InputIterator last, const T& identity, const BinaryOperation& binary_op) { auto id_fn = [=]() { return identity; }; auto r = make_reducible(binary_op, id_fn); do_all(galois::iterate(first, last), [&](const T& v) { r.update(v); }); return r.reduce(); } template T accumulate(InputIterator first, InputIterator last, const T& identity = T()) { return accumulate(first, last, identity, std::plus()); } template T map_reduce(InputIterator first, InputIterator last, MapFn map_fn, ReduceFn reduce_fn, const T& identity) { auto id_fn = [=]() { return identity; }; auto r = make_reducible(reduce_fn, id_fn); galois::do_all(galois::iterate(first, last), [&](const auto& v) { r.update(map_fn(v)); }); return r.reduce(); } template std::enable_if_t>::value> destroy(I first, I last) { using T = internal::Val_ty; do_all(iterate(first, last), [=](T& i) { (&i)->~T(); }); } template std::enable_if_t>::value> destroy(I, I) {} /** * Does a partial sum from first -> last and writes the results to the d_first * iterator. 
*/ template OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first) { using ValueType = typename std::iterator_traits::value_type; size_t sizeOfVector = std::distance(first, last); // only bother with parallel execution if vector is larger than some size if (sizeOfVector >= 1024) { const size_t numBlocks = galois::getActiveThreads(); const size_t blockSize = (sizeOfVector + numBlocks - 1) / numBlocks; assert(numBlocks * blockSize >= sizeOfVector); std::vector localSums(numBlocks); // get the block sums galois::do_all( galois::iterate((size_t)0, numBlocks), [&](const size_t& block) { // block start can extend past sizeOfVector if doesn't divide evenly size_t blockStart = std::min(block * blockSize, sizeOfVector); size_t blockEnd = std::min((block + 1) * blockSize, sizeOfVector); assert(blockStart <= blockEnd); // partial accumulation of each block done now std::partial_sum(first + blockStart, first + blockEnd, d_first + blockStart); // save the last number in this block: used for block prefix sum if (blockEnd > 0) { localSums[block] = *(d_first + blockEnd - 1); } else { localSums[block] = 0; } }); // bulkPrefix[i] holds the starting sum of a particular block i std::vector bulkPrefix(numBlocks); // exclusive scan on local sums to get number to add to each block's // set of indices // Not using std::exclusive_scan because apparently it doesn't work for // some compilers ValueType runningSum = 0; for (size_t i = 0; i < numBlocks; i++) { bulkPrefix[i] = runningSum; runningSum += localSums[i]; } galois::do_all( galois::iterate((size_t)0, numBlocks), [&](const size_t& block) { // add the sums of previous elements to blocks ValueType numToAdd = bulkPrefix[block]; size_t blockStart = std::min(block * blockSize, sizeOfVector); size_t blockEnd = std::min((block + 1) * blockSize, sizeOfVector); assert(blockStart <= blockEnd); // transform applies addition to appropriate range std::transform(d_first + blockStart, d_first + blockEnd, d_first + blockStart, 
[&](ValueType& val) { return val + numToAdd; }); }); // return the iterator past the last element written return d_first + sizeOfVector; } else { // vector is small; do it serially using standard library return std::partial_sum(first, last, d_first); } } } // end namespace ParallelSTL } // end namespace galois #endif ================================================ FILE: libgalois/include/galois/PerThreadContainer.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
*/ #ifndef GALOIS_PERTHREADCONTAINER_H #define GALOIS_PERTHREADCONTAINER_H #include #include #include #include #include #include #include #include #include #include #include #include "galois/config.h" #include "galois/gdeque.h" #include "galois/gIO.h" #include "galois/gstl.h" #include "galois/PriorityQueue.h" #include "galois/runtime/Executor_DoAll.h" #include "galois/runtime/Executor_OnEach.h" #include "galois/runtime/Mem.h" #include "galois/substrate/PerThreadStorage.h" #include "galois/substrate/ThreadPool.h" #include "galois/Threads.h" #include "galois/TwoLevelIterator.h" namespace galois { namespace { enum GlobalPos { GLOBAL_BEGIN, GLOBAL_END }; #define ADAPTOR_BASED_OUTER_ITER // XXX: use a combination of boost::transform_iterator and // boost::counting_iterator to implement the following OuterPerThreadWLIter #ifdef ADAPTOR_BASED_OUTER_ITER template struct WLindexer { typedef typename PerThrdCont::container_type Ret_ty; PerThrdCont* wl; WLindexer() : wl(NULL) {} WLindexer(PerThrdCont& _wl) : wl(&_wl) {} Ret_ty& operator()(unsigned i) const { assert(wl != NULL); assert(i < wl->numRows()); return const_cast(wl->get(i)); } }; template struct TypeFactory { typedef typename boost::transform_iterator, boost::counting_iterator> OuterIter; typedef typename std::reverse_iterator RvrsOuterIter; }; template typename TypeFactory::OuterIter make_outer_begin(PerThrdCont& wl) { return boost::make_transform_iterator(boost::counting_iterator(0), WLindexer(wl)); } template typename TypeFactory::OuterIter make_outer_end(PerThrdCont& wl) { return boost::make_transform_iterator( boost::counting_iterator(wl.numRows()), WLindexer(wl)); } template typename TypeFactory::RvrsOuterIter make_outer_rbegin(PerThrdCont& wl) { return typename TypeFactory::RvrsOuterIter(make_outer_end(wl)); } template typename TypeFactory::RvrsOuterIter make_outer_rend(PerThrdCont& wl) { return typename TypeFactory::RvrsOuterIter(make_outer_begin(wl)); } #else template class OuterPerThreadWLIter : public 
boost::iterator_facade, typename PerThrdCont::container_type, boost::random_access_traversal_tag> { using container_type = typename PerThrdCont::container_type; using Diff_ty = ptrdiff_t; friend class boost::iterator_core_access; PerThrdCont* workList; // using Diff_ty due to reverse iterator, whose // end is -1, and, begin is numRows - 1 Diff_ty row; void assertInRange() const { assert((row >= 0) && (row < workList->numRows())); } // container_type& getWL() { // assertInRange(); // return (*workList)[row]; // } container_type& getWL() const { assertInRange(); return (*workList)[row]; } public: OuterPerThreadWLIter() : workList(NULL), row(0) {} OuterPerThreadWLIter(PerThrdCont& wl, const GlobalPos& pos) : workList(&wl), row(0) { switch (pos) { case GLOBAL_BEGIN: row = 0; break; case GLOBAL_END: row = wl.numRows(); break; default: std::abort(); } } container_type& dereference(void) const { return getWL(); } // const container_type& dereference (void) const { // getWL (); // } void increment(void) { ++row; } void decrement(void) { --row; } bool equal(const OuterPerThreadWLIter& that) const { assert(this->workList == that.workList); return this->row == that.row; } void advance(ptrdiff_t n) { row += n; } Diff_ty distance_to(const OuterPerThreadWLIter& that) const { assert(this->workList == that.workList); return that.row - this->row; } }; template OuterPerThreadWLIter make_outer_begin(PerThrdCont& wl) { return OuterPerThreadWLIter(wl, GLOBAL_BEGIN); } template OuterPerThreadWLIter make_outer_end(PerThrdCont& wl) { return OuterPerThreadWLIter(wl, GLOBAL_END); } template std::reverse_iterator> make_outer_rbegin(PerThrdCont& wl) { typedef typename std::reverse_iterator> Ret_ty; return Ret_ty(make_outer_end(wl)); } template std::reverse_iterator> make_outer_rend(PerThrdCont& wl) { typedef typename std::reverse_iterator> Ret_ty; return Ret_ty(make_outer_begin(wl)); } #endif } // end namespace template class PerThreadContainer { public: typedef Cont_tp container_type; 
typedef typename container_type::value_type value_type; typedef typename container_type::reference reference; typedef typename container_type::pointer pointer; typedef typename container_type::size_type size_type; typedef typename container_type::iterator local_iterator; typedef typename container_type::const_iterator local_const_iterator; typedef typename container_type::reverse_iterator local_reverse_iterator; typedef typename container_type::const_reverse_iterator local_const_reverse_iterator; typedef PerThreadContainer This_ty; #ifdef ADAPTOR_BASED_OUTER_ITER typedef typename TypeFactory::OuterIter OuterIter; typedef typename TypeFactory::RvrsOuterIter RvrsOuterIter; #else typedef OuterPerThreadWLIter OuterIter; typedef typename std::reverse_iterator RvrsOuterIter; #endif typedef typename galois::ChooseStlTwoLevelIterator< OuterIter, typename container_type::iterator>::type global_iterator; typedef typename galois::ChooseStlTwoLevelIterator< OuterIter, typename container_type::const_iterator>::type global_const_iterator; typedef typename galois::ChooseStlTwoLevelIterator< RvrsOuterIter, typename container_type::reverse_iterator>::type global_reverse_iterator; typedef typename galois::ChooseStlTwoLevelIterator< RvrsOuterIter, typename container_type::const_reverse_iterator>::type global_const_reverse_iterator; typedef global_iterator iterator; typedef global_const_iterator const_iterator; typedef global_reverse_iterator reverse_iterator; typedef global_const_reverse_iterator const_reverse_iterator; private: // XXX: for testing only #if 0 struct FakePTS { std::vector v; FakePTS () { v.resize (size ()); } container_type** getLocal () const { return getRemote (galois::runtime::LL::getTID ()); } container_type** getRemote (size_t i) const { assert (i < v.size ()); return const_cast (&v[i]); } size_t size () const { return galois::runtime::LL::getMaxThreads(); } }; #endif // typedef FakePTS PerThrdCont_ty; typedef galois::substrate::PerThreadStorage PerThrdCont_ty; 
PerThrdCont_ty perThrdCont; void destroy() { for (unsigned i = 0; i < perThrdCont.size(); ++i) { delete *perThrdCont.getRemote(i); *perThrdCont.getRemote(i) = NULL; } } protected: PerThreadContainer() : perThrdCont() { for (unsigned i = 0; i < perThrdCont.size(); ++i) { *perThrdCont.getRemote(i) = NULL; } } template void init(Args&&... args) { for (unsigned i = 0; i < perThrdCont.size(); ++i) { *perThrdCont.getRemote(i) = new container_type(std::forward(args)...); } } ~PerThreadContainer() { clear_all_parallel(); destroy(); } public: unsigned numRows() const { return perThrdCont.size(); } container_type& get() { return **(perThrdCont.getLocal()); } const container_type& get() const { return **(perThrdCont.getLocal()); } container_type& get(unsigned i) { return **(perThrdCont.getRemote(i)); } const container_type& get(unsigned i) const { return **(perThrdCont.getRemote(i)); } container_type& operator[](unsigned i) { return get(i); } const container_type& operator[](unsigned i) const { return get(i); } global_iterator begin_all() { return galois::stl_two_level_begin(make_outer_begin(*this), make_outer_end(*this)); } global_iterator end_all() { return galois::stl_two_level_end(make_outer_begin(*this), make_outer_end(*this)); } global_const_iterator begin_all() const { return cbegin_all(); } global_const_iterator end_all() const { return cend_all(); } // for compatibility with Range.h global_iterator begin() { return begin_all(); } global_iterator end() { return end_all(); } global_const_iterator begin() const { return begin_all(); } global_const_iterator end() const { return end_all(); } global_const_iterator cbegin() const { return cbegin_all(); } global_const_iterator cend() const { return cend_all(); } global_const_iterator cbegin_all() const { return galois::stl_two_level_cbegin(make_outer_begin(*this), make_outer_end(*this)); } global_const_iterator cend_all() const { return galois::stl_two_level_cend(make_outer_begin(*this), make_outer_end(*this)); } 
global_reverse_iterator rbegin_all() { return galois::stl_two_level_rbegin(make_outer_rbegin(*this), make_outer_rend(*this)); } global_reverse_iterator rend_all() { return galois::stl_two_level_rend(make_outer_rbegin(*this), make_outer_rend(*this)); } global_const_reverse_iterator rbegin_all() const { return crbegin_all(); } global_const_reverse_iterator rend_all() const { return crend_all(); } global_const_reverse_iterator crbegin_all() const { return galois::stl_two_level_crbegin(make_outer_rbegin(*this), make_outer_rend(*this)); } global_const_reverse_iterator crend_all() const { return galois::stl_two_level_crend(make_outer_rbegin(*this), make_outer_rend(*this)); } local_iterator local_begin() { return get().begin(); } local_iterator local_end() { return get().end(); } // legacy STL local_const_iterator local_begin() const { return get().begin(); } local_const_iterator local_end() const { return get().end(); } local_const_iterator local_cbegin() const { return get().cbegin(); } local_const_iterator local_cend() const { return get().cend(); } local_reverse_iterator local_rbegin() { return get().rbegin(); } local_reverse_iterator local_rend() { return get().rend(); } local_const_reverse_iterator local_crbegin() const { return get().crbegin(); } local_const_reverse_iterator local_crend() const { return get().crend(); } size_type size_all() const { size_type sz = 0; for (unsigned i = 0; i < perThrdCont.size(); ++i) { sz += get(i).size(); } return sz; } // XXX: disabling because of per thread memory allocators // void clear_all() { // for (unsigned i = 0; i < perThrdCont.size(); ++i) { // get(i).clear(); // } // } void clear_all_parallel(void) { galois::runtime::on_each_gen( [this](const unsigned, const unsigned) { get().clear(); }, std::make_tuple()); } bool empty_all() const { bool res = true; for (unsigned i = 0; i < perThrdCont.size(); ++i) { res = res && get(i).empty(); } return res; } template void fill_parallel(const Range& range, Ret 
(container_type::*pushFn)(const value_type&) = &container_type::push_back) { galois::runtime::do_all_gen( range, [this, pushFn](const typename Range::value_type& v) { container_type& my = get(); (my.*pushFn)(v); // (get ().*pushFn)(v); }, std::make_tuple()); } }; template class PerThreadVector : public PerThreadContainer> { public: typedef typename gstl::template Pow2Alloc Alloc_ty; typedef typename gstl::template Vector container_type; protected: typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: PerThreadVector() : Super_ty(), alloc() { Super_ty::init(alloc); } void reserve_all(size_t sz) { size_t numT = galois::getActiveThreads(); size_t perT = (sz + numT - 1) / numT; // round up for (unsigned i = 0; i < numT; ++i) { Super_ty::get(i).reserve(perT); } } }; template class PerThreadDeque : public PerThreadContainer> { public: typedef typename gstl::template Pow2Alloc Alloc_ty; protected: typedef typename gstl::template Deque container_type; typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: PerThreadDeque() : Super_ty(), alloc() { Super_ty::init(alloc); } }; template class PerThreadGdeque : public PerThreadContainer> { using Super_ty = PerThreadContainer>; public: PerThreadGdeque() : Super_ty() { Super_ty::init(); } }; template class PerThreadList : public PerThreadContainer> { public: typedef typename gstl::template FixedSizeAlloc Alloc_ty; protected: typedef typename gstl::template List container_type; typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: PerThreadList() : Super_ty(), alloc() { Super_ty::init(alloc); } }; template > class PerThreadMap : public PerThreadContainer> { public: typedef typename gstl::template Map container_type; typedef typename gstl::template FixedSizeAlloc< typename container_type::value_type> Alloc_ty; protected: typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: explicit PerThreadMap(const C& cmp = C()) : Super_ty(), alloc() { Super_ty::init(cmp, alloc); } typedef typename 
Super_ty::global_const_iterator global_const_iterator; typedef typename Super_ty::global_const_reverse_iterator global_const_reverse_iterator; // hiding non-const (and const) versions in Super_ty global_const_iterator begin_all() const { return Super_ty::cbegin_all(); } global_const_iterator end_all() const { return Super_ty::cend_all(); } // hiding non-const (and const) versions in Super_ty global_const_reverse_iterator rbegin_all() const { return Super_ty::crbegin_all(); } global_const_reverse_iterator rend_all() const { return Super_ty::crend_all(); } }; template > class PerThreadSet : public PerThreadContainer> { public: typedef typename gstl::template FixedSizeAlloc Alloc_ty; protected: typedef typename gstl::template Set container_type; typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: explicit PerThreadSet(const C& cmp = C()) : Super_ty(), alloc() { Super_ty::init(cmp, alloc); } typedef typename Super_ty::global_const_iterator global_const_iterator; typedef typename Super_ty::global_const_reverse_iterator global_const_reverse_iterator; // hiding non-const (and const) versions in Super_ty global_const_iterator begin_all() const { return Super_ty::cbegin_all(); } global_const_iterator end_all() const { return Super_ty::cend_all(); } // hiding non-const (and const) versions in Super_ty global_const_reverse_iterator rbegin_all() const { return Super_ty::crbegin_all(); } global_const_reverse_iterator rend_all() const { return Super_ty::crend_all(); } }; template > class PerThreadMinHeap : public PerThreadContainer> { public: typedef typename gstl::template Pow2Alloc Alloc_ty; protected: typedef typename gstl::template Vector Vec_ty; typedef typename gstl::template PQ container_type; typedef PerThreadContainer Super_ty; Alloc_ty alloc; public: explicit PerThreadMinHeap(const C& cmp = C()) : Super_ty(), alloc() { Super_ty::init(cmp, Vec_ty(alloc)); } typedef typename Super_ty::global_const_iterator global_const_iterator; typedef typename 
Super_ty::global_const_reverse_iterator global_const_reverse_iterator; // hiding non-const (and const) versions in Super_ty global_const_iterator begin_all() const { return Super_ty::cbegin_all(); } global_const_iterator end_all() const { return Super_ty::cend_all(); } // hiding non-const (and const) versions in Super_ty global_const_reverse_iterator rbegin_all() const { return Super_ty::crbegin_all(); } global_const_reverse_iterator rend_all() const { return Super_ty::crend_all(); } }; } // end namespace galois #endif // GALOIS_PERTHREADCONTAINER_H ================================================ FILE: libgalois/include/galois/PriorityQueue.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. 
*/ #ifndef GALOIS_PRIORITYQUEUE_H #define GALOIS_PRIORITYQUEUE_H #include #include #include #include "galois/config.h" #include "galois/Mem.h" #include "galois/substrate/PaddedLock.h" #include "galois/substrate/CompilerSpecific.h" namespace galois { /** * Thread-safe ordered set. Faster than STL heap operations (about 10%-15% * faster on serially) and can use scalable allocation, e.g., {@link * FixedSizeAllocator}. */ template , typename Alloc = galois::FixedSizeAllocator> class ThreadSafeOrderedSet { typedef std::set Set; public: typedef Set container_type; typedef typename container_type::value_type value_type; typedef typename container_type::reference reference; typedef typename container_type::const_reference const_reference; typedef typename container_type::pointer pointer; typedef typename container_type::size_type size_type; typedef typename container_type::const_iterator iterator; typedef typename container_type::const_iterator const_iterator; typedef typename container_type::const_reverse_iterator reverse_iterator; typedef typename container_type::const_reverse_iterator const_reverse_iterator; typedef galois::substrate::SimpleLock Lock_ty; private: alignas(substrate::GALOIS_CACHE_LINE_SIZE) Lock_ty mutex; Set orderedSet; public: template , typename _Alloc = galois::FixedSizeAllocator<_T>> using retype = ThreadSafeOrderedSet<_T, _Cmp, _Alloc>; // FIXME: loses Alloc and Cmp types explicit ThreadSafeOrderedSet(const Cmp& cmp = Cmp(), const Alloc& alloc = Alloc()) : orderedSet(cmp, alloc) {} template ThreadSafeOrderedSet(Iter b, Iter e, const Cmp& cmp = Cmp(), const Alloc& alloc = Alloc()) : orderedSet(cmp, alloc) { for (; b != e; ++b) { orderedSet.insert(*b); } } bool empty() const { mutex.lock(); bool ret = orderedSet.empty(); mutex.unlock(); return ret; } size_type size() const { mutex.lock(); size_type sz = orderedSet.size(); mutex.unlock(); return sz; } value_type top() const { mutex.lock(); value_type x = *orderedSet.begin(); mutex.unlock(); return x; } 
bool find(const value_type& x) const { mutex.lock(); bool ret = (orderedSet.find(x) != orderedSet.end()); mutex.unlock(); return ret; } // for compatibility with various stl types inline void push_back(const value_type& x) { this->push(x); } inline void insert(const value_type& x) { this->push(x); } bool push(const value_type& x) { mutex.lock(); auto p = orderedSet.insert(x); mutex.unlock(); return p.second; } value_type pop() { mutex.lock(); value_type x = *orderedSet.begin(); orderedSet.erase(orderedSet.begin()); mutex.unlock(); return x; } bool remove(const value_type& x) { mutex.lock(); bool ret = false; if (x == *orderedSet.begin()) { orderedSet.erase(orderedSet.begin()); ret = true; } else { size_type s = orderedSet.erase(x); ret = (s > 0); } mutex.unlock(); return ret; } void clear() { mutex.lock(); orderedSet.clear(); mutex.unlock(); } const_iterator begin() const { return orderedSet.begin(); } const_iterator end() const { return orderedSet.end(); } }; template , typename Cont = std::vector>> class MinHeap { public: typedef runtime::Pow_2_BlockAllocator alloc_type; typedef Cont container_type; typedef typename container_type::value_type value_type; typedef typename container_type::reference reference; typedef typename container_type::const_reference const_reference; typedef typename container_type::pointer pointer; typedef typename container_type::size_type size_type; typedef typename container_type::const_iterator iterator; typedef typename container_type::const_iterator const_iterator; typedef typename container_type::const_reverse_iterator reverse_iterator; typedef typename container_type::const_reverse_iterator const_reverse_iterator; // typedef typename container_type::const_iterator iterator; protected: struct RevCmp { Cmp cmp; explicit RevCmp(const Cmp& cmp) : cmp(cmp) {} bool operator()(const T& left, const T& right) const { return cmp(right, left); } }; Cont container; RevCmp revCmp; const_reference top_internal() const { 
assert(!container.empty()); return container.front(); } value_type pop_internal() { assert(!container.empty()); std::pop_heap(container.begin(), container.end(), revCmp); value_type x = container.back(); container.pop_back(); return x; } public: explicit MinHeap(const Cmp& cmp = Cmp(), const Cont& container = Cont()) : container(container), revCmp(cmp) {} template MinHeap(Iter b, Iter e, const Cmp& cmp = Cmp()) : container(b, e), revCmp(cmp) { std::make_heap(container.begin(), container.end()); } bool empty() const { return container.empty(); } size_type size() const { return container.size(); } const_reference top() const { return container.front(); } // for compatibility with various stl types inline void push_back(const value_type& x) { this->push(x); } inline void insert(const value_type& x) { this->push(x); } void push(const value_type& x) { container.push_back(x); std::push_heap(container.begin(), container.end(), revCmp); } value_type pop() { assert(!container.empty()); std::pop_heap(container.begin(), container.end(), revCmp); value_type x = container.back(); container.pop_back(); return x; } bool remove(const value_type& x) { bool ret = false; // TODO: write a better remove method if (x == top()) { pop(); ret = true; } else { typename container_type::iterator nend = std::remove(container.begin(), container.end(), x); ret = (nend != container.end()); container.erase(nend, container.end()); std::make_heap(container.begin(), container.end(), revCmp); } return ret; } bool find(const value_type& x) const { return (std::find(begin(), end(), x) != end()); } void clear() { container.clear(); } const_iterator begin() const { return container.begin(); } const_iterator end() const { return container.end(); } void reserve(size_type s) { container.reserve(s); } }; /** * Thread-safe min heap. 
*/ template > class ThreadSafeMinHeap { public: typedef MinHeap container_type; typedef typename container_type::value_type value_type; typedef typename container_type::reference reference; typedef typename container_type::const_reference const_reference; typedef typename container_type::pointer pointer; typedef typename container_type::size_type size_type; typedef typename container_type::const_iterator iterator; typedef typename container_type::const_iterator const_iterator; typedef typename container_type::const_reverse_iterator reverse_iterator; typedef typename container_type::const_reverse_iterator const_reverse_iterator; protected: typedef galois::substrate::SimpleLock Lock_ty; alignas(substrate::GALOIS_CACHE_LINE_SIZE) Lock_ty mutex; container_type heap; public: explicit ThreadSafeMinHeap(const Cmp& cmp = Cmp()) : heap(cmp) {} template ThreadSafeMinHeap(Iter b, Iter e, const Cmp& cmp = Cmp()) : heap(b, e, cmp) {} bool empty() const { mutex.lock(); bool ret = heap.empty(); mutex.unlock(); return ret; } size_type size() const { mutex.lock(); size_type sz = heap.size(); mutex.unlock(); return sz; } // can't return a reference, because the reference may not be pointing // to a valid location due to vector doubling in size and moving to // another memory location value_type top() const { mutex.lock(); value_type x = heap.top(); mutex.unlock(); return x; } // for compatibility with various stl types inline void push_back(const value_type& x) { this->push(x); } inline void insert(const value_type& x) { this->push(x); } void push(const value_type& x) { mutex.lock(); heap.push(x); mutex.unlock(); } value_type pop() { mutex.lock(); value_type x = heap.pop(); mutex.unlock(); return x; } bool remove(const value_type& x) { // TODO: write a better remove method mutex.lock(); bool ret = heap.remove(x); mutex.unlock(); return ret; } bool find(const value_type& x) const { mutex.lock(); bool ret = heap.find(x); mutex.unlock(); return ret; } void clear() { mutex.lock(); 
heap.clear(); mutex.unlock(); } // TODO: can't use in parallel context const_iterator begin() const { return heap.begin(); } const_iterator end() const { return heap.end(); } void reserve(size_type s) { heap.reserve(s); } }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/Reduction.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_REDUCTION_H #define GALOIS_REDUCTION_H #include #include #include "galois/config.h" #include "galois/substrate/PerThreadStorage.h" namespace galois { /** * A Reducible stores per-thread values of a variable of type T and merges * multiple values into one. * * The reduced value is obtained by merging per thread values using the binary * functor MergeFunc. 
MergeFunc takes two values of type T and produces the * resulting merged value: * * T operator()(T lhs, T rhs) * * If T is expensive to copy, a moving merge function is more appropriate: * * T& operator()(T& lhs, T&& rhs) * * IdFunc returns the identity element, which is used to initialize and reset * the per thread values. * * Both MergeFunc and IdFunc should be copy constructable. * * The MergeFunc and IdFunc should be related as follows: * * MergeFunc(x, IdFunc()) == x for all x in X * * An example of using a move merge function: * * auto merge_func = [](T& lhs, T&& rhs) -> T& { ... } * auto identity_func = []() -> T { ... } * * auto r = make_reducible(merge_func, identity_func); * T u = ... * r.update(std::move(u)); * T& result = r.reduce(); */ template class Reducible : public MergeFunc, public IdFunc { galois::substrate::PerThreadStorage data_; void merge(T& lhs, T&& rhs) { T v{std::move(MergeFunc::operator()(lhs, std::move(rhs)))}; lhs = std::move(v); } void merge(T& lhs, const T& rhs) { lhs = MergeFunc::operator()(lhs, rhs); } public: using value_type = T; Reducible(MergeFunc merge_func, IdFunc id_func) : MergeFunc(merge_func), IdFunc(id_func) { for (unsigned i = 0; i < data_.size(); ++i) { *(data_.getRemote(i)) = IdFunc::operator()(); } } /** * Updates the thread local value by applying the reduction operator to * current and newly provided value */ void update(T&& rhs) { merge(*data_.getLocal(), std::move(rhs)); } void update(const T& rhs) { merge(*data_.getLocal(), rhs); } /** * Returns a reference to the local value of T. */ T& getLocal() { return *data_.getLocal(); } /** * Returns the final reduction value. Only valid outside the parallel region. 
*/ T& reduce() { T& lhs = *data_.getLocal(); for (unsigned int i = 1; i < data_.size(); ++i) { T& rhs = *data_.getRemote(i); merge(lhs, std::move(rhs)); rhs = IdFunc::operator()(); } return lhs; } void reset() { for (unsigned int i = 0; i < data_.size(); ++i) { *data_.getRemote(i) = IdFunc::operator()(); } } }; /** * make_reducible creates a Reducible from a merge function and identity * function. */ template auto make_reducible(const MergeFn& mergeFn, const IdFn& idFn) { return Reducible, MergeFn, IdFn>(mergeFn, idFn); } //! gmax is the functional form of std::max template struct gmax { constexpr T operator()(const T& lhs, const T& rhs) const { return std::max(lhs, rhs); } }; //! gmax is the functional form of std::max template struct gmin { constexpr T operator()(const T& lhs, const T& rhs) const { return std::min(lhs, rhs); } }; template struct identity_value { constexpr T operator()() const { return T{value}; } }; // The following identity_value specializations exist because floating point // numbers cannot be template arguments. template struct identity_value_zero { constexpr T operator()() const { return T{0}; } }; template struct identity_value_min { constexpr T operator()() const { return std::numeric_limits::min(); } }; template struct identity_value_max { constexpr T operator()() const { return std::numeric_limits::max(); } }; //! Accumulator for T where accumulation is plus template class GAccumulator : public Reducible, identity_value_zero> { using base_type = Reducible, identity_value_zero>; public: GAccumulator() : base_type(std::plus(), identity_value_zero()) {} GAccumulator& operator+=(const T& rhs) { base_type::update(rhs); return *this; } GAccumulator& operator-=(const T& rhs) { base_type::update(rhs); return *this; } }; //! 
Accumulator for T where accumulation is max template class GReduceMax : public Reducible, identity_value_min> { using base_type = Reducible, identity_value_min>; public: GReduceMax() : base_type(gmax(), identity_value_min()) {} }; //! Accumulator for T where accumulation is min template class GReduceMin : public Reducible, identity_value_max> { using base_type = Reducible, identity_value_max>; public: GReduceMin() : base_type(gmin(), identity_value_max()) {} }; //! logical AND reduction class GReduceLogicalAnd : public Reducible, identity_value> { using base_type = Reducible, identity_value>; public: GReduceLogicalAnd() : base_type(std::logical_and(), identity_value()) {} }; //! logical OR reduction class GReduceLogicalOr : public Reducible, identity_value> { using base_type = Reducible, identity_value>; public: GReduceLogicalOr() : base_type(std::logical_or(), identity_value()) {} }; } // namespace galois #endif // GALOIS_REDUCTION_H ================================================ FILE: libgalois/include/galois/SharedMemSys.h ================================================ #ifndef GALOIS_SHAREDMEMSYS_H #define GALOIS_SHAREDMEMSYS_H #include "galois/config.h" #include "galois/runtime/SharedMem.h" namespace galois { /** * SharedMemSys is an explicit class to initialize the Galois runtime. The * runtime is destroyed when this object is destroyed. */ class SharedMemSys : public runtime::SharedMem { public: explicit SharedMemSys(); ~SharedMemSys(); SharedMemSys(const SharedMemSys&) = delete; SharedMemSys& operator=(const SharedMemSys&) = delete; SharedMemSys(SharedMemSys&&) = delete; SharedMemSys& operator=(SharedMemSys&&) = delete; }; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/Threads.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. 
The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_THREADS_H #define GALOIS_THREADS_H #include "galois/config.h" namespace galois { /** * Sets the number of threads to use when running any Galois iterator. Returns * the actual value of threads used, which could be less than the requested * value. System behavior is undefined if this function is called during * parallel execution or after the first parallel execution. */ unsigned int setActiveThreads(unsigned int num) noexcept; /** * Returns the number of threads in use. */ unsigned int getActiveThreads() noexcept; } // namespace galois #endif ================================================ FILE: libgalois/include/galois/Timer.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). 
* * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_TIMER_H #define GALOIS_TIMER_H #include #include "galois/config.h" #include "galois/gstl.h" namespace galois { //! A simple timer class Timer { typedef std::chrono::steady_clock clockTy; // typedef std::chrono::high_resolution_clock clockTy; std::chrono::time_point startT, stopT; public: void start(); void stop(); uint64_t get() const; uint64_t get_usec() const; }; //! A multi-start time accumulator. //! Gives the final runtime for a series of intervals class TimeAccumulator { Timer ltimer; uint64_t acc; public: TimeAccumulator(); void start(); //! adds the current timed interval to the total void stop(); uint64_t get() const; uint64_t get_usec() const; TimeAccumulator& operator+=(const TimeAccumulator& rhs); TimeAccumulator& operator+=(const Timer& rhs); }; //! Galois Timer that automatically reports stats upon destruction //! 
Provides statistic interface around timer class StatTimer : public TimeAccumulator { gstl::Str name_; gstl::Str region_; bool valid_; public: StatTimer(const char* name, const char* region); StatTimer(const char* const n) : StatTimer(n, nullptr) {} StatTimer() : StatTimer(nullptr, nullptr) {} StatTimer(const StatTimer&) = delete; StatTimer(StatTimer&&) = delete; StatTimer& operator=(const StatTimer&) = delete; StatTimer& operator=(StatTimer&&) = delete; ~StatTimer(); void start(); void stop(); uint64_t get_usec() const; }; template class CondStatTimer : public StatTimer { public: CondStatTimer(const char* const n, const char* region) : StatTimer(n, region) {} CondStatTimer(const char* region) : CondStatTimer("Time", region) {} }; template <> class CondStatTimer { public: CondStatTimer(const char*) {} CondStatTimer(const char* const, const char*) {} void start() const {} void stop() const {} uint64_t get_usec() const { return 0; } }; template void timeThis(const F& f, const char* const name) { StatTimer t("Time", name); t.start(); f(); t.stop(); } } // end namespace galois #endif ================================================ FILE: libgalois/include/galois/Traits.h ================================================ /* * This file belongs to the Galois project, a C++ library for exploiting * parallelism. The code is being released under the terms of the 3-Clause BSD * License (a copy is located in LICENSE.txt at the top-level directory). * * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances * shall University be liable for incidental, special, indirect, direct or * consequential damages or loss of profits, interruption of business, or * related expenses which may arise from use of Software or Documentation, * including but not limited to those resulting from defects in Software and/or * Documentation, or loss or inaccuracy of data of any kind. */ #ifndef GALOIS_TRAITS_H #define GALOIS_TRAITS_H #include #include #include "galois/config.h" #include "galois/worklists/WorkList.h" namespace galois { // Trait classifications template struct trait_has_type { typedef T type; }; template struct trait_has_value { typedef T type; type value; trait_has_value(const type& v) : value(v) {} trait_has_value(type&& v) : value(std::move(v)) {} T getValue() const { return value; } }; template struct trait_has_svalue { typedef T type; static const type value = V; T getValue() const { return V; } }; /** * Utility function to simplify creating traits that take unnamed functions * (i.e., lambdas). */ template