Showing preview only (3,837K chars total). Download the full file or copy to clipboard to get everything.
Repository: luckiezhou/DynamicTriad
Branch: master
Commit: 06ab0d8dd566
Files: 65
Total size: 3.6 MB
Directory structure:
gitextract_6ehqdt73/
├── .gitignore
├── LICENSE
├── README.md
├── __main__.py
├── build.sh
├── core/
│ ├── __init__.py
│ ├── algorithm/
│ │ ├── CMakeLists.txt
│ │ ├── __init__.py
│ │ ├── boost_python_omp.h
│ │ ├── cmake/
│ │ │ └── FindNumPy.cmake
│ │ ├── dynamic_triad.py
│ │ ├── dynamic_triad_cimpl.cpp
│ │ ├── embutils.py
│ │ └── samplers/
│ │ ├── __init__.py
│ │ ├── pos_neg.py
│ │ ├── pos_neg_tri.py
│ │ └── sampler.py
│ ├── cython_src/
│ │ ├── README.txt
│ │ └── utils_cy.pyx
│ ├── dataset/
│ │ ├── __init__.py
│ │ ├── adjlist.py
│ │ ├── citation.py
│ │ └── dataset_utils.py
│ ├── gconfig.py
│ ├── gconv.py
│ ├── graph/
│ │ ├── CMakeLists.txt
│ │ ├── CMakeLists.txt.user
│ │ ├── _test_graph.cpp
│ │ ├── defs.h
│ │ ├── exception.cpp
│ │ ├── exception.h
│ │ ├── graph.cpp
│ │ ├── graph.h
│ │ ├── graph_pywrapper.cpp
│ │ ├── graph_pywrapper.h
│ │ ├── ioutils.cpp
│ │ ├── ioutils.h
│ │ ├── nodemap.cpp
│ │ ├── nodemap.h
│ │ ├── nodeset.cpp
│ │ ├── nodeset.h
│ │ ├── types.cpp
│ │ ├── types.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── graphtool_utils.py
│ ├── kerasext/
│ │ ├── __init__.py
│ │ ├── debug/
│ │ │ ├── __init__.py
│ │ │ └── finite_number_check.py
│ │ └── keras_backend_patches/
│ │ ├── __init__.py
│ │ ├── tensorflow_patches.py
│ │ └── theano_patches.py
│ ├── mygraph_utils.py
│ ├── utils.py
│ └── utils_py.py
├── data/
│ └── academic_toy.pickle
├── docs/
│ ├── README.md
│ ├── _config.yml
│ └── _layouts/
│ └── default.html
├── requirements.txt
└── scripts/
├── academic2adjlist.py
├── demo.sh
├── demo_raw.sh
├── stdtests.py
└── test.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# DynamicTriad
This project implements the DynamicTriad algorithm proposed in [1], which is a node embedding algorithm for undirected dynamic graphs.
## Quick Links
- [Building and Testing](#building-and-testing)
- [Usage](#usage)
- [Performance](#performance)
- [Reference](#reference)
## Building and Testing
This project is implemented primarily in Python 2.7, with some c/c++ extensions written for time efficiency.
Though the program falls back to a pure Python implementation if the c/c++ extensions fail to build, we **DISCOURAGE** you from using this fallback code because it might not have been actively maintained and properly tested.
The c/c++ code is **ONLY** compiled and tested with standard GNU gcc/g++ compilers (with c++11 and OpenMP support), and other compilers are explicitly disabled in our build scripts. If you have to use another compiler, modifications on build scripts are required.
### Dependencies
- [Boost.Python](https://www.boost.org/doc/libs/release/libs/python/). Version 1.54.0 has been tested. You can find instructions to install from source [here](http://www.boost.org/doc/libs/1_65_1/libs/python/doc/html/building/installing_boost_python_on_your_.html).
- [CMake](https://cmake.org).
Version >= 2.8 required. You can find installation instructions [here](https://cmake.org/install/).
- [Eigen 3](https://eigen.tuxfamily.org/).
Version 3.2.8 has been tested, and later versions are expected to be compatible. You can find installation instructions [here](https://eigen.tuxfamily.org/dox/GettingStarted.html).
- [Python 2.7](https://www.python.org).
Version 2.7.13 has been tested. Note that Python development headers are required to build the c/c++ extensions.
- [graph-tool](https://graph-tool.skewed.de).
Version 2.18 has been tested. You can find installation instructions [here](https://git.skewed.de/count0/graph-tool/wikis/installation-instructions).
- [TensorFlow](https://www.tensorflow.org). Version 1.1.0 has been tested. You can find installation instructions [here](https://www.tensorflow.org/install/). Note that the GPU support is **ENCOURAGED** as it greatly boosts training efficiency.
- [Other Python modules](https://pypi.python.org). Some other Python module dependencies are listed in ```requirements.txt```, which can be easily installed with pip:
```
pip install -r requirements.txt
```
Although not necessarily mentioned in all the installation instruction links above, you can find most of the libraries in the package repository of a regular Linux distribution.
### Building the Project
Before building the project, we recommend switching the working directory to the project root directory. Assume the project root is at ``<dynamic_triad_root>``, then run command
```
cd <dynamic_triad_root>
```
Note that we assume ``<dynamic_triad_root>`` as your working directory in all the commands presented in the rest of this documentation.
A building script ```build.sh``` is available in the root directory of this project, simplifying the building process to executing a single command
```
bash build.sh
```
Before running the actual building commands, the script requires you to configure some of the environment variables. You can either use the default values or specify your custom installation paths for certain libraries. For example,
```
PYTHON_LIBRARY? (default: /usr/lib64/libpython2.7.so.1.0, use a space ' ' to leave it empty)
PYTHON_INCLUDE_DIR? (default: /usr/include/python2.7, use a space ' ' to leave it empty)
EIGEN3_INCLUDE_DIR? (default: /usr/include, use a space ' ' to leave it empty)
BOOST_ROOT? (default: , use a space ' ' to leave it empty) ~/boost_1_54_1
```
If everything goes well, the ```build.sh``` script will automate the building process and create all necessary binaries.
Note that the project also contains some Cython modules, however, they will be automatically built as soon as the module is imported if the environment is ready.
### Testing the Project
A test script ```scripts/test.py``` is available, run
```
python scripts/test.py
```
to see if everything is fine with building.
## Usage
Given a sequence of undirected graphs, each for a time step, this program can be used to compute a real-valued vector for each vertex at each time step.
### Input Format
The input is expected to be a directory containing ``N`` input files named ``0, 1, 2...``, where `N` is the length of the graph sequence. Each file contains an adjacency list of the corresponding graph, and the adjacency list consists of multiple lines, each in the format:
```
<from_node_name> [<to_node_name1> <weight1> [<to_node_name2> <weight2> ...] ]
```
where ``x_node_name`` can be any ascii string without white space characters in it, and ``weight`` are float or integer values. The line describes edges from ``from_node_name`` to ``to_node_name1`` and ``to_node_name2`` with weight ``weight1`` and ``weight2`` respectively.
Note that:
- The graph is expected to be undirected, however, it should be presented in a **directed format**. That is, if there is an edge (u, v, w), its reciprocal edge (v, u, w) must also exist in the adjacency list.
- The vertex set should be same for all graphs, if a vertex is missing in a certain graph, simply present it as an isolated vertex.
- If a vertex has no outbound vertices, you should write a line with only the ``from_node_name``, instead of ignoring this vertex.
- If the graph is unweighted, place a ``1.0`` for all ``weight`` placeholders, rather than ignoring all weights in the adjacency list.
- Loopback edges (u, u, w) will be ignored when the adjacency list is loaded.
### Output Format
The program outputs to a directory creating ``N`` files named ``0.out, 1.out, 2.out, ...``, each corresponds to an input file (time step). Each output file contains ``V`` lines, where ``V`` is the number of vertices in each graph. And each line is in format:
```
<node_name> <r1> <r2> ... <rK>
```
where ``<node_name>`` is the name of the vertex defined in the input files, which is followed by ``K`` real values, i.e. the ``K``-length embedding vector for vertex ``<node_name>`` at the corresponding time step.
### Main Script
Now that the input data is ready, the main script will be called to compute dynamic vertex embeddings. Following the assumption that the current working directory is ``<dynamic_triad_root>``, the help information of the main script can be obtained by executing the command
```
python . -h
usage: . [-h] [-I NITERS] -d DATAFILE [-b BATCHSIZE] -n NSTEPS
[-K EMBDIM] [-l STEPSIZE] [-s STEPSTRIDE] -o OUTDIR
[--cachefn CACHEFN] [--lr LR] [--beta-smooth BETA_SMOOTH]
[--beta-triad BETA_TRIAD] [--negdup NEGDUP] [--validation VALIDATION]
optional arguments:
-h, --help show this help message and exit
-I NITERS, --niters NITERS
number of optimization iterations (default: 10)
-d DATAFILE, --datafile DATAFILE
input directory name (default: None)
-b BATCHSIZE, --batchsize BATCHSIZE
batchsize for training (default: 5000)
-n NSTEPS, --nsteps NSTEPS
number of time steps (default: None)
-K EMBDIM, --embdim EMBDIM
number of embedding dimensions (default: 48)
-l STEPSIZE, --stepsize STEPSIZE
size of of a time steps (default: 1)
-s STEPSTRIDE, --stepstride STEPSTRIDE
interval between two time steps (default: 1)
-o OUTDIR, --outdir OUTDIR
output directory name (default: None)
--cachefn CACHEFN prefix for data cache files (default: None)
--lr LR initial learning rate (default: 0.1)
--beta-smooth BETA_SMOOTH
coefficients for smooth component (default: 0.1)
--beta-triad BETA_TRIAD
coefficients for triad component (default: 0.1)
--negdup NEGDUP neg/pos ratio during sampling (default: 1)
--validation VALIDATION
link_prediction, link_reconstruction, node_classify,
node_predict, none (default: link_reconstruction)
```
Some of the arguments may require extra explanation:
- ``--beta-smooth/--beta-triad``, two hyper parameters used in the model, see reference [1] for details about the hyper parameters of DynamicTriad. Empirically, the hyper parameters need to be tuned in order to achieve the best performance, and the best choice depends on the task and the stability of the target dynamic network.
- ``-l/--stepsize`` and ``-s/--stepstride``, see [Time Model](#time-model) for details.
- ``--cachefn``, sometimes you find that the data preprocessing becomes intolerably time consuming (see [Time Model](#time-model)), and a solution is to specify ``--cachefn`` so that the program creates or uses a cache file of the preprocessed data. The cache file consists of two parts -- a file named ``<cachefn>.cache`` as well as a file named ``<cachefn>.cache.args``, where ``<cachefn>`` is the value passed to ``--cachefn``. If you have changed your configuration for preprocessing, remove ``<cachefn>.cache.args`` and the cache will be regenerated.
- ``--validation``, the four tasks available for validation are as defined in [1], please refer to the paper for details.
### Demo
We include a toy data set in the ``data`` directory, namely ``data/academic_toy.pickle``, which is a subset of ``Academic`` data set in [1] stored using Python pickle module. See [Data Sets](#data-sets) for more details.
A demo script is available as ``scripts/demo.sh``, which primarily does three things:
- Call ``scripts/academic2adjlist.py`` to convert the toy data to the input format described in [Input Format](#input-format).
- Call the main script to compute the vertex embeddings and save them to ``output`` directory.
- Call ``scripts/stdtests.py`` to experiment on standard tasks described in paper [1].
In the demo script, you can find an example for the standard usage of the main script, as well as hints for the usage of the other two scripts, if you are interested in them.
To run the demo, execute command
```
bash scripts/demo.sh
```
### Time Model
TL;DR: If you would like the main script to treat your graphs exactly as they are specified in your input files, please leave the arguments ``-l`` and ``-s`` to their default values.
For flexibility, a part of the data preprocessing functionalities are included into our main script. Specifically, if we call each graph file in the input directory a **unit graph**, our main script provides interfaces to create the graph for each time step out of these unit graphs.
Before describing this preprocessing step, we shall first define a **time step**. According to our assumption, a time step consists of ``<stepsize>`` consecutive unit graphs, where ``<stepsize>`` is a constant value shared across all time steps. There are ``<stepstride> - 1`` unit graphs between the leading unit graphs of two adjacent time steps, where ``<stepstride>`` is also a constant value. For example, we set ``<stepsize>=4`` and ``<stepstride>=2`` in our demo script, as a result, the time steps are:
```
time step #1: unit graph 0 -- unit graph 3
time step #2: unit graph 2 -- unit graph 5
time step #3: unit graph 4 -- unit graph 7
...
```
Once ``<stepsize>`` and ``<stepstride>`` are given, each time step now corresponds to a subsequence of unit graphs, and the graph for this time step is created by merging these unit graphs, i.e. by summing up weights for the same edge.
Note that if you set both ``<stepsize>`` and ``<stepstride>`` to 1, the graphs will be used as is specified in the input directory. If the merging operation is found to be very time expensive, specifying ``--cachefn`` avoids re-merging every time you run the script, as long as the data configuration is kept unchanged.
## Evaluation
### Data Sets
One of the three data sets reported in [1] -- the Academic Data Set -- was made public by [AMiner](https://www.aminer.cn/citation), and consists of information about papers published in recent decades. We keep only those papers published between 1980 and 2015 (inclusive), and we remove from the data those researchers with fewer than 15 publications in total and conferences with fewer than 20 participants in total, so that the resulting dynamic network becomes more stable.
In this data set, labels are extracted for each researcher indicating the research fields he/she focuses on. We manually specify a set of representing conferences for each research field, and try to find out for a researcher in which field he/she publishes most of his/her work, given a certain time step.
A toy data is included in this project as ``data/academic_toy.pickle``, which was originally the ``ACM-Citation-network V8`` data set from AMiner, and was preprocessed as we describe above, with the only difference that the vertices are further sampled to a limited size of 2000. And our full preprocessing result can be downloaded [here](https://drive.google.com/file/d/1AF5soBDb2AbAhCNKUeYa_om6IcldEU83/view?usp=sharing).
__Update__: For those who are interested in the academic dataset and wish to avoid the bothersome building process, the dataset in clean format is released [here](https://drive.google.com/file/d/1vzvVhZ-FIY3iY3nBQlW77GRfJO0o_Ugg/view?usp=sharing) (Please cite the [original publisher](https://www.aminer.cn/citation) of the data if you wish to use the dataset). See readme.txt in the package for detailed information, and feel free to contact me if there is anything wrong or unclear in the data.
### Performance
As reported in [1], the performance of DynamicTriad on Academic Data Set with embedding dimension set to 48 is:
| F1-score on Academic | Vertex Classification | Link Reconstruction | C.Link Reconstruction |
|----------------------------|-----------------------|---------------------|------------------------|
| [DeepWalk](https://github.com/phanein/deepwalk) | 0.630 | 0.694 | 0.702 |
| [node2vec]( https://github.com/aditya-grover/node2vec) | 0.359 | 0.574 | 0.611 |
| [Temporal Network Embedding](https://github.com/linhongseba/Temporal-Network-Embedding) | 0.625 | 0.974 | 0.899 |
| DynamicTriad | **0.704** | **0.985** | **0.925** |
| F1-score on Academic | Vertex Prediction | Link Prediction | C.Link Prediction |
|----------------------------|-----------------------|---------------------|------------------------|
| [DeepWalk](https://github.com/phanein/deepwalk) | 0.591 | 0.612 | 0.674 |
| [node2vec]( https://github.com/aditya-grover/node2vec) | 0.355 | 0.548 | 0.617 |
| [Temporal Network Embedding](https://github.com/linhongseba/Temporal-Network-Embedding) | 0.596 | 0.772 | 0.889 |
| DynamicTriad | **0.671** | **0.836** | **0.924** |
Please refer to [1] for more information about our experiments, where you can find the definition of tasks, the experimental settings, the description of unpublished data sets and the full results of our experiments.
## Reference
[1] Zhou, L; Yang, Y; Ren, X; Wu, F and Zhuang, Y, 2018, Dynamic Network Embedding by Modelling Triadic Closure Process, In AAAI, 2018
```
@inproceedings{zhou2018dynamic,
title = "{Dynamic Network Embedding by Modelling Triadic Closure Process}",
author = {{Zhou}, L. and {Yang}, Y. and {Ren}, X. and {Wu}, F. and {Zhuang}, Y.},
booktitle={AAAI},
year = 2018,
}
```
================================================
FILE: __main__.py
================================================
# Entry point executed when the project directory itself is run
# (e.g. `python . -h`); all work is delegated to core.main().
from __future__ import print_function
from core import main
main()
================================================
FILE: build.sh
================================================
# Silent variant of the shell's pushd: changes directory and pushes it on the
# directory stack, but discards the stack listing normally printed to stdout.
pushd () {
    builtin pushd "$@" 1> /dev/null
}
# Silent variant of the shell's popd: returns to the previous directory on the
# directory stack, but discards the stack listing normally printed to stdout.
popd () {
    builtin popd "$@" 1> /dev/null
}
# Abort the build as soon as a command fails.
set -e
# Resolve the directory that contains this script (following symlinks) and
# work from there, so relative paths below do not depend on the caller's cwd.
# Everything is quoted so that a checkout path containing whitespace works.
filedir=$(dirname "$(readlink -f "$0")")
echo entering "$filedir"
pushd "$filedir"
# Prompt the user for a build setting and print the chosen value on stdout.
#   $1 - prompt text (written to stderr so callers can capture stdout)
#   $2 - default value, used when the reply is empty
# A reply consisting of exactly one space selects the empty string.
ask() {
    local ret
    echo -n "$1 (default: $2, use a space ' ' to leave it empty) " 1>&2
    # IFS= keeps a lone-space reply intact: a plain `read` strips surrounding
    # whitespace, which made the "leave it empty" branch unreachable.
    # -r keeps backslashes in paths literal.
    IFS= read -r ret
    if [ "$ret" = " " ]; then
        ret=""
    else
        # Trim surrounding whitespace, as the plain `read` used to do.
        ret="${ret#"${ret%%[![:space:]]*}"}"
        ret="${ret%"${ret##*[![:space:]]}"}"
        if [ -z "$ret" ]; then
            ret=$2
        fi
    fi
    # printf with a quoted argument preserves the value exactly (embedded
    # spaces, leading dashes), unlike an unquoted echo.
    printf '%s\n' "$ret"
}
# --- Collect build configuration -------------------------------------------
# Each setting is offered with a default (queried from the active Python
# where possible) and exported for the cmake runs below.
echo "You may need to specify some environments before building"
pylib=$(python -c "from distutils.sysconfig import get_config_var; print('{}/{}'.format(get_config_var('LIBDIR'), get_config_var('INSTSONAME')))")
pylib=$(ask "PYTHON_LIBRARY?" "$pylib")
export PYTHON_LIBRARY="$pylib"
pyinc=$(python -c "from distutils.sysconfig import get_config_var; print(get_config_var('INCLUDEPY'))")
pyinc=$(ask "PYTHON_INCLUDE_DIR?" "$pyinc")
export PYTHON_INCLUDE_DIR="$pyinc"
eigeninc=$(ask "EIGEN3_INCLUDE_DIR?" /usr/include)
export EIGEN3_INCLUDE_DIR="$eigeninc"
boostroot=$(ask "BOOST_ROOT?" "")
export BOOST_ROOT="$boostroot"
boost_pylib=$(ask "name for boost_python library? (useful when boost_python cannot be detected by cmake)" "boost_python")
export BOOST_PYTHON_LIBNAME="$boost_pylib"

# --- Build the mygraph extension -------------------------------------------
echo building mygraph module ...
rm -rf core/mygraph-build
mkdir -p core/mygraph-build
pushd core/mygraph-build
cmake ../graph
# Each step is its own command: with `set -e`, a failure of a non-final
# command in an `a && b` list does NOT abort the script, so a broken `make`
# would previously go unnoticed and the script would still exit 0.
make
ln -sf mygraph-build/mygraph.so ../mygraph.so
popd

# --- Build the C extensions of the algorithm -------------------------------
echo building c extensions for dynamic triad ...
pushd core/algorithm
rm -rf build
mkdir build
cd build
cmake ..
make
make install
popd
popd
================================================
FILE: core/__init__.py
================================================
from __future__ import print_function
# NOTE: `sys` is reached through `os` here, which works because the `os`
# module imports `sys` itself.
from os import sys, path
# Append the parent directory of this package (two levels above this file) to
# the module search path -- presumably so that absolute names such as
# `core.dataset.adjlist` resolve regardless of the caller's working directory.
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
def main():
    """Command-line entry point: train the embedding model and export the result.

    Parses the command line, loads the dataset (optionally through a pickle
    cache selected with ``--cachefn``), runs the pretraining loop with
    validation-based early stopping, and writes one ``<index>.out`` text file
    per exported time step into ``--outdir``.

    Raises:
        NotImplementedError: if the requested validation task is missing from
            ``TestSampler.tasks`` or ``Validator.tasks``, or if any step beyond
            the pretraining window would have to be trained online (disabled).
    """
    import os
    import sys
    from six.moves import cPickle
    import argparse
    import importlib
    import time
    from os.path import isfile

    import dataset.dataset_utils as du
    import algorithm.embutils as eu

    # random.seed(977)  # for reproducability

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-I', '--niters', type=int, help="number of optimization iterations", default=10)
    parser.add_argument('-m', '--starttime', type=str, help=argparse.SUPPRESS, default=0)
    parser.add_argument('-d', '--datafile', type=str, required=True, help='input directory name')
    parser.add_argument('-b', '--batchsize', type=int, help="batchsize for training", default=5000)
    parser.add_argument('-n', '--nsteps', type=int, help="number of time steps", required=True)
    parser.add_argument('-K', '--embdim', type=int, help="number of embedding dimensions", default=48)
    parser.add_argument('-l', '--stepsize', type=int, help="size of of a time steps", default=1)
    parser.add_argument('-s', '--stepstride', type=int, help="interval between two time steps", default=1)
    parser.add_argument('-o', '--outdir', type=str, required=True, help="output directory name")
    parser.add_argument('--cachefn', type=str, help="prefix for data cache files", default=None)
    parser.add_argument('--lr', type=float, help="initial learning rate", default=0.1)
    parser.add_argument('--beta-smooth', type=float, default=0.1, help="coefficients for smooth component")
    parser.add_argument('--beta-triad', type=float, default=0.1, help="coefficients for triad component")
    parser.add_argument('--negdup', type=int, help="neg/pos ratio during sampling", default=1)
    parser.add_argument('--datasetmod', type=str, help=argparse.SUPPRESS, default='core.dataset.adjlist',
                        # help='module name for dataset loading',
                        )
    # parser.add_argument('--dataname', type=str, default=None, help='name for the current data file')
    parser.add_argument('--validation', type=str, default='link_reconstruction',
                        help=', '.join(list(sorted(set(du.TestSampler.tasks) & set(eu.Validator.tasks)))))
    args = parser.parse_args()
    args.beta = [args.beta_smooth, args.beta_triad]
    # some fixed arguments in published code
    args.pretrain_size = args.nsteps
    args.trainmod = 'core.algorithm.dynamic_triad'
    args.sampling_args = {}

    if args.validation not in du.TestSampler.tasks:
        raise NotImplementedError("Validation task {} not supported in TestSampler".format(args.validation))
    if args.validation not in eu.Validator.tasks:
        raise NotImplementedError("Validation task {} not supported in Validator".format(args.validation))

    print("running with options: ", args.__dict__)

    def load_trainmod(modname):
        """Import ``modname`` and return its ``Model`` class."""
        mod = importlib.import_module(modname)
        return getattr(mod, 'Model')

    def load_datamod(modname):
        """Import ``modname`` and return its ``Dataset`` class."""
        mod = importlib.import_module(modname)
        return getattr(mod, 'Dataset')

    def read_pickle(fn):
        # Pickles are binary data: read them in binary mode and close the
        # handle deterministically instead of leaking it.
        # NOTE: unpickling executes arbitrary code; only point --cachefn at
        # cache files you created yourself.
        with open(fn, 'rb') as fh:
            return cPickle.load(fh)

    def write_pickle(obj, fn):
        with open(fn, 'wb') as fh:
            cPickle.dump(obj, fh)

    def load_or_update_cache(ds, cachefn):
        """Populate ``ds`` from ``<cachefn>.cache`` or (re)write that cache.

        Does nothing when ``cachefn`` is None. A cache that ``ds.load_cache``
        rejects with ValueError/EOFError is regenerated from ``ds.cache()``.
        """
        if cachefn is None:
            return
        cachefn += '.cache'
        if isfile(cachefn + '.args'):
            cache_args = read_pickle(cachefn + '.args')
            try:
                # the payload is handed over lazily so it is only read on demand
                ds.load_cache(cache_args, lambda: read_pickle(cachefn))
                print("Data loaded from cache file {}".format(cachefn))
                return
            except (ValueError, EOFError) as e:
                # str(e) instead of e.message, which only exists on Python 2 exceptions
                print("Failed to load cache file {}: {}".format(cachefn, str(e)))
        # update cache
        print("updating cache file for prefix {}".format(cachefn))
        ar, cache_args = ds.cache()
        write_pickle(cache_args, cachefn + '.args')
        write_pickle(ar, cachefn)
        print("cache file {} updated".format(cachefn))

    def export(vertices, data, outdir):
        """Write ``data[i]`` to ``<outdir>/<i>.out``, one "<vertex> <v1> <v2> ..." line per vertex."""
        # create the output directory up front so a finished training run is
        # not lost to a missing directory
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        for i in range(len(data)):
            assert len(vertices) == len(data[i]), (len(vertices), len(data[i]))
            fn = "{}/{}.out".format(outdir, i)
            with open(fn, 'w') as fh:
                for j in range(len(vertices)):
                    print("{} {}".format(vertices[j], ' '.join(["{:.3f}".format(d) for d in data[i][j]])), file=fh)

    TrainModel = load_trainmod(args.trainmod)
    Dataset = load_datamod(args.datasetmod)

    ds = Dataset(args.datafile, args.starttime, args.nsteps, stepsize=args.stepsize, stepstride=args.stepstride)
    load_or_update_cache(ds, args.cachefn)
    # dsargs = {'datafile': args.datafile, 'starttime': args.starttime, 'nsteps': args.nsteps,
    #           'stepsize': args.stepsize, 'stepstride': args.stepstride, 'datasetmod': args.datasetmod}
    tm = TrainModel(ds, pretrain_size=args.pretrain_size, embdim=args.embdim, beta=args.beta,
                    lr=args.lr, batchsize=args.batchsize, sampling_args=args.sampling_args)

    edgecnt = [g.num_edges() for g in ds.gtgraphs]
    k_edgecnt = sum(edgecnt[:args.pretrain_size])
    print("{} edges in pretraining graphs".format(k_edgecnt))

    if args.pretrain_size > 0:
        initstep = ds.time2step(args.starttime)
        tm.pretrain_begin(initstep, initstep + args.pretrain_size)

        print("generating validation set")
        validargs = tm.dataset.sample_test_data(args.validation, initstep, initstep + args.pretrain_size, size=10000)
        # print(validargs)
        print("{} validation samples generated".format(len(validargs[0])))

        max_val, max_idx, maxmodel = -1, 0, None

        # for early stopping
        start_time = time.time()
        scores = []
        for i in range(args.niters):
            tm.pretrain_begin_iteration()

            epoch_loss = 0
            for batidx, bat in enumerate(tm.batches(args.batchsize)):
                inputs = tm.make_pretrain_input(bat)
                l = tm.pretrain['lossfunc'](inputs)
                if isinstance(l, (list, tuple)):
                    l = l[0]
                epoch_loss += l
                print("\repoch {}: {:.0%} completed, cur loss: {:.3f}".format(i, float(batidx * args.batchsize)
                                                                              / tm.sample_size(), l.flat[0]), end='')
                sys.stdout.flush()
            tm.pretrain_end_iteration()

            print(" training completed, total loss {}".format(epoch_loss), end='')

            # without validation, the model exists only after I iterations
            if args.validation != 'none':
                val_score = tm.validate(args.validation, *validargs)

                if val_score > max_val:
                    max_val = val_score
                    max_idx = i
                    maxmodel = tm.save_model()
                print(", validation score {:.3f}".format(val_score))
            else:
                max_idx, max_val = i, epoch_loss
                # maxmodel is not saved here in order to save time
                print("")

            # checkpoint disabled
            # if i % 5 == 0:
            #     lastmodel = tm.save_model()
            #     if args.validation == 'none':
            #         maxmodel = lastmodel
            #
            #     tm.restore_model(maxmodel)  # restore parameters while preserving other info
            #     cPickle.dump([tm.archive(), dsargs, lastmodel], open(args.outdir, 'w'))
            #     tm.restore_model(lastmodel)

            if args.validation != 'none':
                scores.append(val_score)
                # early stopping: no improvement for more than 5 iterations
                if max_val > 0 and i - max_idx > 5:
                    break

        print("best validation score at itr {}: {}".format(max_idx, max_val))
        print("{} seconds elapsed for pretraining".format(time.time() - start_time))
        # lastmodel = tm.save_model()  # for debug
        print("saving output to {}".format(args.outdir))
        tm.restore_model(maxmodel)
        tm.pretrain_end()
        export(tm.dataset.mygraphs['any'].vertices(), tm.export(), args.outdir)

    # online training disabled
    startstep = tm.dataset.time2step(args.starttime)
    for y in range(startstep + args.pretrain_size, startstep + args.nsteps):
        raise NotImplementedError()
================================================
FILE: core/algorithm/CMakeLists.txt
================================================
# Build script for the dynamic_triad C++ extension.
cmake_minimum_required(VERSION 2.8)
project(algorithm)
#set(CMAKE_PREFIX_PATH ${LD_LIBRARY_PATH})
# use local find_packages
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
# Default to a release build when the caller did not pick a build type.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RELEASE")
#set(CMAKE_BUILD_TYPE "DEBUG")
#set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
#set(CMAKE_BUILD_TYPE "MINSIZEREL")
endif()
# Only GNU C++ is supported: any other compiler aborts the configuration.
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -g")
# -O2 goes into the common flags, so it applies to every build type
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
else()
MESSAGE(FATAL_ERROR "CMakeLists.txt has not been tested/written for your compiler.")
endif()
# The extension is a shared library; with the empty PREFIX and the OUTPUT_NAME
# below the produced file is named dynamic_triad_cimpl plus the platform suffix,
# i.e. the module name that dynamic_triad.py tries to import.
add_library(dynamic_triad SHARED dynamic_triad_cimpl.cpp)
set_target_properties(dynamic_triad PROPERTIES PREFIX "" OUTPUT_NAME "dynamic_triad_cimpl")
#add_library(dynamic_triad_extfeat SHARED dynamic_triad_extfeat_cimpl.cpp)
#set_target_properties(dynamic_triad_extfeat PROPERTIES PREFIX "" OUTPUT_NAME "dynamic_triad_extfeat_cimpl")
#add_library(resolve_pyapi SHARED resolve.cpp resolve_pywrapper.cpp)
#add_library(logfile_capi SHARED logfile_capi.cpp logfile.cpp)
#add_library(unitfeat_pyapi SHARED unit_feat_pywrapper.cpp)
#add_executable(test_logfile logfile.cpp test_logfile.cpp)
#set_target_properties(resolve_pyapi PROPERTIES PREFIX "" OUTPUT_NAME "cresolve")
#set_target_properties(unitfeat_pyapi PROPERTIES PREFIX "" OUTPUT_NAME "cunitfeat")
#add_library(gounitfeat STATIC IMPORTED)
#set_target_properties(gounitfeat PROPERTIES
# IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/libgounitfeat.a)
#include_directories(${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR})
#link_directories(${CMAKE_BINARY_DIR})
#target_link_libraries(unitfeat_pyapi gounitfeat logfile_capi)
#if(CMAKE_STATIC_GLIB_PATH)
# find_package(Glibc 3.14 REQUIRED PATH ${CMAKE_STATIC_GLIBC_PATH}
# NO_SYSTEM_ENVIRONMENT_PATH)
# target_link_libraries(cppimpl ${GLIBC_LIBRARIES})
# target_link_libraries(mygraph ${GLIBC_LIBRARIES})
# set_target_properties(cppimpl mygraph PROPERTIES LINK_SEARCH_START_STATIC 1)
# set_target_properties(cppimpl mygraph PROPERTIES LINK_SEARCH_END_STATIC 1)
#endif()
# we MUST find python before boost_python,
# because boost python somehow sets PYTHON_EXECUTABLE in a wrong manner
# and find_package(python) will not reset this variable once it exists
# as a result, find_package(python) is broken due to wrong path
# assume the project main directory is ..
# Makes project-relative includes such as "graph/graph_pywrapper.h" resolvable.
include_directories(${CMAKE_SOURCE_DIR}/..)
# respect user environment if the following variables are set
# (both must be non-empty, otherwise CMake's own detection is used)
if((NOT "$ENV{PYTHON_LIBRARY}" STREQUAL "") AND (NOT
"$ENV{PYTHON_INCLUDE_DIR}" STREQUAL ""))
message("Using custom python path $ENV{PYTHON_LIBRARY} and $ENV{PYTHON_INCLUDE_DIR}")
set(PYTHON_LIBRARY $ENV{PYTHON_LIBRARY})
set(PYTHON_INCLUDE_DIR $ENV{PYTHON_INCLUDE_DIR})
endif()
set(Python_ADDITIONAL_VERSIONS 2.7)
find_package(PythonLibs 2.7 REQUIRED)
include_directories(${PYTHON_INCLUDE_DIRS})
# NOTE(review): PYTHON_LIBRARIES normally holds library files rather than
# directories, and the link line below hard-codes the name python2.7 instead of
# using ${PYTHON_LIBRARIES} -- confirm this is intended before changing it.
link_directories(${PYTHON_LIBRARIES})
target_link_libraries(dynamic_triad python2.7)
#target_link_libraries(dynamic_triad_extfeat python2.7)
# pythoninterp required by findnumpy
find_package(PythonInterp 2.7 REQUIRED)
find_package(NumPy 1.10 REQUIRED)
# PYTHON_NUMPY_FOUND is set by the local cmake/FindNumPy.cmake module
if(NOT ${PYTHON_NUMPY_FOUND})
message(FATAL_ERROR "Cannot find numpy headers")
endif()
include_directories(${PYTHON_NUMPY_INCLUDE_DIR})
# Eigen is header-only: either add the user-supplied directory or rely on the
# compiler's default include paths.
if("$ENV{EIGEN3_INCLUDE_DIR}" STREQUAL "")
message(WARNING "EIGEN3_INCLUDE_DIR not set, trying to find eigen headers in standard directories")
else()
message("Using eigen path $ENV{EIGEN3_INCLUDE_DIR}")
include_directories("$ENV{EIGEN3_INCLUDE_DIR}")
endif()
# Locate Boost in two steps: headers first, then the boost_python component.
# When CMake cannot find the component, fall back to the library name supplied
# through the BOOST_PYTHON_LIBNAME environment variable (build.sh exports it).
find_package(Boost 1.54.0)
if(Boost_FOUND)
    include_directories("${Boost_INCLUDE_DIRS}")
    set(Boost_USE_STATIC_LIBS OFF)
    set(Boost_USE_MULTITHREADED ON)
    set(Boost_USE_STATIC_RUNTIME OFF)
    find_package(Boost 1.54.0 COMPONENTS python)
    if(NOT Boost_FOUND)
        if(NOT "$ENV{BOOST_PYTHON_LIBNAME}" STREQUAL "")
            message("boost_python not detected by cmake, trying custom lib name
$ENV{BOOST_PYTHON_LIBNAME}")
        else()
            message(FATAL_ERROR "boost_python not detected by cmake, try setting
environment variable $BOOST_PYTHON_LIBNAME for custom library name")
        endif()
        # Link against the name the user actually supplied. The previous code
        # announced the custom name but always appended the hard-coded
        # "-l boost_python-2.7", silently ignoring BOOST_PYTHON_LIBNAME.
        set(Boost_LIBRARIES ${Boost_LIBRARIES} "$ENV{BOOST_PYTHON_LIBNAME}")
    endif()
elseif(NOT Boost_FOUND)
    message(FATAL_ERROR "Unable to find correct Boost version. Did you set BOOST_ROOT?")
endif()
target_link_libraries(dynamic_triad ${Boost_LIBRARIES})
#target_link_libraries(dynamic_triad_extfeat ${Boost_LIBRARIES})
# OpenMP is mandatory; its compiler flags are appended to the global C/C++ flags.
FIND_PACKAGE(OpenMP REQUIRED)
if(OPENMP_FOUND)
message("OPENMP FOUND")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
# The sources use C++11 features (e.g. alias declarations), so require that standard.
set_property(TARGET dynamic_triad PROPERTY CXX_STANDARD 11)
set_property(TARGET dynamic_triad PROPERTY CXX_STANDARD_REQUIRED ON)
# `make install` copies the built module into the source directory itself.
install(TARGETS dynamic_triad DESTINATION ${CMAKE_SOURCE_DIR} LIBRARY)
================================================
FILE: core/algorithm/__init__.py
================================================
================================================
FILE: core/algorithm/boost_python_omp.h
================================================
#ifndef OMP_UTILS_H
#define OMP_UTILS_H
#include <Python.h>
#include <cmath>
// Helpers for splitting the index range [0, sz) into `deg` contiguous chunks.
//
// Usage:
//     OMP_INIT_FOR(n, nthreads)
//     OMP_BEGIN_FOR(lb, ub)
//         ... work on [lb, ub) ...
//     OMP_END_FOR()
//
// OMP_INIT_FOR opens a do { ... } while(0) scope and computes the chunk size
// ceil(sz / deg); OMP_BEGIN_FOR opens a loop over the chunks and declares the
// clamped bounds `lbvar`/`ubvar` (trailing chunks may be empty); OMP_END_FOR
// closes both scopes.
//
// `deg` is parenthesised wherever it is expanded so that an expression
// argument such as `a + b` is divided as a whole instead of being torn apart
// by operator precedence.
#define OMP_INIT_FOR(sz, deg) \
    do { \
        int __omp_step_size = int(ceil(float(sz) / (deg)) + 0.5f); \
        int __omp_deg = (deg); \
        int __omp_sz = (sz);
#define OMP_BEGIN_FOR(lbvar, ubvar) \
        for(int __omp_par = 0; __omp_par < __omp_deg; __omp_par++) { \
            int lbvar = __omp_par * __omp_step_size; \
            int ubvar = (__omp_par + 1) * __omp_step_size; \
            ubvar = (ubvar > __omp_sz ? __omp_sz : ubvar); \
            lbvar = (lbvar > __omp_sz ? __omp_sz : lbvar);
#define OMP_END_FOR() \
        } \
    } while(0);
class GILRelease {
public:
inline GILRelease() { m_thread_state = PyEval_SaveThread(); }
inline ~GILRelease() { PyEval_RestoreThread(m_thread_state); m_thread_state = NULL; }
private:
PyThreadState* m_thread_state;
};
struct GILAcquire{
GILAcquire() {
state = PyGILState_Ensure();
}
~GILAcquire() {
PyGILState_Release(state);
}
private:
PyGILState_STATE state;
};
#endif // OMP_UTILS_H
================================================
FILE: core/algorithm/cmake/FindNumPy.cmake
================================================
# Find the native numpy includes
# This module defines
# PYTHON_NUMPY_INCLUDE_DIR, where to find numpy/arrayobject.h, etc.
# PYTHON_NUMPY_FOUND, If false, do not try to use numpy headers.
#
# Requires PYTHON_EXECUTABLE (e.g. from find_package(PythonInterp)).
# PYTHON_NUMPY_FOUND is defined on every run, including re-configurations
# where PYTHON_NUMPY_INCLUDE_DIR already comes from the cache or the command
# line; previously it was left undefined in that case.
if (NOT PYTHON_NUMPY_INCLUDE_DIR)
  exec_program ("${PYTHON_EXECUTABLE}"
    ARGS "-c" "\"import numpy; print(numpy.get_include())\""
    OUTPUT_VARIABLE PYTHON_NUMPY_INCLUDE_DIR
    RETURN_VALUE NUMPY_NOT_FOUND)
  # A non-zero exit status or a python traceback both mean that numpy could not be imported.
  if (NUMPY_NOT_FOUND OR PYTHON_NUMPY_INCLUDE_DIR MATCHES "Traceback")
    # Did not successfully include numpy
    set (PYTHON_NUMPY_FOUND FALSE)
  else ()
    # successful
    set (PYTHON_NUMPY_FOUND TRUE)
    set (PYTHON_NUMPY_INCLUDE_DIR ${PYTHON_NUMPY_INCLUDE_DIR} CACHE PATH "Numpy include path")
  endif ()
  # find_package(NumPy ...) sets NumPy_FIND_QUIETLY / NumPy_FIND_REQUIRED (variable
  # names are case-sensitive); the upper-case spellings are kept for compatibility.
  if (PYTHON_NUMPY_FOUND)
    if (NOT NUMPY_FIND_QUIETLY AND NOT NumPy_FIND_QUIETLY)
      message (STATUS "Numpy headers found")
    endif ()
  else (PYTHON_NUMPY_FOUND)
    if (NUMPY_FIND_REQUIRED OR NumPy_FIND_REQUIRED)
      message (FATAL_ERROR "Numpy headers missing")
    endif ()
  endif (PYTHON_NUMPY_FOUND)
  mark_as_advanced (PYTHON_NUMPY_INCLUDE_DIR)
else (NOT PYTHON_NUMPY_INCLUDE_DIR)
  # include directory already known: report success without probing again
  set (PYTHON_NUMPY_FOUND TRUE)
endif (NOT PYTHON_NUMPY_INCLUDE_DIR)
================================================
FILE: core/algorithm/dynamic_triad.py
================================================
from __future__ import print_function
from __future__ import print_function
import keras.backend as K
from core.kerasext import keras_backend_patches
from keras import optimizers, constraints
import numpy as np
import math
import warnings
import sys
from .samplers.pos_neg_tri import Sampler
try:
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
except ImportError:
from sklearn.cross_validation import cross_val_score, KFold, StratifiedKFold
from core import utils
from core import gconfig as gconf
from .embutils import TrainFlow, WithData, Validator
try:
import dynamic_triad_cimpl as cimpl
except ImportError:
warnings.warn("dynamic_triad_cimpl.so not found, falling back to python implementation")
cimpl = None
class Model(Sampler, TrainFlow, WithData, Validator):
    """Embedding model trained with proximity, smoothness and triad losses.

    The Keras functions are built lazily (see ``pretrain`` / ``online``); the
    sampling and bookkeeping logic comes from the ``Sampler`` and ``TrainFlow``
    mix-ins.
    """

    def __init__(self, ds, pretrain_size=10, embdim=16, beta=None,
                 lr=0.1, batchsize=None, sampling_args=None):
        """Store the hyper-parameters and initialise the mix-ins.

        Args:
            ds: dataset object; exposed through the ``dataset`` property.
            pretrain_size: number of time steps optimised jointly in pretraining.
            embdim: embedding dimension.
            beta: ``[beta_smooth, beta_triad]`` loss coefficients
                (defaults to ``[0.1, 0.1]``).
            lr: learning rate passed to the Adagrad optimizer.
            batchsize: stored on the instance as ``self.batchsize``.
            sampling_args: keyword arguments forwarded to ``Sampler.__init__``.
        """
        if beta is None:
            beta = [0.1, 0.1]
        if sampling_args is None:
            sampling_args = {}

        self.__dataset = ds
        TrainFlow.__init__(self, embdim=embdim, beta=beta, trainmod=self.name, datasetmod=ds.name)
        Sampler.__init__(self, **sampling_args)

        self.pretrain_size = pretrain_size
        self.lr = lr
        self.batchsize = batchsize

        # compiled lazily by the `pretrain` / `online` properties
        self.__pretrain = None
        self.__online = None

    @property
    def name(self):
        """Identifier of this training module."""
        return "dynamic_triad"

    @property
    def dataset(self):
        """The dataset object passed to the constructor."""
        return self.__dataset

    @property
    def pretrain(self):
        """Dict with keys 'lossfunc', 'predfunc', 'vars', 'cache'; built on first access."""
        if self.__pretrain is not None:
            return self.__pretrain

        lf, pr, vs, cache = self.make_pretrain()
        self.__pretrain = {'lossfunc': lf, 'predfunc': pr, 'vars': vs, 'cache': cache}
        return self.__pretrain

    @property
    def online(self):
        """Same layout as ``pretrain`` for the online model; built on first access."""
        if self.__online is not None:
            return self.__online

        lf, pr, vs, cache = self.make_online()
        self.__online = {'lossfunc': lf, 'predfunc': pr, 'vars': vs, 'cache': cache}
        return self.__online

    def verbose(self, inputs):
        """Evaluate the per-component debug function on ``inputs``.

        The 'debug' entry only exists when the pretrain functions were built
        with ``gconf.debug`` enabled; otherwise the lookup raises KeyError.
        """
        comp = self.pretrain['cache']['debug']
        return comp(inputs)

    # the current implementation of x function is (j - i) + (k - i)
    def make_pretrain(self):
        """Build the Keras functions used for joint pretraining.

        Returns:
            ``(lossfunc, predfunc, [embedding, theta], cache)`` where ``cache``
            is ``{'debug': debugfunc}`` when ``gconf.debug`` is set, else ``{}``.
        """
        embedding = K.variable(
            np.random.uniform(0, 1, (self.pretrain_size, self.dataset.nsize, self.flowargs['embdim'])))
        theta = K.variable(np.random.uniform(0, 1, (self.flowargs['embdim'] + 1, )))
        data = K.placeholder(ndim=2, dtype='int32')  # (batchsize, 5), [k, from_pos, to_pos, from_neg, to_neg]
        weight = K.placeholder(ndim=1, dtype='float32')  # (batchsize, )
        triag_int = K.placeholder(ndim=2, dtype='int32')  # (batchsize, 4), [k, from, to1, to2]
        triag_float = K.placeholder(ndim=2, dtype='float32')  # (batchsize, 3), [coef, w1, w2]
        pred_data = K.placeholder(ndim=2, dtype='int32')  # (batchsize, 2) [timestep, nodeid]

        if K._BACKEND == 'theano':
            # (batchsize, nsize, d) => (batchsize, nsize)
            pred = embedding[pred_data[:, 0] - 1, pred_data[:, 1]][:, None, :] - embedding[pred_data[:, 0] - 1]
            pred = -K.sum(K.square(pred), axis=-1)  # the closer the more probable

            # (batchsize, d) => (batchsize, )
            dist_pos = embedding[data[:, 0], data[:, 1]] - embedding[data[:, 0], data[:, 2]]
            dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
            dist_neg = embedding[data[:, 0], data[:, 3]] - embedding[data[:, 0], data[:, 4]]
            dist_neg = K.sum(dist_neg * dist_neg, axis=-1)
        else:
            pred_tm = K.slice(pred_data, [0, 0], [-1, 1]) - 1
            # The node ids must come from `pred_data`, exactly as in the theano
            # branch. They used to be sliced from the training placeholder
            # `data`, which is not even an input of the prediction function.
            node_idx = K.concatenate((pred_tm, K.slice(pred_data, [0, 1], [-1, 1])), axis=1)
            pred = K.expand_dims(K.gather_nd(embedding, node_idx), 1) - K.gather(embedding, K.squeeze(pred_tm, 1))
            pred = -K.sum(K.square(pred), axis=-1)

            tm = K.slice(data, [0, 0], [-1, 1])
            posedge1 = K.concatenate((tm, K.slice(data, [0, 1], [-1, 1])), axis=1)
            posedge2 = K.concatenate((tm, K.slice(data, [0, 2], [-1, 1])), axis=1)
            negedge1 = K.concatenate((tm, K.slice(data, [0, 3], [-1, 1])), axis=1)
            negedge2 = K.concatenate((tm, K.slice(data, [0, 4], [-1, 1])), axis=1)
            dist_pos = K.gather_nd(embedding, posedge1) - K.gather_nd(embedding, posedge2)
            dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
            dist_neg = K.gather_nd(embedding, negedge1) - K.gather_nd(embedding, negedge2)
            dist_neg = K.sum(dist_neg * dist_neg, axis=-1)

        # hinge loss on (positive distance - negative distance)
        margin = 1
        lprox = K.maximum(dist_pos - dist_neg + margin, 0) * weight

        # (1, )
        lprox = K.mean(lprox)

        # lsmooth
        lsmooth = embedding[1:] - embedding[:-1]  # (k - 1, nsize, d)
        lsmooth = K.sum(K.square(lsmooth), axis=-1)  # (k - 1, nsize)
        lsmooth = K.mean(lsmooth)

        # ltriag
        if K._BACKEND == 'theano':
            e1 = embedding[triag_int[:, 0], triag_int[:, 1]] - embedding[triag_int[:, 0], triag_int[:, 2]]  # (batchsize_t, d)
            e2 = embedding[triag_int[:, 0], triag_int[:, 1]] - embedding[triag_int[:, 0], triag_int[:, 3]]
            x = e1 * triag_float[:, 1, None] + e2 * triag_float[:, 2, None]
            iprod = K.dot(x, K.expand_dims(theta[:-1], axis=1)) + theta[-1]  # (batchsize_d, )
            iprod = K.clip(iprod, -50, 50)  # for numerical stability
            logprob = K.log(1 + K.exp(-iprod))
            ltriag = K.mean(triag_float[:, 0] * iprod + logprob)
        else:
            tm = K.slice(triag_int, [0, 0], [-1, 1])
            nc = K.concatenate((tm, K.slice(triag_int, [0, 1], [-1, 1])), axis=1)
            n1 = K.concatenate((tm, K.slice(triag_int, [0, 2], [-1, 1])), axis=1)
            n2 = K.concatenate((tm, K.slice(triag_int, [0, 3], [-1, 1])), axis=1)
            e1 = K.gather_nd(embedding, nc) - K.gather_nd(embedding, n1)
            e2 = K.gather_nd(embedding, nc) - K.gather_nd(embedding, n2)
            w1 = K.slice(triag_float, [0, 1], [-1, 1])
            w2 = K.slice(triag_float, [0, 2], [-1, 1])
            x = e1 * w1 + e2 * w2
            iprod = K.dot(x, K.expand_dims(theta[:-1], axis=1)) + theta[-1]  # (batchsize_d, )
            # logprob = K.log(1 + K.exp(-iprod))
            logprob = -K.log_softmax(K.concatenate((iprod, K.zeros_like(iprod)), axis=1), axis=1)
            logprob = K.slice(logprob, [0, 0], [-1, 1])  # discard results for appended zero line
            logprob = K.clip(logprob, -50, 50)  # if the softmax if too small
            coef = K.slice(triag_float, [0, 0], [-1, 1])
            ltriag = K.mean(coef * iprod + logprob)

        loss = lprox + self.flowargs['beta'][0] * lsmooth + self.flowargs['beta'][1] * ltriag

        opt = optimizers.get({'class_name': 'Adagrad', 'config': {'lr': self.lr}})
        cstr = {embedding: constraints.get({'class_name': 'maxnorm', 'config': {'max_value': 1, 'axis': 2}}),
                theta: constraints.get({'class_name': 'unitnorm', 'config': {'axis': 0}})}
        upd = opt.get_updates([embedding, theta], cstr, loss)
        lf = K.function([data, weight, triag_int, triag_float], [loss], updates=upd)
        pf = K.function([pred_data], [pred])

        if gconf.debug:
            # reports the individual (weighted) loss components without applying updates
            debug = K.function([data, weight, triag_int, triag_float],
                               [lprox, lsmooth * self.flowargs['beta'][0], ltriag * self.flowargs['beta'][1],
                                K.mean(triag_float[:, 0]) * self.flowargs['beta'][1],
                                K.mean(iprod) * self.flowargs['beta'][1],
                                K.mean(logprob) * self.flowargs['beta'][1]])
            return lf, pf, [embedding, theta], {'debug': debug}
        else:
            return lf, pf, [embedding, theta], {}

    def make_online(self):
        """Build the Keras loss function for a single online time step.

        Returns:
            ``(lossfunc, None, [embedding], {})``; there is no prediction
            function for the online model.
        """
        embedding = K.variable(np.random.uniform(0, 1, (self.dataset.nsize, self.flowargs['embdim'])))
        prevemb = K.placeholder(ndim=2, dtype='float32')  # (nsize, d)
        data = K.placeholder(ndim=2, dtype='int32')  # (batchsize, 5), [k, from_pos, to_pos, from_neg, to_neg]
        weight = K.placeholder(ndim=1, dtype='float32')  # (batchsize, )

        if K._BACKEND == 'theano':
            # (batchsize, d) => (batchsize, )
            # data[:, 0] should be always 0, so we simply ignore it
            # note, when you want to use it, that according to data generation procedure, the actual data[:, 0] is not 0
            dist_pos = embedding[data[:, 1]] - embedding[data[:, 2]]
            dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
            dist_neg = embedding[data[:, 3]] - embedding[data[:, 4]]
            dist_neg = K.sum(dist_neg * dist_neg, axis=-1)
        else:
            dist_pos = K.gather(embedding, K.squeeze(K.slice(data, [0, 1], [-1, 1]), axis=1)) - \
                K.gather(embedding, K.squeeze(K.slice(data, [0, 2], [-1, 1]), axis=1))
            dist_pos = K.sum(dist_pos * dist_pos, axis=-1)
            dist_neg = K.gather(embedding, K.squeeze(K.slice(data, [0, 3], [-1, 1]), axis=1)) - \
                K.gather(embedding, K.squeeze(K.slice(data, [0, 4], [-1, 1]), axis=1))
            dist_neg = K.sum(dist_neg * dist_neg, axis=-1)

        # (batchsize, )
        margin = 1
        lprox = K.maximum(margin + dist_pos - dist_neg, 0) * weight

        # (1, )
        lprox = K.mean(lprox)

        # lsmooth
        lsmooth = embedding - prevemb  # (nsize, d)
        lsmooth = K.sum(K.square(lsmooth), axis=-1)  # (nsize)
        lsmooth = K.mean(lsmooth)

        loss = lprox + self.flowargs['beta'][0] * lsmooth

        opt = optimizers.get({'class_name': 'Adagrad', 'config': {'lr': self.lr}})
        cstr = {embedding: constraints.get({'class_name': 'maxnorm', 'config': {'max_value': 1, 'axis': 1}})}
        upd = opt.get_updates([embedding], cstr, loss)
        lf = K.function([data, weight, prevemb], [loss], updates=upd)

        return lf, None, [embedding], {}

    def save_model(self, copy=True):
        """Copy the current Keras variables into ``_sequence``/``_tagged`` and delegate to the base class.

        Raises:
            RuntimeError: if the current training window straddles the end of
                the pretraining range.
        """
        if self.cur_train_begin < self.init_train_begin + self.pretrain_size < self.cur_train_end:
            raise RuntimeError("current training process crosses the boarder of pretraining???")

        # load from keras resources
        if self.cur_train_end <= self.init_train_begin + self.pretrain_size:
            self._sequence[self.init_train_begin:self.init_train_begin + self.pretrain_size] = K.get_value(self.pretrain['vars'][0])
            self._tagged['theta'] = K.get_value(self.pretrain['vars'][1])
        else:
            self._sequence[self.cur_train_begin] = K.get_value(self.online['vars'][0])

        return super(Model, self).save_model(copy=copy)

    def restore_model(self, model, begin=None, end=None, copy=True):
        """Restore ``model`` through the base class, then push it into the Keras variables.

        ``begin``/``end`` default to the current training window. The Keras
        variables are only updated while ``self.is_training`` is true.

        Raises:
            RuntimeError: if the current training window straddles the end of
                the pretraining range.
        """
        super(Model, self).restore_model(model, begin, end, copy=copy)
        if begin is None:
            begin = self.cur_train_begin
        if end is None:
            end = self.cur_train_end

        if self.cur_train_begin < self.init_train_begin + self.pretrain_size < self.cur_train_end:
            raise RuntimeError("current training process crosses the boarder of pretraining???")

        # store to keras resources
        if self.is_training:
            if end <= self.init_train_begin + self.pretrain_size:
                K.set_value(self.pretrain['vars'][0], self._sequence[self.init_train_begin:self.init_train_begin + self.pretrain_size])
                K.set_value(self.pretrain['vars'][1], self._tagged['theta'])
            else:
                K.set_value(self.online['vars'][0], self._sequence[begin])

    def pretrain_begin(self, begin, end):
        """Reset the training flow and start pretraining on steps ``[begin, end)``."""
        TrainFlow.clear(self)
        TrainFlow.start_training(self, begin, end)
        Sampler.pretrain_begin(self, begin, end)
        # reserve one slot per pretraining step; save_model() fills them in
        self._sequence.extend([None] * self.pretrain_size)

    def pretrain_begin_iteration(self):
        """Start an iteration and, during pretraining, recompute the EM coefficients of the triad samples."""
        Sampler.pretrain_begin_iteration(self)
        if self.cur_train_end > self.init_train_begin + self.pretrain_size:  # online phase
            return

        # compute EM coefficients here
        neg1_int, neg1_float = self.__emcoef(self._neg[1])
        self._neg = self._neg[:1] + [neg1_int, neg1_float]

    def pretrain_end_iteration(self):
        """Finish an iteration and snapshot the model."""
        Sampler.pretrain_end_iteration(self)
        self.save_model()

    def pretrain_end(self):
        """Finish pretraining."""
        Sampler.pretrain_end(self)
        TrainFlow.stop_training(self)

    def online_begin(self, begin, end):
        """Start online training on ``[begin, end)`` with a freshly randomised embedding."""
        TrainFlow.start_training(self, begin, end)
        Sampler.online_begin(self, begin, end)
        initv = np.random.uniform(0, 1, (self.dataset.nsize, self.flowargs['embdim'])).astype('float32')
        K.set_value(self.online['vars'][0], initv)
        self._sequence.append(None)

    # ends the current online training
    # store online training results
    # we need to reset online training variables
    def online_end(self):
        """Finish the current online step (exactly one time step) and store its result."""
        Sampler.online_end(self)
        assert self.cur_train_end == self.cur_train_begin + 1, "{} {}".format(self.cur_train_end, self.cur_train_begin)
        self.save_model()
        TrainFlow.stop_training(self)

    def make_pretrain_input(self, batch):
        """Build the pretrain inputs for ``batch`` with time steps rebased to the embedding variable."""
        ret = Sampler.make_pretrain_input(self, batch)
        # (data, weight, triad)
        # because embedding variable starts from index 0
        for d in ret[0]:  # data
            d[0] -= self.init_train_begin
        for d in ret[2]:
            d[0] -= self.init_train_begin
        return ret

    # return a list whatever number of inputs required
    def make_online_input(self, batch):
        """Build the online inputs for ``batch`` and append the previous step's embedding."""
        ret = Sampler.make_online_input(self, batch)
        for d in ret[0]:  # data
            d[0] -= self.init_train_begin
        ret.append(self._sequence[self.cur_train_begin - 1])
        return ret

    def __emcoef_cimpl(self, data):
        """Compute the EM coefficients with the compiled ``cimpl.emcoef`` routine."""
        nodenames = list(self.__dataset.gtgraphs['any'].vp['name'])
        emb, theta = [K.get_value(v) for v in self.pretrain['vars']]
        mygraphs = list(self.__dataset.mygraphs)
        res = cimpl.emcoef(data, emb, theta, mygraphs, nodenames, self.__dataset.localstep)
        neg1_int = [r[0] for r in res]
        neg1_float = [r[1] for r in res]
        return neg1_int, neg1_float

    def __emcoef_pyimpl(self, data):
        """Pure-python fallback: compute the EM coefficients per time step with ``utils.ParMap``."""
        slices = utils.group_by(self._neg[1], key=lambda x: x[0])
        for i in range(len(slices)):
            mapper = utils.ParMap(self.__emcoef_calculator_factory(slices[i][0][0]), self.__emcoef_monitor)
            slices[i] = mapper.run(slices[i], chunk=min(10000, mapper.default_chunk(len(slices[i]))))
        # TODO: need a shuffle?
        neg1_int = [r[0] for s in slices for r in s]
        neg1_float = [r[1] for s in slices for r in s]
        assert len(neg1_int) == len(neg1_float) and len(neg1_int) == len(data), \
            "{} {} {}".format(len(neg1_int), len(neg1_float), len(data))
        return neg1_int, neg1_float

    def __emcoef(self, data):
        """Compute EM coefficients, preferring the C implementation when available.

        With ``gconf.debug`` set, the C results are cross-checked against the
        python implementation (integer parts must match exactly, float parts
        within 1e-3).
        """
        if cimpl is not None:
            if gconf.debug:
                # regroup so that both implementations see the samples in the same order
                data = [r for s in utils.group_by(self._neg[1], key=lambda x: x[0]) for r in s]
            cimpl_res = self.__emcoef_cimpl(data)
            if gconf.debug:
                pyimpl_res = self.__emcoef_pyimpl(data)
                print("checking cimpl results according to pyimpl results")
                assert len(pyimpl_res[0]) == len(cimpl_res[0]), "{} {}".format(len(pyimpl_res[0]), len(cimpl_res[0]))
                for idx in range(len(pyimpl_res[0])):
                    # all(...) is required: a bare list of booleans is truthy
                    # whenever it is non-empty, so the float check never fired.
                    # Distinct names also avoid clobbering the loop index.
                    floats_close = all(math.fabs(a - b) < 1e-3
                                       for a, b in zip(pyimpl_res[1][idx], cimpl_res[1][idx]))
                    assert pyimpl_res[0][idx] == cimpl_res[0][idx] and floats_close, \
                        "{} {} {} {} {} {}".format(idx, pyimpl_res[0][idx], pyimpl_res[1][idx], cimpl_res[0][idx], cimpl_res[1][idx], data[idx])
                print("OK")
            return cimpl_res
        else:
            return self.__emcoef_pyimpl(data)

    @staticmethod
    def __emcoef_monitor(reportq):
        """Print progress from ``reportq`` until a ``StopIteration`` instance is received.

        Each other queue item is a ``(pid, processed_count)`` pair; the latest
        count per pid is summed for the progress line.
        """
        total_proccnt = {}
        while True:
            obj = reportq.get()
            if isinstance(obj, StopIteration):
                break
            pid, proccnt = obj
            total_proccnt[pid] = proccnt
            print("EM coefficients calculated for {} samples\r".format(sum(total_proccnt.values())), end='')
            sys.stdout.flush()
        print("EM coefficients calculated for {} samples".format(sum(total_proccnt.values())))

    def __emcoef_calculator_factory(self, timestep):
        """Return a worker ``emcoef_calc(procinfo, data, reportq)`` bound to ``timestep``.

        The worker maps each record ``(y, k, i, j, lb, wtv1, wtv2)`` to
        ``([y, k, i, j], [C, wtv1, wtv2])`` and reports progress on ``reportq``.
        """
        # TODO: split data by year so that we need not share the whole emb and mygraph
        nodenames = list(self.__dataset.gtgraphs['any'].vp['name'])
        name2idx = {n: i for i, n in enumerate(nodenames)}
        emb, theta = [K.get_value(v) for v in self.pretrain['vars']]
        # localstep = self.__dataset.localstep
        # localize for current time step
        g = self.__dataset.mygraphs[timestep]
        emb = emb[timestep - self.__dataset.localstep]
        # mygraphs = list(self.__dataset.mygraphs)

        def emcoef_calc(procinfo, data, reportq):
            ret = []
            for didx, d in enumerate(data):
                if didx % 10000 == 0:
                    reportq.put([id(procinfo), didx])
                y, k, i, j, lb, wtv1, wtv2 = d
                # y0based = y - localstep
                # g = mygraphs[y0based]
                # w = g.edge_properties['weight']
                if lb == 0:
                    C = 1
                else:
                    def x(a, b, c):
                        w1, w2 = g.edge(nodenames[a], nodenames[c]), g.edge(nodenames[b], nodenames[c])
                        return (emb[c] - emb[a]) * w1 + (emb[c] - emb[b]) * w2

                    def P(a, b, c):
                        power = -(np.dot(theta[:-1], x(a, b, c)) + theta[-1])
                        # avoid overflow in exp() for very large exponents
                        if power > 100:
                            return 0
                        else:
                            return 1.0 / (1 + math.exp(power))

                    C0 = P(i, j, k)
                    inbr = set(list(g.out_neighbours(nodenames[i])))
                    jnbr = set(list(g.out_neighbours(nodenames[j])))
                    cmnbr = inbr.intersection(jnbr)
                    C1 = 1 - np.prod([1 - P(i, j, name2idx[v]) for v in cmnbr])
                    eps = 1e-6  # keeps the division below finite when C1 is 0
                    C = 1 - C0 / (C1 + eps)
                    if not np.isfinite(C):
                        # dump the offending triad before aborting
                        print(C0, C1, C, [1 - P(i, j, name2idx[v]) for v in cmnbr])
                        print(i, j, k)
                        print(g.exists(nodenames[i], nodenames[k]),
                              g.exists(nodenames[j], nodenames[k]))
                        print([name2idx[n] for n in inbr], [name2idx[n] for n in jnbr])
                        assert 0
                ret.append(([y, k, i, j], [C, wtv1, wtv2]))
            reportq.put([id(procinfo), len(data)])
            return ret

        return emcoef_calc
================================================
FILE: core/algorithm/dynamic_triad_cimpl.cpp
================================================
#include <Python.h>
#include <numpy/ndarraytypes.h>
#include <numpy/ndarrayobject.h>
#include <boost/python.hpp>
namespace py = boost::python;
#include <Eigen/Core>
namespace ei = Eigen;
#include <map>
#include <vector>
#include <string>
#include <exception>
#include <set>
#include <map>
#include <cmath>
#include <iostream>
using namespace std;
#include <omp.h>
#include "graph/graph_pywrapper.h"
#include "boost_python_omp.h"
// One triad sample, filled from a python list by extract_record() in the order
// [tm, k, i, j, lb, wtv1, wtv2].
struct Record
{
int i, j, k; // node indices; k is the center node
int tm, lb; // tm: first list element, lb: fifth list element
float wtv1, wtv2; // sixth and seventh list elements
};
// Tensor1D / Tensor2D are Eigen::Map types wrapping an external float buffer;
// the *_Managed variants are plain Eigen arrays of the same layout.
using Tensor1D = ei::Map<ei::Array<float, 1, ei::Dynamic, ei::RowMajor>, ei::RowMajor>;
using Tensor1D_Managed = ei::Array<float, 1, ei::Dynamic, ei::RowMajor>;
using Tensor2D = ei::Map<ei::Array<float, ei::Dynamic, ei::Dynamic, ei::RowMajor>, ei::RowMajor>;
using Tensor2D_Managed = ei::Array<float, ei::Dynamic, ei::Dynamic, ei::RowMajor>;
// A 3-d tensor is kept as the raw numpy array object; see slice_tensor3d().
using Tensor3D = PyArrayObject*;
// Shorthands for the C graph pointer and node type exposed by a python graph wrapper type.
template <typename graph_t> using cgraph_type = const typename graph_t::CGraph*;
template <typename graph_t> using node_type = typename graph_t::CGraph::node_type;
// Weighted displacement of node c from nodes a and b:
//   (emb[c] - emb[a]) * w(a, c) + (emb[c] - emb[b]) * w(b, c)
// Throws runtime_error when a near-zero weight turns out to be a missing edge.
template <typename graph_t>
Tensor1D_Managed X(int a, int b, int c, cgraph_type<graph_t> g, Tensor2D emb, const vector<node_type<graph_t>>& nodenames)
{
    const auto& node_a = nodenames[a];
    const auto& node_b = nodenames[b];
    const auto& node_c = nodenames[c];

    const float w1 = g->edge_value(node_a, node_c);
    const float w2 = g->edge_value(node_b, node_c);

    // save computation: the existence lookups only run when a weight is suspiciously small
    const bool tiny_weight = w1 < 1e-6 || w2 < 1e-6;
    if(tiny_weight && !(g->exists(node_a, node_c) && g->exists(node_b, node_c)))
        throw runtime_error("invalid open triangle");

    return (emb.row(c) - emb.row(a)) * w1 + (emb.row(c) - emb.row(b)) * w2;
}
template <typename graph_t>
float P(int a, int b, int c, cgraph_type<graph_t> g, Tensor2D emb, Tensor1D theta, const vector<node_type<graph_t>>& nodenames)
{
Tensor1D_Managed x = X<graph_t>(a, b, c, g, emb, nodenames);
float power = theta.segment(0, theta.size() - 1).cwiseProduct(x).sum();
power = -(power + theta(0, theta.size() - 1));
if(power > 100.0f)
return 0.0f;
else
return 1.0f / (1 + exp(power));
}
// Convert the python-side inputs into C++ containers:
//   - every element of py_graph provides data(), whose integer result is
//     reinterpreted as a pointer to the C graph;
//   - every element of py_nodenames is converted to the graph's node type.
// Results are appended to *graphs and *nodenames.
// Throws runtime_error when a node name cannot be converted.
template <typename graph_t>
void translate_input(const py::list py_graph, const py::list py_nodenames, vector<cgraph_type<graph_t>> *graphs, vector<node_type<graph_t>> *nodenames)
{
    // graph
    const int ngraphs = py::len(py_graph);
    for(int gi = 0; gi < ngraphs; gi++)
    {
        const uintptr_t addr = py::extract<uintptr_t>(py_graph[gi].attr("data")())();
        graphs->push_back((cgraph_type<graph_t>)addr);
    }

    // nodenames
    const int nnames = py::len(py_nodenames);
    for(int ni = 0; ni < nnames; ni++)
    {
        py::extract<node_type<graph_t>> name(py_nodenames[ni]);
        if(!name.check())
            throw runtime_error("Type check failed for nodename convertion");
        nodenames->push_back(name());
    }
}
// Wraps a NumPy float32 array as a 1 x N Eigen view without copying.
// The length is taken from the first dimension of the array.
//
// The view is returned by value (rather than written through a pointer)
// because Eigen::Map requires proper initialisation in debug mode.
//
// Throws logic_error when the array has no dimension, is not C-contiguous,
// or its element type is not exactly C `float`: the buffer is reinterpreted
// as float*, so float64/float16 data must be rejected even though their
// dtype kind is also 'f'.
Tensor1D translate_1darray(py::object arr)
{
    PyArrayObject *obj = (PyArrayObject*)arr.ptr();
    if(PyArray_NDIM(obj) < 1)
        throw logic_error("expecting an ndarray with at least one dimension");
    // assert float32 type (checking the dtype kind alone would also accept float64)
    if(PyArray_TYPE(obj) != NPY_FLOAT)
        throw logic_error("dtype of ndarray is not float32!");
    // the view below assumes densely packed elements
    if(!PyArray_ISCONTIGUOUS(obj))
        throw logic_error("ndarray is not C-contiguous");
    int sz = PyArray_DIM(obj, 0);
    return Tensor1D((float*)PyArray_DATA(obj), 1, sz);
}
// Reinterprets the Python object as a NumPy array handle.  No validation or
// copy happens here; slices are validated and extracted by slice_tensor3d().
Tensor3D translate_3darray(py::object arr)
{
    PyObject *raw = arr.ptr();
    return reinterpret_cast<Tensor3D>(raw);
}
// Converts a Python object to T.
// Throws runtime_error naming typeid(T) when the conversion is not possible.
template <typename T>
T extract(py::object obj)
{
    py::extract<T> conv(obj);
    if(conv.check())
        return conv();
    ostringstream msg;
    msg << "Type check failed for type " << typeid(T).name();
    throw runtime_error(msg.str());
}
// Fills *out from a Python list laid out as
//   [time, center node, node 1, node 2, label, weight 1, weight 2].
// Throws runtime_error when `rec` is not a list or a field has the wrong type.
void extract_record(py::object rec, Record *out)
{
    py::extract<py::list> as_list(rec);
    if(!as_list.check())
        throw runtime_error("Type check failed for data record, expecting py::list");
    py::list fields = as_list();
    Record &dst = *out;
    dst.tm = extract<int>(fields[0]);
    dst.k = extract<int>(fields[1]); // center node
    dst.i = extract<int>(fields[2]);
    dst.j = extract<int>(fields[3]);
    dst.lb = extract<int>(fields[4]);
    dst.wtv1 = extract<float>(fields[5]);
    dst.wtv2 = extract<float>(fields[6]);
}
// Returns a non-copying 2-D view of t[idx] for a 3-D float32 NumPy array.
//
// All properties the raw pointer arithmetic relies on are validated first:
// rank 3, element type exactly C `float` (the dtype kind 'f' alone would
// also match float64), C-contiguous layout and an in-range index.
// Throws logic_error when any of these does not hold.
Tensor2D slice_tensor3d(Tensor3D t, int idx)
{
    if(PyArray_NDIM(t) != 3)
        throw logic_error("expecting a 3-dimensional ndarray");
    // assert float32 type
    if(PyArray_TYPE(t) != NPY_FLOAT)
        throw logic_error("dtype of ndarray is not float32!");
    // the 2-D view below assumes densely packed rows
    if(!PyArray_ISCONTIGUOUS(t))
        throw logic_error("ndarray is not C-contiguous");
    if(idx < 0 || idx >= PyArray_DIM(t, 0))
        throw logic_error("slice index out of range for the first dimension of ndarray");
    void *data = PyArray_GETPTR1(t, idx);
    return Tensor2D((float*)data, PyArray_DIM(t, 1), PyArray_DIM(t, 2));
}
// Computes the EM coefficient of every open-triangle record in `data`.
//
//   data         : list of records, see extract_record() for the layout
//   py_emb       : 3-D float32 ndarray, indexed by (time - localstep) first
//   py_theta     : float32 ndarray holding the weights followed by the bias
//   py_graphs    : list of graph wrappers, one per time step
//   py_nodenames : list of node names; position == row index in the embedding
//   localstep    : time step that corresponds to index 0 of py_graphs / py_emb
//
// Returns a list with one entry per record:
//   ([tm, k, i, j], [C, wtv1, wtv2])
// where C is 1 for records labelled 0 and 1 - C0 / (C1 + eps) otherwise, with
// C0 = P(i, j, k) and C1 = 1 - prod over common neighbours n of (1 - P(i, j, n)).
//
// Records are processed in chunks by OpenMP workers.  The GIL is released for
// the numeric part and re-acquired only while Python objects are touched.
// Throws runtime_error on an out-of-range time step, an unknown node name or a
// non-finite coefficient.
template <typename graph_t>
py::list _emcoef(py::list data, py::object py_emb, py::object py_theta, py::list py_graphs, py::list py_nodenames, int localstep)
{
    vector<cgraph_type<graph_t>> graphs;
    vector<node_type<graph_t>> nodenames;
    // pre-size the result so that workers only assign to already existing slots
    py::list ret;
    ret.append(0);
    ret *= py::len(data);
    translate_input<graph_t>(py_graphs, py_nodenames, &graphs, &nodenames);
    Tensor3D emb = translate_3darray(py_emb);
    Tensor1D theta = translate_1darray(py_theta);
    // build name2idx
    map<node_type<graph_t>, int> name2idx;
    int idx_cnt = 0;
    for(const auto& name : nodenames)
        name2idx[name] = idx_cnt++;
    // Read-only lookup for the worker threads: map::operator[] is a mutating
    // member (it may insert), so it must not be called concurrently.
    const auto index_of = [&name2idx](const node_type<graph_t>& name) -> int
    {
        const auto pos = name2idx.find(name);
        if(pos == name2idx.end())
            throw runtime_error("node name not found in the node name list");
        return pos->second;
    };
    double eps = 1e-6;
    int datalen = py::len(data);
    int pardeg = 120;
    int num_threads = omp_get_num_procs();
    //int num_threads = 1; // for debug
    GILRelease gilrelease;
    OMP_INIT_FOR(datalen, pardeg);
#ifdef DEBUG
    cout << "step size " << __omp_step_size << ' ' << __omp_sz << ' ' << __omp_deg << endl;
#endif
#pragma omp parallel for shared(data, localstep, graphs, emb, theta, nodenames, ret) num_threads(num_threads) schedule(dynamic, 1)
    OMP_BEGIN_FOR(lb, ub);
#ifdef DEBUG
    cout << "thread " << omp_get_thread_num() << ": from " << lb << " to " << ub << endl;
#endif
    // per-chunk buffers (std::vector instead of non-standard variable-length arrays)
    vector<Record> currec(ub - lb);
    vector<double> curC(ub - lb);
    { GILAcquire gil;
        for(int i = lb; i < ub; i++)
            extract_record(data[i], &currec[i - lb]);
    }
    for(int i = lb; i < ub; i++)
    {
        double C;
        const Record& rec = currec[i - lb];
        int tm0based = rec.tm - localstep;
        if(tm0based < 0)
            throw runtime_error("trying to access graph before the first time step");
        if(tm0based >= (int)graphs.size())
            throw runtime_error("trying to access graph after the last time step");
        const cgraph_type<graph_t> g = graphs[tm0based];
        Tensor2D curemb = slice_tensor3d(emb, tm0based);
        if(rec.lb == 0)
        {
            C = 1.0;
        }
        else
        {
            double C0 = P<graph_t>(rec.i, rec.j, rec.k, g, curemb, theta, nodenames);
            // common neighbours of i and j
            const auto& inbr = g->get_value(nodenames[rec.i]);
            set<node_type<graph_t>> cmnbr;
            for(const auto& itr : g->get_value(nodenames[rec.j]))
                if(inbr.exists(itr.first))
                    cmnbr.insert(itr.first);
            double C1 = 1;
            for(const auto& nbr : cmnbr)
            {
                C1 *= (1 - P<graph_t>(rec.i, rec.j, index_of(nbr), g, curemb, theta, nodenames));
            }
            C1 = 1.0 - C1;
            C = 1.0 - C0 / (C1 + eps);
            if(!isfinite(C))
            {
                // dump the offending sample before giving up
                cerr << C0 << ' ' << C1 << ' ' << C << endl;
                cerr << rec.i << ' ' << rec.j << ' ' << rec.k << endl;
                cerr << g->exists(nodenames[rec.i], nodenames[rec.k]) << ' ' << g->exists(nodenames[rec.j], nodenames[rec.k]) << endl;
                for(const auto& nbr : g->get_value(nodenames[rec.i]))
                    cerr << index_of(nbr.first) << ' ';
                cerr << endl;
                for(const auto& nbr : g->get_value(nodenames[rec.j]))
                    cerr << index_of(nbr.first) << ' ';
                cerr << endl;
                throw runtime_error("inf or nan detected when calculating em coefficients");
            }
        }
        // the coefficient is deliberately rounded to float precision
        curC[i - lb] = float(C);
    }
    { GILAcquire gil;
        for(int i = lb; i < ub; i++)
        {
            const Record& rec = currec[i - lb];
            ret[i] = py::make_tuple(py::list(py::make_tuple(rec.tm, rec.k, rec.i, rec.j)), py::list(py::make_tuple(curC[i - lb], rec.wtv1, rec.wtv2)));
        } }
    OMP_END_FOR();
    return ret;
}
// Python entry point: dispatches to the _emcoef<> instantiation matching the
// class name of the first graph wrapper in py_graphs.
// Throws runtime_error for an unsupported wrapper class.
py::list emcoef(py::list data, py::object py_emb, py::object py_theta, py::list py_graphs, py::list py_nodenames, int localstep)
{
    const string cls = py::extract<string>(py::object(py_graphs[0]).attr("__class__").attr("__name__"));
    if(cls == "Graph_Int32_Float")
        return _emcoef<Graph_Int32_Float>(data, py_emb, py_theta, py_graphs, py_nodenames, localstep);
    if(cls == "Graph_String_Float")
        return _emcoef<Graph_String_Float>(data, py_emb, py_theta, py_graphs, py_nodenames, localstep);
    throw runtime_error(string("Unknown graph type ") + cls);
}
// Module initialisation for the `dynamic_triad_cimpl` extension.
BOOST_PYTHON_MODULE(dynamic_triad_cimpl)
{
    // emcoef() releases and re-acquires the GIL from its worker threads,
    // so thread support is initialised before anything is exported.
    PyEval_InitThreads();
    // exported as dynamic_triad_cimpl.emcoef(data, emb, theta, graphs, nodenames, localstep)
    py::def("emcoef", &emcoef);
}
================================================
FILE: core/algorithm/embutils.py
================================================
from __future__ import print_function
import numpy as np
from collections import defaultdict
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from copy import deepcopy
try:
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
except ImportError:
from sklearn.cross_validation import cross_val_score, KFold, StratifiedKFold
from core import utils
from core import gconfig as gconf
class WithData(object):
    """Mixin for objects that expose the dataset they operate on."""

    @property
    def dataset(self):
        """Dataset backing this object; concrete classes must override this."""
        raise NotImplementedError()
class TrainFlow(utils.Archivable, WithData):
    """Book-keeping for per-step embeddings and the history of trained ranges.

    State kept by this class:

    * ``_history``  -- list of ``[begin, end)`` step ranges that were trained
    * ``_sequence`` -- ``utils.OffsetList`` returning the embedding of a step
    * ``_tagged``   -- dict that is archived and checkpointed with the model

    The concrete class has to provide the ``dataset`` property (see WithData).
    """

    def __init__(self, **flowargs):
        """Keep the recognised flow arguments; unknown keywords are ignored."""
        self.__arg_names = ['embdim', 'beta', 'trainmod', 'datasetmod']
        self.__args = {k: flowargs.get(k, None) for k in self.__arg_names}
        self._history = []
        # an empty list is passed so that the sequence is managed by OffsetList
        self._sequence = utils.OffsetList(0, 0, [], managed=True)
        self._tagged = {}
        self.__training = False
        self.__curbegin = None
        self.__curend = None

    def __require_training(self):
        """Raise RuntimeError unless a training session is in progress."""
        if not self.__training:
            raise RuntimeError("TrainFlow not in training mode")

    def __require_history(self):
        """Raise RuntimeError when nothing has been trained yet."""
        if len(self._history) == 0:
            raise RuntimeError("no training records found")

    def __add_history(self, hist):
        """Record the ``[begin, end]`` range ``hist`` as trained.

        A new range must start where the previous one ended.  It is appended as
        a separate entry when the previous range ends exactly at the first step
        of the current dataset, and merged into the previous entry when it lies
        inside the dataset range.

        :raises RuntimeError: if the range is not contiguous with the history
            or falls outside the dataset range
        """
        if len(self._history) == 0:
            self._history.append(hist)
            return

        last = self._history[-1]
        if last[1] != hist[0]:
            # report the begin step that was actually compared
            raise RuntimeError("Expected to train from step {}, got {} instead"
                               .format(last[1], hist[0]))

        dsrg = [self.dataset.localstep, self.dataset.localstep + self.dataset.nsteps]
        if last[1] == dsrg[0]:
            self._history.append(hist)
        elif hist[0] >= dsrg[0] and hist[1] <= dsrg[1]:
            last[1] = hist[1]  # merge
        else:
            raise RuntimeError("trying to train from {} to {} (excluded), out of dataset range [{}, {})"
                               .format(hist[0], hist[1], dsrg[0], dsrg[1]))

    def start_training(self, begin, end):
        """Enter training mode for the steps ``[begin, end)``."""
        self.__training = True
        self.__curbegin, self.__curend = begin, end
        if len(self._history) == 0:  # pretrain
            self._sequence = utils.OffsetList(begin, 0, [], managed=True)
        self.__add_history([begin, end])

    def stop_training(self):
        """Leave training mode."""
        self.__training = False
        self.__curbegin, self.__curend = None, None

    @property
    def cur_train_begin(self):
        """First step of the running training session."""
        self.__require_training()
        return self.__curbegin

    @property
    def cur_train_end(self):
        """End (exclusive) of the running training session."""
        self.__require_training()
        return self.__curend

    @property
    def init_train_begin(self):
        """First step of the earliest history entry."""
        self.__require_history()
        return self._history[0][0]

    @property
    def init_train_end(self):
        """End (exclusive) of the earliest history entry."""
        self.__require_history()
        return self._history[0][1]

    @property
    def last_train_begin(self):
        """First step of the latest history entry."""
        self.__require_history()
        return self._history[-1][0]

    @property
    def last_train_end(self):
        """End (exclusive) of the latest history entry."""
        self.__require_history()
        return self._history[-1][1]

    @property
    def flowargs(self):
        """Dict of the flow arguments given at construction or loaded from an archive."""
        return self.__args

    @property
    def is_training(self):
        """True between ``start_training`` and ``stop_training``."""
        return self.__training

    def embeddings_at(self, step, allow_missing=False, default=None):
        """Return the embedding stored for ``step``.

        :param allow_missing: return ``default`` instead of raising when the
            step is not stored
        :raises RuntimeError: if the step is missing and ``allow_missing`` is False
        """
        try:
            return self._sequence[step]
        except KeyError:
            if allow_missing:
                return default
            raise RuntimeError("trying to access missing embedding at step {}".format(step))

    # samples: [(time, node1, node2, ...), ...]
    def make_features(self, samples):
        """Look up the embeddings of the nodes listed in every sample.

        NOTE(review): the lookup indexes the step embedding with ``s[1:]``;
        this presumably expects every sample to be a list or array so that
        the nodes are gathered row-wise -- confirm against the callers.
        """
        emb_cache = utils.KeyDefaultDict(lambda x: self.embeddings_at(x))
        feat = np.array([emb_cache[s[0]][s[1:]] for s in samples])
        if gconf.debug:
            print("features shape: {}".format(feat.shape))  # (sample_size, node_cnt, feat_dim)
        return feat

    def clear(self):
        """Drop all history and embeddings and leave training mode."""
        self._history = []
        self._sequence = utils.OffsetList(0, 0, [], managed=True)
        self._tagged = {}
        self.__training = False
        # keep the state consistent with stop_training()
        self.__curbegin, self.__curend = None, None

    def slim_storage(self, keep_size):
        """Discard old embeddings, keeping at least the latest trained range.

        Storage starts at ``keep_size`` steps before the end of the latest
        history entry, or at the beginning of that entry if that is earlier,
        but never before the current offset of the sequence.
        """
        last_begin, last_end = self._history[-1][0], self._history[-1][1]
        startstep = min(last_end - keep_size, last_begin)
        startstep = max(startstep, self._sequence.offset)
        self._sequence = utils.OffsetList(startstep, last_end - startstep, self._sequence[startstep:],
                                          copy=False, managed=True)

    def export(self):
        """
        exports the embedding vectors
        :return: list of the stored embeddings
        """
        return list(self._sequence)

    def archive(self, name=None, copy=True):
        """Return the archive dict of the parent class extended with this flow's state.

        :param name: optional prefix prepended to the archive keys
        :param copy: deep-copy the archive so it is detached from this object
        """
        if name is None:
            prefix = 'TrainFlow'
        else:
            prefix = '{}_TrainFlow'.format(name)
        ar = super(TrainFlow, self).archive(name)
        ar['{}_args'.format(prefix)] = self.__args
        ar['{}_history'.format(prefix)] = self._history
        ar['{}_sequence'.format(prefix)] = [self._sequence.offset, self._sequence.length, list(self._sequence)]
        ar['{}_tagged'.format(prefix)] = self._tagged
        if copy:
            ar = deepcopy(ar)
        return ar

    def load_archive(self, ar, copy=True, name=None):
        """Restore the state written by :meth:`archive`.

        :param copy: detach the loaded state from ``ar``
        :raises RuntimeError: when called during training or when the archived
            flow arguments contradict the current ones
        """
        if self.__training:
            raise RuntimeError("archive should be loaded before training starts")
        if name is None:
            prefix = 'TrainFlow'
        else:
            prefix = '{}_TrainFlow'.format(name)
        super(TrainFlow, self).load_archive(ar, copy=copy, name=name)
        self._sequence = utils.OffsetList(*ar['{}_sequence'.format(prefix)], copy=copy, managed=True)
        self._tagged = ar['{}_tagged'.format(prefix)]
        self._history = ar['{}_history'.format(prefix)]
        if gconf.debug:
            print("[debug] train history: {}".format(self._history))
        self.__check_flowargs(ar['{}_args'.format(prefix)])
        if copy:
            self._history = deepcopy(self._history)
            # the tagged dict must not stay aliased to the archive either
            self._tagged = deepcopy(self._tagged)

    def __check_flowargs(self, old_args):
        """Adopt the archived arguments, rejecting conflicts with explicitly given ones."""
        for n in old_args:
            if self.__args.get(n, None) is not None and self.__args[n] != old_args[n]:
                raise RuntimeError("Argument mismatch {}: {}(old) vs. {}(cur)".format(n, old_args[n], self.__args[n]))
            self.__args[n] = old_args[n]

    # checkpoints
    # this is different from archive system that only params related to latest train are considered
    def save_model(self, copy=True):
        """Return ``[embeddings of the current training range, tagged dict]``."""
        model = [[self.embeddings_at(i) for i in range(self.cur_train_begin, self.cur_train_end)],
                 self._tagged]
        if copy:
            model = deepcopy(model)
        return model

    def restore_model(self, model, begin=None, end=None, copy=True):
        """Write a checkpoint from :meth:`save_model` back to ``[begin, end)``.

        ``begin`` and ``end`` default to the current training range.

        :raises RuntimeError: if the checkpoint length does not match the range
        """
        if begin is None:
            begin = self.cur_train_begin
        if end is None:
            end = self.cur_train_end
        if len(model[0]) != end - begin:
            raise RuntimeError("trying to restore invalid model with length {} to range [{}, {})"
                               .format(len(model[0]), begin, end))
        if copy:
            model = deepcopy(model)
        self._tagged = model[1]
        for i in range(begin, end):
            self._sequence[i] = model[0][i - begin]
class TrainFlowView(TrainFlow, WithData):
    """A TrainFlow that cannot be trained and has no dataset attached."""

    def __init__(self, **flowargs):
        super(TrainFlowView, self).__init__(**flowargs)

    def start_training(self, begin, end):
        """Training is not available on a view."""
        raise NotImplementedError()

    def stop_training(self):
        """Training is not available on a view."""
        raise NotImplementedError()

    @property
    def dataset(self):
        """A view is not bound to any dataset."""
        return None
# the sub class must implement make_features,
# TODO: this is almost the same as class StdTests, with slight differences such as this class
# TODO: focuses only on f1 score, etc, consider merging these two classes in the future
class Validator(object):
    """Mixin computing a validation score for a set of labelled samples.

    The host class must provide ``make_features(samples)``.  Scores are the
    mean F1 of a logistic regression over a shuffled 2-fold stratified split.
    """

    tasks = 'link_prediction', 'link_reconstruction', 'node_classify', 'node_predict', 'none'

    @property
    def __task_handler(self):
        """Map task names to handlers; unknown names resolve to ``__unknown``."""
        handlers = defaultdict(lambda: self.__unknown)
        handlers.update({'link_prediction': self._validate_link_reconstruction,
                         'link_reconstruction': self._validate_link_reconstruction,
                         'node_classify': self._validate_node_classify,
                         'node_predict': self._validate_node_classify,
                         'none': self.__none})
        return handlers

    @staticmethod
    def __two_fold_f1(clf, feat, lbs):
        """Mean F1 of ``clf`` over a shuffled 2-fold stratified split.

        Both the legacy ``sklearn.cross_validation`` and the current
        ``sklearn.model_selection`` flavours of StratifiedKFold are supported.
        """
        # labels are indexed with fold index arrays below, which plain lists do not support
        lbs = np.asarray(lbs)
        try:
            parts = StratifiedKFold(lbs, n_folds=2, shuffle=True)
        except TypeError:
            parts = StratifiedKFold(n_splits=2, shuffle=True).split(feat, lbs)
        scores = []
        for tr, te in parts:
            model = clf.fit(feat[tr], lbs[tr])
            scores.append(f1_score(lbs[te], model.predict(feat[te])))
        return np.mean(scores)

    def _validate_link_reconstruction(self, samples, lbs):
        """Score edge samples using ``|emb(node1) - emb(node2)|`` as features."""
        feat = self.make_features(samples)
        feat = np.abs(feat[:, 0] - feat[:, 1])
        return self.__two_fold_f1(LogisticRegression(), feat, lbs)

    def _validate_node_classify(self, samples, lbs):
        """Score node samples using the embedding of the first node as features."""
        # note that the 1-st dimension of feat is for each node in each sample (time, node1, node2, ...)
        feat = self.make_features(samples)[:, 0]
        assert len(feat) == len(lbs)
        return self.__two_fold_f1(LogisticRegression(class_weight='balanced'), feat, lbs)

    def __none(self, samples, lbs):
        """Handler of the 'none' task: always scores 0."""
        return 0

    def __unknown(self, samples, lbs):
        """Handler of unrecognised task names."""
        raise NotImplementedError()

    def validate(self, task, samples, lbs):
        """Return the validation score of ``task`` for the given samples and labels.

        :raises NotImplementedError: if ``task`` is not a known task name
        """
        return self.__task_handler[task](samples, lbs)
# class Validation(Validator, ValidationSampler):
# @property
# def tasks(self):
# return list(set(Validator.tasks.fget(self)).intersection(set(ValidationSampler.tasks.fget(self))))
================================================
FILE: core/algorithm/samplers/__init__.py
================================================
================================================
FILE: core/algorithm/samplers/pos_neg.py
================================================
from __future__ import print_function
from __future__ import absolute_import
import numpy as np
import random
from itertools import izip
from . import sampler
from core import gconfig as gconf
from core import utils
from core.algorithm.embutils import WithData
class Sampler(sampler.Sampler, WithData):
    """Draws positive edge samples and matching negative (non-edge) samples.

    Positive samples are ``[step, src, tgt]`` rows taken from the graphs of the
    dataset; every positive sample gets ``negdup`` negative pairs in which
    either the source or the target is replaced by a non-adjacent node.

    Keyword arguments:

    * ``negdup`` -- number of negative pairs per positive sample (default 1)
    * ``replace_cache`` -- precompute the non-neighbours of every node
      instead of rejection sampling (default False)
    """

    def __init__(self, **kwargs):
        self._pos = None
        self._pos_range = [-1, 1]
        self._neg = None
        self._valid = None
        self._negdup = kwargs.get('negdup', 1)
        # A single-underscore name is used on purpose: a double-underscore name
        # would be mangled with the class name and clash with subclasses that
        # are also called ``Sampler``.
        self._enable_replace_cache = kwargs.get('replace_cache', False)
        self._replace_cache = utils.OffsetList(self.dataset.gtgraphs.offset, len(self.dataset.gtgraphs),
                                               lambda x: None)

    def __make_pos(self, begin, end):
        """Collect every edge of the steps ``[begin, end)`` as a positive sample.

        :return: ``[data, weight]`` with ``data`` an int32 array of
            ``[step, src, tgt]`` rows (``src <= tgt``) and ``weight`` the
            float32 edge weights in the same order
        :raises RuntimeError: if the graphs contain no edge at all
        """
        data = []
        weight = []
        # TODO: remove this ugly fix
        nodenames = list(self.dataset.gtgraphs['any'].vp['name'])
        for i in range(begin, end):
            assert not self.dataset.gtgraphs[i].is_directed()
            for e in self.dataset.gtgraphs[i].edges():
                src, tgt = int(e.source()), int(e.target())
                if src > tgt:
                    src, tgt = tgt, src
                nsrc, ntgt = nodenames[src], nodenames[tgt]
                if gconf.debug:  # debug only, because .edge is slow
                    assert self.dataset.mygraphs[i].exists(nsrc, ntgt), \
                        "{}: {} {}".format(i, nsrc, ntgt)
                data.append([i, src, tgt])
                weight.append(self.dataset.mygraphs[i].edge(nsrc, ntgt))
        data = np.array(data, dtype='int32')
        weight = np.array(weight, dtype='float32')
        if len(data) == 0:
            raise RuntimeError("No positive sample is generated given an empty graph")
        expected = sum([g.num_edges() for g in self.dataset.gtgraphs[begin:end]])
        assert len(data) == expected, "{}, {}".format(len(data), expected)
        return [data, weight]

    def pretrain_begin(self, begin, end):
        self._pos_range = [begin, end]
        self._pos = self.__make_pos(begin, end)

    def pretrain_end(self):
        pass

    def __make_neg(self, posdata, negdup=1):
        """Draw ``negdup`` negative pairs for every positive sample.

        For each pair a coin flip decides whether the source or the target is
        replaced by a node that is not adjacent to the kept end point.

        :return: array of shape ``(len(posdata), 2 * negdup)`` holding the
            flattened ``(src, tgt)`` pairs of every positive sample
        """
        negdata = []
        # TODO: this is an ugly fix, try to add indexing support in mygraph
        nodenames = list(self.dataset.gtgraphs['any'].vp['name'])
        for k, src, tgt in posdata:
            pairs = []
            for _ in range(negdup):
                if utils.crandint(2) == 0:  # replace source
                    if self._enable_replace_cache:
                        curcache = self._rep_cache(k)[tgt]
                        new_src = curcache[utils.crandint(len(curcache))]
                    else:
                        # TODO: although it is almost impossible for a node to have all edges,
                        # check this in advance
                        new_src = utils.crandint(self.dataset.gtgraphs[k].num_vertices())
                        assert not self.dataset.gtgraphs[k].is_directed()
                        while self.dataset.mygraphs[k].exists(nodenames[new_src], nodenames[tgt]):
                            new_src = utils.crandint(self.dataset.gtgraphs[k].num_vertices())
                    pairs.extend([new_src, tgt])
                else:  # replace target
                    if self._enable_replace_cache:
                        curcache = self._rep_cache(k)[src]
                        new_tgt = curcache[utils.crandint(len(curcache))]
                    else:
                        new_tgt = utils.crandint(self.dataset.gtgraphs[k].num_vertices())
                        while self.dataset.mygraphs[k].exists(nodenames[src], nodenames[new_tgt]):
                            new_tgt = utils.crandint(self.dataset.gtgraphs[k].num_vertices())
                    pairs.extend([src, new_tgt])
            negdata.append(pairs)
        negdata = np.array(negdata)
        assert negdata.shape == (len(posdata), 2 * negdup), \
            "{}, {}".format(negdata.shape, (len(posdata), 2 * negdup))
        return negdata

    # in this implementation, negative samples rely wholly on positive samples,
    # as a result, begin and end params are ignored in make_neg
    def pretrain_begin_iteration(self):
        # TODO: modify __make_neg to return a list
        self._neg = [self.__make_neg(self._pos[0], negdup=self._negdup)]

    def pretrain_end_iteration(self):
        pass

    def online_begin(self, begin, end):
        assert begin == end - 1
        self._pos = self.__make_pos(begin, end)
        # NOTE(review): the range is recorded as [begin, begin] rather than
        # [begin, end] -- confirm that this is intended.
        self._pos_range = [begin, begin]

    def online_end(self):
        pass

    def online_begin_iteration(self):
        self.shuffle_sample()
        self._neg = [self.__make_neg(self._pos[0], negdup=self._negdup)]

    def online_end_iteration(self):
        pass

    def make_pretrain_input(self, batch):
        """Pair every positive sample with each of its negative pairs.

        :param batch: sequence starting with ``(pos, weight, neg)``
        :return: ``[data, weight]`` where ``data`` has one
            ``[step, src, tgt, neg_src, neg_tgt]`` row per negative pair
        """
        pos, weight, neg = batch[:3]
        assert neg.shape[1] % 2 == 0
        # floor division keeps the count an int regardless of the division mode
        dupneg = neg.shape[1] // 2
        data = []
        # TODO: by doing so, we always train samples from the same edge together, does this matter?
        for i in range(len(pos)):
            for j in range(0, len(neg[i]), 2):
                data.append(list(pos[i]) + [neg[i][j], neg[i][j + 1]])
        # an empty batch must still be 2-dimensional
        data = np.array(data) if data else np.empty((0, 5), dtype='int32')
        assert data.shape == (len(pos) * dupneg, 5)
        return [data, weight]

    def make_online_input(self, batch):
        # polymorphism is not expected here, since make_pretrain_input and make_online_input are
        # conceptually different; Sampler.make_pretrain_input is called simply to avoid duplication
        return Sampler.make_pretrain_input(self, batch)

    def _make_rep_cache(self, k):
        """Precompute, for every vertex of step ``k``, the vertices it is not adjacent to."""
        g = self.dataset.gtgraphs[k]
        all_nodes = set(range(g.num_vertices()))
        cache = []
        for j in range(g.num_vertices()):
            neighbours = set([int(v) for v in g.vertex(j).out_neighbours()])
            cache.append(list(all_nodes - neighbours))
        # published only once complete, so a failure cannot leave a partial cache behind
        self._replace_cache[k] = cache

    def _rep_cache(self, k):
        """Return the replacement cache of step ``k``, building it on first use."""
        if self._replace_cache[k] is None:
            self._make_rep_cache(k)
        return self._replace_cache[k]

    def shuffle_sample(self):
        """Shuffle the positive samples, keeping all parallel arrays aligned."""
        self._pos[0], order, _ = utils.shuffle_sample(self._pos[0], return_order=True)
        for i in range(1, len(self._pos)):
            self._pos[i] = utils.apply_order(self._pos[i], order)

    # in this model, negative samples are drawn after shuffling positive samples
    def batches(self, batchsize):
        """Yield aligned slices of all positive and negative sample arrays."""
        samples = self._pos + self._neg
        for i, s in enumerate(samples):
            assert len(s) == self.sample_size(), "{}-th: {} {}".format(i, len(s), self.sample_size())
        isamp = [utils.islice_sample(s, chunk=batchsize) for s in samples]
        for s in izip(*isamp):
            yield s

    def sample_size(self):
        """Number of positive samples."""
        return len(self._pos[0])
================================================
FILE: core/algorithm/samplers/pos_neg_tri.py
================================================
# triangle data format:
# time step of the open triangle
# open triangle center node
# open triangle node 1
# open triangle node 2
# label/coefficient
# weight 1
# weight 2
from __future__ import print_function
from __future__ import absolute_import
import sys
from . import pos_neg
from core.algorithm.embutils import WithData
import core.gconfig as gconf
from core import utils
from collections import defaultdict
class Sampler(pos_neg.Sampler, WithData):
    """pos_neg.Sampler that additionally samples open triangles.

    Every triangle sample has the layout
    ``[step, center, node1, node2, label, weight1, weight2]`` where ``label``
    tells whether ``node1`` and ``node2`` are connected in the next step.

    Keyword arguments (besides those of ``pos_neg.Sampler``):

    * ``triangle_cache`` -- use the precomputed triangle cache for sampling
      (not implemented, default False)
    """

    def __init__(self, **kwargs):
        super(Sampler, self).__init__(**kwargs)
        # NOTE: this must not be a double-underscore attribute.  The parent
        # class is also named ``Sampler``, so ``self.__enable_cache`` would be
        # mangled to the very same ``_Sampler__enable_cache`` and silently
        # overwrite the parent's ``replace_cache`` switch.
        self._enable_triangle_cache = kwargs.get('triangle_cache', False)
        self._triangular_cache = utils.OffsetList(self.dataset.localstep, self.dataset.nsteps, lambda x: None)
        self.__nbr_cache = [{} for _ in range(self.dataset.nsteps)]
        # running success statistics used to size the sampling rounds
        self.__succ_trial, self.__all_trial = 10, 12

    # in this implementation, negative samples rely wholly on positive samples,
    # as a result, begin and end params are ignored in make_neg
    def pretrain_begin_iteration(self):
        """Draw negative samples, then one triangle sample per positive sample."""
        super(Sampler, self).pretrain_begin_iteration()
        target_cnt = len(self._pos[0])
        # the last time slice is excluded: labelling needs the graph of the next step
        filtered_pos = [p for p in self._pos[0] if p[0] + 1 < self._pos_range[1]]
        if len(filtered_pos) <= 0:
            print("No possible triangular samples, given positive range {} to {}".
                  format(self._pos_range[0], self._pos_range[1]))
            # placeholders, in order to pass the length assertion in batches()
            triagdata = [None] * target_cnt
        elif self._enable_triangle_cache:
            raise NotImplementedError()
        else:
            mapper = utils.ParMap(self.__uncached_sampler_factory(), self.__sample_uncached_monitor,
                                  njobs=gconf.njobs)
            triagdata = []
            sample_round = 0
            while len(triagdata) < target_cnt:
                left_cnt = target_cnt - len(triagdata)
                print("sample round {}, target #samples {}".format(sample_round, left_cnt))
                sample_round += 1
                # oversample to increase the probability of finishing in a single round
                left_cnt = int(left_cnt * (float(self.__all_trial) / self.__succ_trial + 0.2))
                if left_cnt < 100:
                    left_cnt = 100
                    mapper.njobs = 1
                # pick a random window of left_cnt positives; when the pool is not larger
                # than the request there is nothing to randomise (and crandint must not
                # be called with a non-positive bound)
                slack = len(filtered_pos) - left_cnt
                lb = utils.crandint(slack) if slack > 0 else 0
                ub = min(lb + left_cnt, len(filtered_pos))
                newsamples = mapper.run(filtered_pos[lb:ub])
                self.__all_trial += (ub - lb)
                self.__succ_trial += len(newsamples)
                triagdata.extend(newsamples)
            triagdata = triagdata[:target_cnt]
        self._neg.append(triagdata)  # neg, triangdata_int, triangdata_float

    def _triag_cache(self, k, knode, onode):
        """Return the cached open-triangle candidates of edge ``(knode, onode)`` at step ``k``."""
        key = "{},{}".format(knode, onode)
        if self._triangular_cache[k] is None:
            self._make_triag_cache(k)
        try:
            return self._triangular_cache[k][key]
        except KeyError:
            print(self._triangular_cache[k])
            raise

    def _make_triag_cache(self, t):
        """Build the triangle cache of step ``t``.

        For every edge ``(i, j)`` the cache lists the neighbours ``k`` of ``i``
        that are different from ``j`` and not adjacent to ``j``.
        """
        print("making triag cache for {}".format(t))
        self._triangular_cache[t] = {}
        g = self.dataset.mygraphs[t]
        name2idx = {n: i for i, n in enumerate(self.dataset.gtgraphs[t].vp['name'])}
        # NOTE: all following computations are based on names instead of indices
        for vi in g.vertices():
            nbr = list(g.out_neighbours(vi))
            for vj in nbr:
                i, j = name2idx[vi], name2idx[vj]
                key = "{},{}".format(i, j)
                curcache = self._triangular_cache[t][key] = []
                for vk in nbr:
                    k = name2idx[vk]
                    if vk != vj and not g.exists(vj, vk):
                        curcache.append(k)
        print("end making triag cache for {}".format(t))

    # batch: (pos, weight, neg, triadint, triadfloat)
    def make_pretrain_input(self, batch):
        """Append the triangle part of the batch to the input of the parent class."""
        base_input = pos_neg.Sampler.make_pretrain_input(self, batch)
        return base_input + list(batch[3:])

    # for parallel uncached sampling
    @staticmethod
    def __sample_uncached_monitor(reportq):
        """Print the progress reported by the sampling workers.

        Messages are ``[proc id, processed, succeeded, tries]``;
        ``[proc id, None]`` announces the end of a worker and a
        ``StopIteration`` instance ends the monitor.
        """
        procinfo = {}
        proc_reent = defaultdict(lambda: 0)

        def summary():
            stats = list(procinfo.values())
            total_proccnt = sum([v[0] for v in stats])
            total_availcnt = sum([v[1] for v in stats])
            total_trycnt = sum([v[2] for v in stats])
            return "{} samples processed, {} succeeded with avg try cnt {}".format(
                total_proccnt, total_availcnt, float(total_trycnt) / max(total_availcnt, 1))

        while True:
            obj = reportq.get()
            if isinstance(obj, StopIteration):
                break
            if obj[1] is None:  # a proc terminates
                # archive its numbers under a unique key, the proc id may be reused
                procinfo["{}_{}".format(obj[0], proc_reent[obj[0]])] = procinfo.pop(obj[0])
                proc_reent[obj[0]] += 1
                continue
            procinfo[obj[0]] = obj[1:]
            print(summary() + "\r", end='')
            sys.stdout.flush()
        print(summary())

    def __uncached_sampler_factory(self):
        """Create the worker function handed to ``utils.ParMap``.

        Everything the worker needs is captured in local variables to avoid
        sharing the whole ``self`` object between processes.
        """
        sample_one_uncached = self.__sample_one_uncached
        # dill seems not to work with cython objects, this workaround requires
        # __sample_one_uncached to accept localstep as an argument
        mygraphs = list(self.dataset.mygraphs)
        nodenames = list(self.dataset.gtgraphs['any'].vp['name'])
        # the order defined by vp should be the same as mygraphs.vertices(),
        # which is also the storing order of the embedding
        for v1, v2 in zip(nodenames, self.dataset.mygraphs['any'].vertices()):
            assert v1 == v2, (v1, v2, type(v1), type(v2))
        name2idx = {n: i for i, n in enumerate(nodenames)}
        localstep = self.dataset.localstep

        def sample_uncached(process, data, reportq):
            total_trycnt = 0
            total_avail_cnt = 0
            ret = []
            for i, sample in enumerate(data):
                if i % 10000 == 0:
                    reportq.put([id(process), i, total_avail_cnt, total_trycnt])
                curres, trycnt = sample_one_uncached(sample, nodenames, name2idx, mygraphs, localstep)
                total_trycnt += trycnt
                if curres is not None:
                    total_avail_cnt += 1
                    ret.append(curres)
            reportq.put([id(process), len(data), total_avail_cnt, total_trycnt])
            reportq.put([id(process), None])  # signal for terminate
            return ret

        return sample_uncached

    @staticmethod
    def __sample_one_uncached(data, nodenames, name2idx, mygraphs, localstep):
        """Try to extend the edge ``data = (step, src, tgt)`` to an open triangle.

        One end point is picked at random as the center; a third node is then
        drawn from the neighbours of the center such that it is not adjacent
        to the other end point.  After 5 failed random draws the neighbours
        are filtered exhaustively.

        :return: ``(sample, trycnt)`` where ``sample`` is
            ``[step, center, other, new, label, w(center, other), w(center, new)]``
            or None when no open triangle exists
        """
        # convert from np types to int, to avoid problems in c extensions
        k, src, tgt = [int(d) for d in data]
        myg = mygraphs[k - localstep]
        mynextg = mygraphs[k + 1 - localstep]
        if utils.crandint(2) == 0:  # target as key point
            center, other = tgt, src
        else:  # src as key point
            center, other = src, tgt

        trycnt = 0
        nbr = myg.out_neighbours(nodenames[center])
        new_node = name2idx[nbr[utils.crandint(len(nbr))]]
        while new_node == center or new_node == other or \
                not myg.exists(nodenames[center], nodenames[new_node]) or \
                myg.exists(nodenames[other], nodenames[new_node]):
            if trycnt >= 5:
                break
            new_node = name2idx[nbr[utils.crandint(len(nbr))]]
            trycnt += 1
        if trycnt >= 5:
            # random draws failed, fall back to an exhaustive search
            cand = [name2idx[n] for n in nbr]
            cand = [n for n in cand if n != other and n != center and
                    not myg.exists(nodenames[n], nodenames[other])]
            if len(cand) <= 0:
                return None, trycnt
            new_node = cand[utils.crandint(len(cand))]

        ret = [k, center, other, new_node,
               mynextg.exists(nodenames[other], nodenames[new_node]),
               myg.edge(nodenames[center], nodenames[other]),
               myg.edge(nodenames[center], nodenames[new_node])]
        # three distinct nodes and two positive edge weights
        assert len(set(ret[1:4])) == 3 and ret[5] > 0 and ret[6] > 0, ret
        return ret, trycnt

    def __debug_and_count_triangles(self, nodenames):
        """For debugging: count the open triangles of every step but the last."""
        for i in range(self.dataset.localstep, self.dataset.localstep + self.dataset.nsteps - 1):
            triagcnt = 0
            edgeset = set()
            # same stat as above two, except that the missing edge exists in next graph
            postriagcnt = 0
            posedgeset = set()
            for e in self.dataset.gtgraphs[i].edges():
                isrc, itgt = int(e.source()), int(e.target())
                # each end point of the edge acts as the center once
                for knode, onode in ((isrc, itgt), (itgt, isrc)):
                    for v in self._triag_cache(i, knode, onode):
                        triagcnt += 1
                        v1, v2 = min(onode, v), max(onode, v)
                        edgeset.add((v1, v2))
                        if self.dataset.mygraphs[i + 1].exists(nodenames[v1], nodenames[v2]):
                            postriagcnt += 1
                            posedgeset.add((v1, v2))
            assert triagcnt % 2 == 0 and postriagcnt % 2 == 0
            # floor division keeps the counters integral
            triagcnt //= 2
            postriagcnt //= 2
            print("for {}-th graph".format(i))
            print("{} edges forms {} open triangles".format(len(edgeset), triagcnt))
            print("{} edges appear in next graph, forming {} open triangles".format(len(posedgeset), postriagcnt))
================================================
FILE: core/algorithm/samplers/sampler.py
================================================
from __future__ import print_function
class Sampler(object):
    """Interface of the samplers driven by the training loop.

    All hooks default to doing nothing and returning None; ``batches`` and
    ``sample_size`` have to be provided by concrete samplers.
    """

    def pretrain_begin(self, begin, end):
        """Hook called with the step range before pretraining starts."""
        return None

    def pretrain_end(self):
        """Hook called after pretraining."""
        return None

    def pretrain_begin_iteration(self):
        """Hook called before a pretraining iteration."""
        return None

    def pretrain_end_iteration(self):
        """Hook called after a pretraining iteration."""
        return None

    def online_begin(self, begin, end):
        """Hook called with the step range before online training starts."""
        return None

    def online_end(self):
        """Hook called after online training."""
        return None

    def online_begin_iteration(self):
        """Hook called before an online training iteration."""
        return None

    def online_end_iteration(self):
        """Hook called after an online training iteration."""
        return None

    def make_pretrain_input(self, batch):
        """Convert a batch into the input of the pretraining model."""
        return None

    def make_online_input(self, batch):
        """Convert a batch into the input of the online model."""
        return None

    def shuffle_sample(self):
        """Shuffle the samples held by the sampler."""
        return None

    def batches(self, batchsize):
        """Iterate over the samples in batches; must be overridden."""
        raise NotImplementedError()

    def sample_size(self):
        """Number of samples; must be overridden."""
        raise NotImplementedError()
================================================
FILE: core/cython_src/README.txt
================================================
TODO: this directory is needed because Cython always tries to detect the
enclosing package, i.e., it always compiles and installs to $PROJROOT/graphemb.*
(or $PROJROOT/graphemb/server.* etc.). As a result, we have to keep the Cython
sources in a non-package directory. Is there a better way to do this?
================================================
FILE: core/cython_src/utils_cy.pyx
================================================
# distutils: language=c++
from __future__ import print_function
import numpy as np
from collections import defaultdict
import itertools
import sys
from copy import deepcopy
from libcpp cimport bool
from libcpp.map cimport map
from libc.stdlib cimport rand, srand, RAND_MAX
from libc.time cimport time as ctime
class KeyDefaultDict(defaultdict):
    """A ``defaultdict`` whose factory is called with the missing key.

    On a miss the value ``default_factory(key)`` is stored under ``key`` and
    returned.  Without a factory a miss raises ``KeyError``.
    """

    def __missing__(self, key):
        factory = self.default_factory
        if factory is None:
            raise KeyError(key)
        value = factory(key)
        self[key] = value
        return value
def slice_sample(sample, chunk=None, nslice=None):
    """Split ``sample`` into consecutive slices and return them as a list.

    Exactly one of ``chunk`` (items per slice) and ``nslice`` (used to derive
    the slice size as ``int(len(sample) / nslice)``) should be given.  The
    last slice may be shorter than the others.

    Raises:
        RuntimeError: if both ``chunk`` and ``nslice`` are specified.
        ValueError: if ``sample`` is non-empty and the resulting slice size
            is not positive (previously this looped forever).
    """
    cdef int ichunk
    cdef int curstart
    slices = []
    if chunk is None:
        ichunk = int(len(sample) / nslice)
    else:
        if nslice is not None:
            raise RuntimeError("chunk ({}) and slice ({}) should not be specified simultaneously".format(chunk, nslice))
        ichunk = int(chunk)
    total = len(sample)
    # A non-positive step would never advance past the end of a non-empty
    # sample, so reject it up front instead of spinning.
    if total > 0 and ichunk <= 0:
        raise ValueError("slice size must be positive, got {}".format(ichunk))
    curstart = 0
    while curstart < total:
        slices.append(sample[curstart:min(curstart + ichunk, total)])
        curstart += ichunk
    return slices
def islice_sample(sample, chunk=None, nslice=None):
    """Lazily yield consecutive slices of ``sample``.

    Generator counterpart of ``slice_sample``: give either ``chunk`` (items
    per slice) or ``nslice`` (slice size becomes ``int(len(sample) / nslice)``).
    The last slice may be shorter than the others.

    Raises (on first iteration, since this is a generator):
        RuntimeError: if both ``chunk`` and ``nslice`` are specified.
        ValueError: if ``sample`` is non-empty and the slice size is not
            positive (previously this yielded empty slices forever).
    """
    if chunk is None:
        chunk = int(len(sample) / nslice)
    elif nslice is not None:
        raise RuntimeError("chunk ({}) and slice ({}) should not be specified simultaneously".format(chunk, nslice))
    total = len(sample)
    # A non-positive step never reaches the end of a non-empty sample.
    if total > 0 and chunk <= 0:
        raise ValueError("slice size must be positive, got {}".format(chunk))
    curstart = 0
    while curstart < total:
        yield sample[curstart:min(curstart + chunk, total)]
        curstart += chunk
def shuffle_sample(sample, return_order=False):
    """Return the items of ``sample`` in a random order.

    With ``return_order=True`` the result is a tuple
    ``(shuffled, order, invorder)``, where ``shuffled[i] == sample[order[i]]``
    and ``invorder`` (an int32 array) satisfies ``invorder[order[i]] == i``.
    Otherwise only the shuffled list is returned.
    """
    count = len(sample)
    order = np.random.permutation(np.arange(count))
    # Inverse permutation: position of each original index after shuffling.
    invorder = np.zeros((count, ), dtype='int32')
    invorder[order] = np.arange(count)
    shuffled = apply_order(sample, order)
    if not return_order:
        return shuffled
    return shuffled, order, invorder
def apply_order(sample, order):
    """Return a list holding ``sample[o]`` for each ``o`` in ``order``."""
    picked = []
    for position in order:
        picked.append(sample[position])
    return picked
# archive protocol
cdef class Archivable(object):
    """Root of the archive protocol.

    ``archive`` returns a new empty dict and ``load_archive`` does nothing;
    subclasses extend both.
    """

    def archive(self, name=None, copy=True):
        """Return a fresh, empty archive dict (arguments are ignored here)."""
        return dict()

    def load_archive(self, ar, copy=True, name=None):
        """Accept an archive and do nothing with it."""
        return None
cdef class OffsetList(Archivable):
    """List-like container indexed by ``offset .. offset + length - 1``.

    Items come either from an indexable ``datasrc`` (anything with
    ``__getitem__``) or from a factory callable ``datasrc(step)`` that is
    invoked the first time a step is read; the result is then stored.
    Negative indices are shifted by ``offset + length``.  The key ``'any'``
    returns the first item.
    """
    cdef public int offset
    cdef public int length
    cdef public int __iter
    cdef public bool __managed
    cdef public map[int, bool] __accessed
    cdef public object __factory
    cdef public object __items

    def __cinit__(self, offset, length, datasrc, copy=True, managed=None):
        # C-level fields are initialised here; __init__ below only sets up
        # the item storage and the factory.
        self.offset = offset
        self.length = length
        if managed is None:
            self.__managed = False
        else:
            self.__managed = managed
        self.__iter = 0

    def __init__(self, offset, length, datasrc, copy=True, managed=None):
        """Set up storage from ``datasrc``.

        An indexable ``datasrc`` is used directly (``copy=False``) or
        deep-copied (``copy=True``); a non-indexable one is treated as a
        factory and storage is pre-filled with ``None``.  Unless ``managed``
        is given explicitly, the list is managed (appendable) except when it
        wraps the caller's data without copying.
        """
        if hasattr(datasrc, '__getitem__'):
            if not copy:
                self.__items = datasrc
                if managed is None:
                    self.__managed = False
            else:
                self.__items = deepcopy(datasrc)
                if managed is None:
                    self.__managed = True
            self.__factory = lambda x: self.__items[x - self.offset]
        else:
            self.__items = [None] * self.length
            self.__factory = datasrc
            if managed is None:
                self.__managed = True
        # self.__iter = 0
        # self.__accessed = {}

    def __len__(self):
        return self.length

    cdef __normalize_slice(self, slc):
        # Fill in missing slice fields with the full offset range and map
        # negative bounds the same way as negative indices.
        cdef int start, stop, step
        if slc.start is None:
            start = self.offset
        else:
            start = int(slc.start)
        if slc.stop is None:
            stop = self.offset + self.length
        else:
            stop = int(slc.stop)
        if slc.step is None:
            step = 1
        else:
            step = int(slc.step)
        start = self.__normalize_neg_index(start)
        stop = self.__normalize_neg_index(stop)
        return slice(start, stop, step)

    cdef int __normalize_neg_index(self, int idx):
        # -1 maps to the last valid index, offset + length - 1.
        if idx < 0:
            return idx + self.offset + self.length
        else:
            return idx

    def __setitem__(self, key, item):
        """Store one item, or a sequence of items for a slice key.

        Raises ``ValueError`` on a slice/value length mismatch and
        ``KeyError`` when the key falls outside the offset range.
        """
        if isinstance(key, slice):
            slc = self.__normalize_slice(key)
            rg = range(slc.start, slc.stop, slc.step)
            if len(rg) != len(item):
                raise ValueError("Trying to set {} items with {} value".format(len(rg), len(item)))
            if slc.start < self.offset or slc.stop > self.offset + self.length:
                raise KeyError("{} not in range [{}, {})"
                               .format(slc, self.offset, self.offset + self.length))
            for step, itm in zip(rg, item):
                self._store_item(step, itm)
        else:
            key = self.__normalize_neg_index(key)
            if key < self.offset or key >= self.offset + self.length:
                raise KeyError("{} not in range [{}, {})"
                               .format(key, self.offset, self.offset + self.length))
            self._store_item(key, item)

    def __getitem__(self, key):
        """Return one item, a list of items for a slice, or the first item for ``'any'``.

        Raises ``KeyError`` when the key falls outside the offset range.
        """
        if key == 'any':
            return self._load_item(self.offset)
        if isinstance(key, slice):
            slc = self.__normalize_slice(key)
            rg = range(slc.start, slc.stop, slc.step)
            if slc.start < self.offset or slc.stop > self.offset + self.length:
                raise KeyError("{} not in range [{}, {})"
                               .format(slc, self.offset, self.offset + self.length))
            items = []
            for step in rg:
                items.append(self._load_item(step))
            return items
        else:
            key = self.__normalize_neg_index(int(key))
            if key < self.offset or key >= self.offset + self.length:
                raise KeyError("{} not in range [{}, {})"
                               .format(key, self.offset, self.offset + self.length))
            return self._load_item(int(key))

    def __array__(self):
        # np.array fails when len(self) == 1, I have no idea why this happens but have to specify array interface manually
        return np.asarray(list(self))

    def __iter__(self):
        # The object is its own iterator: the cursor lives on the instance,
        # so starting a new iteration resets any iteration in progress.
        self.__iter = self.offset
        return self

    # for py3 compatibility
    # def __next__(self):
    #     return self.next()

    def append(self, item):
        """Append one item; only allowed on managed lists (else ``RuntimeError``)."""
        if not self.__managed:
            raise RuntimeError("Cannot append to unmanaged OffsetList")
        self.length += 1
        self.__items.append(item)

    def extend(self, lst):
        """Append all items of ``lst``; only allowed on managed lists (else ``RuntimeError``)."""
        if not self.__managed:
            raise RuntimeError("Cannot extend an unmanaged OffsetList")
        self.length += len(lst)
        self.__items.extend(lst)

    def __next__(self):
        if self.__iter >= self.offset + self.length:
            raise StopIteration
        ret = self._load_item(self.__iter)
        self.__iter += 1
        return ret

    cdef _load_item(self, int step):
        # do some caching
        #if self.__accessed.get(step, None) is None:
        # First read of a step: ask the factory and remember the result.
        if self.__accessed.find(step) == self.__accessed.end():
            self.__items[step - self.offset] = self.__factory(step)
            self.__accessed[step] = True
        return self.__items[step - self.offset]

    def _store_item(self, step, itm):
        # Explicitly stored items count as accessed, so the factory will not
        # overwrite them later.
        self.__items[step - self.offset] = itm
        self.__accessed[step] = True

    def archive(self, name=None, copy=True):
        """Return a dict with this list's offset, length and (fully loaded) items.

        Keys are prefixed with ``OffsetList`` or ``<name>_OffsetList``.  With
        ``copy=True`` the stored item list is deep-copied.
        """
        if name is None:
            prefix = 'OffsetList'
        else:
            prefix = '{}_OffsetList'.format(name)
        ar = super(OffsetList, self).archive(name=name, copy=copy)
        ar['{}_offset'.format(prefix)] = self.offset
        ar['{}_length'.format(prefix)] = self.length
        ar['{}_data'.format(prefix)] = list(self)
        if copy:
            ar['{}_data'.format(prefix)] = deepcopy(ar['{}_data'.format(prefix)])
        return ar

    def load_archive(self, ar, copy=False, name=None):
        """Replace this list's contents with those stored by ``archive``."""
        if name is None:
            prefix = 'OffsetList'
        else:
            prefix = '{}_OffsetList'.format(name)
        offset = ar['{}_offset'.format(prefix)]
        length = ar['{}_length'.format(prefix)]
        # __init__ does not assign offset/length (only __cinit__ does, and it
        # is not re-run here), so restore them explicitly; otherwise the
        # archived data would be indexed with this object's old range.
        self.offset = offset
        self.length = length
        self.__init__(offset, length, ar['{}_data'.format(prefix)], copy=copy)
# TODO: optimize this
def group_by(data, key=lambda x: x):
    """Partition ``data`` into lists of items sharing the same ``key(item)``.

    Groups appear in order of the first occurrence of their key, and items
    keep their input order within each group.
    """
    groups = []
    slot_of = {}
    for item in data:
        k = key(item)
        if k not in slot_of:
            # First time this key is seen: open a new group for it.
            slot_of[k] = len(groups)
            groups.append([])
        groups[slot_of[k]].append(item)
    return groups
cpdef int crandint(int ub):
    """Return ``rand() % ub`` using libc's ``rand``, or 0 when ``ub <= 0``."""
    if ub > 0:
        return rand() % ub
    return 0
# __init__
# Module-level initialisation, run once at import: seed libc's rand()
# (the generator behind crandint) with the current time.
seed = ctime(NULL)
# Uncomment to pin the seed instead of using the clock:
#seed = 1497269812
srand(seed)
# Uncomment to record the seed that was used:
#open("/tmp/random.seed", "w").write(str(seed))
================================================
FILE: core/dataset/__init__.py
================================================
================================================
FILE: core/dataset/adjlist.py
================================================
from __future__ import print_function
from dataset_utils import DatasetBase
from .. import mygraph
from .. import mygraph_utils as mgutils
class Dataset(DatasetBase):
    """Dynamic-graph dataset read from a directory of adjacency-list files.

    The graph of time unit ``t`` is loaded from ``<datafn>/<t>``.  Every
    file must list the same vertices in the same order as the first file
    loaded.
    """

    @property
    def inittime(self):
        return 0

    def __init__(self, datafn, localtime, nsteps, stepsize, stepstride, offset=0, dataname=None):
        self.datafn = datafn
        self.__datadir = datafn
        # Set before the base constructor runs so that _load_unit_graph can
        # never observe a missing attribute.
        self.__vertices = None
        DatasetBase.__init__(self, datafn, localtime, nsteps, stepsize, stepstride, offset, dataname)

    @property
    def name(self):
        return "adjlist"

    # required by Timeline
    def _time2unit(self, tm):
        return int(tm)

    def _unit2time(self, unit):
        return str(unit)

    def __check_vertices(self, vs):
        """Raise ``AssertionError`` unless ``vs`` equals the reference vertex list.

        The exception is raised explicitly (not via ``assert``) so the check
        is still performed when Python runs with ``-O``.
        """
        if len(vs) != len(self.__vertices):
            raise AssertionError((len(vs), len(self.__vertices)))
        for i in range(len(vs)):
            if vs[i] != self.__vertices[i]:
                raise AssertionError((i, vs[i], self.__vertices[i]))

    # required by DynamicGraph
    def _load_unit_graph(self, tm):
        """Load the graph file for time ``tm``.

        Raises:
            RuntimeError: if its vertices differ from those of the first
                file loaded.
        """
        tm = self._time2unit(tm)
        fn = "{}/{}".format(self.__datadir, tm)
        g = mgutils.load_adjlist(fn)
        if self.__vertices is None:
            self.__vertices = g.vertices()
        else:
            try:
                self.__check_vertices(g.vertices())  # ensure all graphs share a same set of vertices
            except AssertionError as e:
                # Format the exception itself: ``e.message`` only exists on
                # Python 2.
                raise RuntimeError("Vertices in graph file {} are not compatible with files already loaded: {}"
                                   .format(fn, e))
        return g

    def _merge_unit_graphs(self, graphs, curstep):
        """Merge the unit graphs of one step into a new ``mygraph.Graph``."""
        curunit = self._time2unit(self.step2time(curstep))
        print("merging graph from year {} to {}".format(curunit, curunit + self.stepsize - 1))
        ret = mygraph.Graph(graphs[0].node_type(), graphs[0].weight_type())
        for g in graphs:
            ret.merge(g, free_other=False)
        return ret

    # required by Archivable(Archive and Cache)
    # def _full_archive(self, name=None):
    #     return self.archive(name)

    def archive(self, name=None):
        ar = super(Dataset, self).archive()
        return ar

    def load_archive(self, ar, copy=False, name=None):
        super(Dataset, self).load_archive(ar, copy=copy)
================================================
FILE: core/dataset/citation.py
================================================
from __future__ import print_function
# import graphtool_utils as gtutils
import numpy as np
import re
from six.moves import cPickle, reduce
from collections import Counter
from dataset_utils import DatasetBase
import core.gconfig as gconf
from core import utils, mygraph
class Dataset(DatasetBase):
    """Dynamic-graph dataset loaded from a pickled citation/academic archive.

    The pickle is read as a dict; this class uses its ``'args'``
    (``minyear``/``maxyear``), ``'graphs'``, ``'conf_feat'``, ``'conf_names'``
    and ``'confmap'`` entries.
    """

    @property
    def inittime(self):
        return self.__data['args']['minyear']

    def __init__(self, datafn, localyear=None, nsteps=None, stepsize=None, stepstride=None, offset=0, dataname=None):
        """Load ``datafn`` and configure the time window.

        Either pass all of ``localyear``, ``nsteps``, ``stepsize`` and
        ``stepstride``, or none of them (then the full year range of the
        data is used with unit step size and stride).

        Raises:
            RuntimeError: if only some of those four arguments are given.
        """
        self.datafn = datafn
        # NOTE(security): unpickling executes arbitrary code; only load data
        # files from a trusted source.
        # Binary mode is required for pickles and the context manager closes
        # the file; the data must be loaded before DatasetBase.__init__
        # because ``inittime`` reads it.
        with open(self.datafn, 'rb') as datafile:
            self.__data = cPickle.load(datafile)

        nonecnt = sum([int(v is None) for v in (localyear, nsteps, stepsize, stepstride)])
        if nonecnt == 4:
            # use information from the data
            localyear = self.__data['args']['minyear']
            nsteps = self.__data['args']['maxyear'] - self.__data['args']['minyear'] + 1
            stepsize = 1
            stepstride = 1
        elif nonecnt != 0:
            raise RuntimeError("You should not specify a part of dataset arguments")

        DatasetBase.__init__(self, datafn, localyear, nsteps, stepsize, stepstride, offset, dataname)

        self.__vertex_raw_labels_cache = None

    @property
    def name(self):
        return "citation"

    # required by Timeline
    def _time2unit(self, tm):
        return int(tm)

    def _unit2time(self, unit):
        return str(unit)

    # required by DynamicGraph
    def _load_unit_graph(self, tm):
        year = self._time2unit(tm)
        # return gtutils.load_graph(self.__data['graphs'][year],
        #                           fmt='mygraph', convert_to='undirected')
        return self.__data['graphs'][year]

    def _merge_unit_graphs(self, graphs, curstep):
        """Merge the unit graphs of one step into a new ``mygraph.Graph``."""
        curunit = self._time2unit(self.step2time(curstep))
        print("merging graph from year {} to {}".format(curunit, curunit + self.stepsize - 1))
        ret = mygraph.Graph(graphs[0].node_type(), graphs[0].weight_type())
        for g in graphs:
            ret.merge(g, free_other=False)
        return ret

    # required by Archivable(Archive and Cache)
    def _full_archive(self, name=None):
        self.__vertex_raw_labels()  # evaluate lazy operations
        return self.archive(name)

    def archive(self, name=None):
        """Return the base archive plus the cached raw vertex labels."""
        if name is None:
            prefix = 'Dataset'
        else:
            prefix = '{}_Dataset'.format(name)
        ar = super(Dataset, self).archive()
        ar['{}_cache'.format(prefix)] = [self.__vertex_raw_labels_cache]
        return ar

    def load_archive(self, ar, copy=False, name=None):
        """Restore the base state and the cached raw vertex labels from ``ar``."""
        if name is None:
            prefix = 'Dataset'
        else:
            prefix = '{}_Dataset'.format(name)
        super(Dataset, self).load_archive(ar, copy=copy)
        self.__vertex_raw_labels_cache, = ar['{}_cache'.format(prefix)]
        # The cache is None when the archive was taken before the labels
        # were computed; there is nothing to copy in that case.
        if copy and self.__vertex_raw_labels_cache is not None:
            self.__vertex_raw_labels_cache = self.__vertex_raw_labels_cache.copy()

    @property
    def manual_features(self):
        raise NotImplementedError()

    @property
    def data(self):
        return self.__data

    @staticmethod
    def __label_vertices(feats, featnames, confdata):
        """Assign each row of ``feats`` the field with the largest matched weight.

        ``confdata`` maps a pattern key to ``[field_index, short_regex,
        long_regex]``.  For every non-zero feature whose name matches one of
        the two regexes, the feature value is added to that field's score
        (the first matching entry wins; further matches only print a
        warning).  A row with no positive score is labelled ``-1``.
        """
        labels = []
        for f in feats:
            cur = [0] * len(confdata)
            for idx in np.nonzero(f)[0]:
                curconfidx = None
                for v in confdata.values():
                    if re.match(v[1], featnames[idx]) or re.match(v[2], featnames[idx]):
                        if curconfidx is None:
                            curconfidx = v[0]
                        else:
                            print("[Warning]: {} satisfies both patterns {} and {}".format(featnames[idx], curconfidx,
                                                                                           v[0]))
                if curconfidx is not None:
                    cur[curconfidx] += f[idx]
            if np.max(cur) <= 0:
                labels.append(-1)
            else:
                labels.append(np.argmax(cur))
        print("label distribution: {}".format(Counter(labels)))
        return np.array(labels)

    def __vertex_raw_labels(self, return_name=False):
        """Return (and cache) one row of raw field labels per step.

        With ``return_name=True`` the label-to-name mapping is returned as a
        second value.
        """
        raw_names = {-1: 'Unknown', 0: 'Architecture', 1: 'Computer Network', 2: 'Computer Security',
                     3: 'Data Mining', 4: 'Theory', 5: 'Graphics'}
        if self.__vertex_raw_labels_cache is not None:
            if return_name:
                return self.__vertex_raw_labels_cache, raw_names
            else:
                return self.__vertex_raw_labels_cache

        # These are conferences that are to be merged
        confdata = [['ASPLOS|Architectural Support for Programming Languages and Operating Systems',
                     'FAST|Conference on File and Storage Technologies',
                     'HPCA|High-Performance Computer Architecture',
                     'ISCA|Symposium on Computer Architecture',
                     'MICRO|MICRO',
                     'USENIX ATC|USENIX Annul Technical Conference',
                     'PPoPP|Principles and Practice of Parallel Programming'],
                    ['MOBICOM|Mobile Computing and Networking Transactions on Networking',
                     'SIGCOMM|applications, technologies, architectures, and protocols for computer communication',
                     'INFOCOM|Computer Communications'],
                    ['CCS|Computer and Communications Security',
                     'NDSS|Network and Distributed System Security',
                     # 'CRYPTO|International Cryptology Conference',
                     # 'EUROCRYPT|European Cryptology Conference',
                     # raw string: same characters as before, without relying
                     # on the invalid escape sequence ``\&``
                     r'S\&P|Symposium on Security and Privacy',
                     'USENIX Security|Usenix Security Symposium'],
                    ['SIGMOD|Conference on Management of Data',
                     'SIGKDD|Knowledge Discovery and Data Mining',
                     'SIGIR|Research on Development in Information Retrieval',
                     'VLDB|Very Large Data Bases',
                     'ICDE|Data Engineering'],
                    ['STOC|ACM Symposium on Theory of Computing',
                     'FOCS|Symposium on Foundations of Computer Science',
                     'LICS|Symposium on Logic in Computer Science',
                     'CAV|Computer Aided Verification'],
                    [  # 'ACM MM|Multimedia',
                     'SIGGRAPH|SIGGRAPH Annual Conference',
                     'IEEE VIS|Visualization Conference',
                     'VR|Virtual Reality'],
                    # ['AAAI|AAAI Conference on Artificial Intelligence',
                    #  'CVPR|Computer Vision and Pattern Recognition',
                    #  'ICCV|International Conference on Computer Vision',
                    #  'ICML|International Conference on Machine Learning',
                    #  'IJCAI|International Joint Conference on Artificial Intelligence',
                    #  'NIPS|Annual Conference on Neural Information Processing Systems',
                    #  'ACL|Annual Meeting of the Association for Computational Linguistics']
                    ]
        # confdata records the representing conferences for each field
        confdata = {n: i for i, arr in enumerate(confdata) for n in arr}
        # Only existing keys are reassigned, so the dict size never changes
        # while iterating.
        for k in confdata.keys():
            sname, lname = k.split('|')
            confdata[k] = [confdata[k], re.compile(sname), re.compile(lname, re.I)]

        # conffeat is a list of matrices, each of them is a user-conference matrix for participation information
        conffeat = [self.__data['conf_feat'][y] for y in range(self.localunit, self.localunit + self.nunits)]
        # names of conferences in the user-conference matrix, in global indices (before filtering)
        conffeat_names = self.__data['conf_names']
        confmap = self.__data['confmap']
        # maps conference names from global indices to their original names
        conffeat_names = [confmap[c] for c in conffeat_names]

        # we use theory conferences because it is more independent
        rawlb = []
        for i in range(self.nsteps):
            startunit = self._time2unit(self.step2time(i + self.localstep))
            endunit = startunit + self.stepsize
            relstartunit, relendunit = startunit - self.localunit, endunit - self.localunit
            print("generating samples for years from {} to {}, i.e. featidx from {} to {}".
                  format(startunit, endunit - 1, relstartunit, relendunit - 1))
            # Sum the per-unit feature matrices of this step and densify.
            curconffeat = reduce(lambda x, y: x + y, conffeat[relstartunit:relendunit],
                                 np.zeros(conffeat[relstartunit].shape, dtype=conffeat[relstartunit].dtype)).A
            rawlb.append(self.__label_vertices(curconffeat, conffeat_names, confdata))
            print("{}/{} positive samples at step {}".format(np.sum(rawlb[-1] == 1), len(rawlb[-1]), i + self.localstep))
        rawlb = np.vstack(rawlb)

        self.__vertex_raw_labels_cache = rawlb
        if return_name:
            return self.__vertex_raw_labels_cache, raw_names
        else:
            return self.__vertex_raw_labels_cache

    # unlike classification_samples, this method returns labels for all nodes from time begin to end
    # with pos labeled as 1, neg labeled as -1 and unknown labeled as 0
    def vertex_labels(self, target=4, return_name=False):
        """Return per-step vertex labels wrapped in an ``OffsetList``.

        ``target='raw'`` returns the raw field labels.  Otherwise labels are
        mapped to 1 (equals ``target``), 0 (raw label -1, i.e. unknown) or
        -1 (any other field).  With ``return_name=True`` the label-to-name
        mapping is returned as a second value.
        """
        rawlb, raw_names = self.__vertex_raw_labels(return_name=True)

        if target == 'raw':
            lb = rawlb
            label_names = raw_names
        else:
            lb = rawlb.copy()

            # TODO: make sure the order of lb (i.e. order of feat) is 0:nnodes
            def mapper(x):
                if x == target:
                    return 1
                elif x == -1:
                    return 0
                else:
                    return -1

            lb = np.vectorize(mapper)(lb)
            label_names = {-1: 'Others', 1: raw_names[target], 0: 'Unknown'}

        assert lb.shape == (len(self.gtgraphs), self.gtgraphs['any'].num_vertices()), \
            "{}, ({}, {})".format(lb.shape, len(self.gtgraphs), self.gtgraphs['any'].num_vertices())

        if return_name:
            return utils.OffsetList(self.localstep, len(lb), lb, copy=False), label_names
        else:
            return utils.OffsetList(self.localstep, len(lb), lb, copy=False)
================================================
FILE: core/dataset/dataset_utils.py
================================================
from __future__ import print_function
from __future__ import absolute_import
from core import utils, gconv
from core import gconfig as gconf
import random
import numpy as np
from collections import defaultdict
from six.moves import range
import os
class Timeline(utils.Archivable):
    """Conversion between time values, integer time units and step indices.

    Step ``s`` starts at unit ``initunit + s * stepstride``.  Subclasses
    supply ``_time2unit`` and ``_unit2time``.
    """

    def __init__(self, inittime, stepsize, stepstride):
        self.initunit = self._time2unit(inittime)
        self.stepsize = stepsize
        self.stepstride = stepstride

    def time2step(self, tm):
        """Return the integer step index that starts at time ``tm``.

        Raises:
            RuntimeError: if ``tm`` precedes the initial time or does not
                fall on a stride boundary.
        """
        tmunit = self._time2unit(tm) - self.initunit
        if tmunit < 0 or tmunit % self.stepstride != 0:
            raise RuntimeError("Invalid step time {}({}), with len={}, stride={}"
                               .format(tmunit, tm, self.stepsize, self.stepstride))
        # Exact thanks to the check above; floor division keeps the result
        # an int on Python 3 as well (``/`` would yield a float there).
        return tmunit // self.stepstride

    def step2time(self, step):
        """Return the time value at which ``step`` starts."""
        tmunit = step * self.stepstride
        return self._unit2time(tmunit + self.initunit)

    def _step2unit(self, step):
        return self._time2unit(self.step2time(step))

    def _unit2step(self, unit):
        return self.time2step(self._unit2time(unit))

    def _time2unit(self, step):
        raise NotImplementedError()

    def _unit2time(self, unit):
        raise NotImplementedError()
# note that archive is used in combination with init args
# Archive provides tool to build Dataset from an archive
class Archive(utils.Archivable):
    """Archivable mixin that stores constructor arguments in the archive.

    ``from_archive`` uses those arguments to build a new instance and then
    loads the archive into it.
    """

    def _archive_args(self):
        """Positional constructor arguments to record; none by default."""
        return list()

    def archive(self, name=None):
        prefix = 'Archive' if name is None else '{}_Archive'.format(name)
        ar = super(Archive, self).archive(name)
        ar[prefix + '_args'] = self._archive_args()
        return ar

    @classmethod
    def from_archive(cls, ar, copy=False, name=None):
        """Construct ``cls`` from the archived arguments, then load ``ar`` into it."""
        prefix = 'Archive' if name is None else '{}_Archive'.format(name)
        init_args = ar.get(prefix + '_args')
        obj = cls(*init_args)
        obj.load_archive(ar, copy=copy)
        return obj
# Cache provides tool to load cached data given an already built Dataset
class Cache(utils.Archivable):
    """Archivable mixin for loading cached data into an already built object.

    A cache is a pair ``(archive, args)``; it is only loaded when ``args``
    equals this object's own ``_cache_args()``.
    """

    def _cache_args(self):
        """Arguments identifying what this object would cache; empty by default."""
        return dict()

    def is_compatible(self, args):
        own_args = self._cache_args()
        return own_args == args

    # evaluate all lazy operations before archiving
    def _full_archive(self, name=None):
        return self.archive(name)

    def cache(self):
        """Return ``(full_archive, cache_args)``."""
        archive = self._full_archive()
        args = self._cache_args()
        return archive, args

    def load_cache(self, args, datagen):
        """Load ``datagen()`` if ``args`` is compatible, else raise ``ValueError``.

        ``datagen`` is only called when the arguments match.
        """
        if not self.is_compatible(args):
            raise ValueError("Incompatible args {} vs. {}".format(self._cache_args(), args))
        self.load_archive(datagen(), copy=False)
class DynamicGraph(Timeline, utils.Archivable):
    """A window of ``nsteps`` graphs on a timeline, loaded on first access.

    ``mygraphs`` holds the merged graph of each step; ``gtgraphs`` holds the
    corresponding undirected graph-tool conversion.  Subclasses provide
    ``_load_unit_graph`` and ``_merge_unit_graphs``.
    """

    def __init__(self, inittime, localtime, nsteps, stepsize, stepstride, stepoffset):
        # Timeline anchors the starting time while DynamicGraph bounds the ending
        Timeline.__init__(self, inittime, stepsize, stepstride)

        self.localstep = self.time2step(localtime) + stepoffset
        self.localunit = self._step2unit(self.localstep)
        if self._step2unit(self.localstep) < self._time2unit(inittime):
            raise RuntimeError("localstep smaller than initial step, with inittime={}, localtime={},"
                               " step_stride={}, step_offset={}"
                               .format(inittime, localtime, stepstride, stepoffset))

        self.nsteps = nsteps
        last_step = self.localstep + self.nsteps - 1
        self.nunits = self._step2unit(last_step) - self.localunit + self.stepsize

        # we use mygraph as main data, with supportive gtgraphs available
        def build_mygraph(step):
            return self._load_graph(step)

        def build_gtgraph(step):
            return gconv.mygraph2graphtool(self._mygraphs[step], convert_to='undirected')

        self._mygraphs = utils.OffsetList(self.localstep, self.nsteps, build_mygraph)
        self._gtgraphs = utils.OffsetList(self.localstep, self.nsteps, build_gtgraph)

    @property
    def mygraphs(self):
        return self._mygraphs

    @property
    def gtgraphs(self):
        return self._gtgraphs

    @property
    def nsize(self):
        """Number of vertices, read from the first graph-tool graph."""
        return self.gtgraphs['any'].num_vertices()

    # override this to apply acceleration techniques
    def _load_graph(self, step):
        """Load the ``stepsize`` unit graphs of ``step`` and merge them."""
        first_unit = self._time2unit(self.step2time(step))
        unit_graphs = [self._load_unit_graph(self._unit2time(unit))
                       for unit in range(first_unit, first_unit + self.stepsize)]
        wrapped = utils.OffsetList(0, self.stepsize, unit_graphs, copy=False)
        return self._merge_unit_graphs(wrapped, step)

    def _load_unit_graph(self, tm):
        raise NotImplementedError()

    def _merge_unit_graphs(self, graphs, curstep):
        raise NotImplementedError()

    # required by Archivable
    def archive(self, name=None):
        prefix = 'DynamicGraph' if name is None else '{}_DynamicGraph'.format(name)
        ar = super(DynamicGraph, self).archive(name)
        # note that conversion from/to graph_tool is even slower than reading from file
        ar[prefix + '_gtgraphs'] = self._gtgraphs.archive()
        ar[prefix + '_mygraphs'] = self._mygraphs.archive()
        return ar

    def load_archive(self, ar, copy=False, name=None):
        prefix = 'DynamicGraph' if name is None else '{}_DynamicGraph'.format(name)
        super(DynamicGraph, self).load_archive(ar, copy=copy, name=name)
        self._gtgraphs.load_archive(ar[prefix + '_gtgraphs'], copy=copy)
        self._mygraphs.load_archive(ar[prefix + '_mygraphs'], copy=copy)
class TestSampler(object):
    """Mixin that draws labelled evaluation samples for several tasks.

    The host class must provide ``gtgraphs`` and ``mygraphs`` (indexable by
    step) and, for the node tasks, ``vertex_labels``.  ``sample_test_data``
    dispatches on the task name; every sampler returns a pair
    ``(samples, labels)``.
    """

    # protocols
    def vertex_labels(self):
        raise NotImplementedError("vertex_labels")

    def vertex_raw_labels(self):
        raise NotImplementedError("vertex_raw_labels")

    def vertex_static_labels(self):
        raise NotImplementedError("vertex_static_labels")

    # helpers
    @staticmethod
    def __stratified_sample_size(size, possize, negsize):
        """Split ``size`` between positives and negatives in proportion to their counts."""
        total = possize + negsize
        if total == 0:
            # No labelled samples at all: avoid dividing by zero and let the
            # caller report the missing positives.
            return 0, size
        posrat = float(possize) / total
        newpossize = int(size * posrat)
        newnegsize = size - newpossize
        return newpossize, newnegsize

    # main function and implementation
    tasks = 'link_reconstruction', 'link_prediction', 'node_classify', 'node_predict', \
            'changed_link_classify', 'changed_link_prediction', 'order_links', 'none'

    @property
    def __task_handler(self):
        # Unknown task names fall back to __unknown, which raises.
        ret = defaultdict(lambda: self.__unknown)
        ret.update({'link_reconstruction': self._sample_link_reconstruction,
                    'link_prediction': self._sample_link_prediction,
                    'node_classify': self._sample_node_classify,
                    'node_predict': self._sample_node_predict,
                    'changed_link_classify': self._sample_changed_link_classify,
                    'changed_link_prediction': self._sample_changed_link_prediction,
                    'order_links': self._sample_order_links,
                    'none': self.__none})
        return ret

    def _sample_order_links(self, begin, end, size=None, intv=0, name=""):
        """Sample ``(time, vertex)`` pairs with a 0/1 neighbour-indicator vector as label."""
        gtgraphs = self.gtgraphs[begin + intv:end]

        def nodeitr():
            for i in range(len(gtgraphs)):
                for j in range(gtgraphs[i].num_vertices()):
                    if gtgraphs[i].vertex(j).out_degree() > 0:
                        yield (i + begin, j)

        if size is None:
            samples = list(nodeitr())
        else:
            samples = random.sample(utils.ilen(nodeitr(), gtgraphs[0].num_vertices() * len(gtgraphs)), size)

        lbs = []
        for v in samples:
            # v[0] - begin is correct given intv, because v[0] is 'i + begin', whose label shall be found in
            # 'i + begin + intv'-th graph, which is exactly gtgraphs[i + begin + intv - (begin + intv)], i.e.
            # gtgraphs[v[0] - begin]
            lb = np.zeros((gtgraphs[v[0] - begin].num_vertices(),), dtype='int8')
            lb[[int(n) for n in gtgraphs[v[0] - begin].vertex(v[1]).out_neighbours()]] = 1
            assert np.sum(lb) == gtgraphs[v[0] - begin].vertex(v[1]).out_degree()
            lbs.append(lb)

        assert len(lbs) == len(samples), "{} {}".format(len(lbs), len(samples))

        return [samples, lbs]

    def _sample_link_reconstruction(self, begin, end, size=None, negdup=1, intv=0, name=""):
        """Return existing edges (label 1) plus ``negdup`` sampled non-edges per edge (label -1).

        Each sample is ``[time, source, target]``.  A negative is obtained by
        replacing one randomly chosen endpoint with a random vertex until the
        pair is not an edge of the graph at ``time + intv``.  ``size`` is not
        used.
        """
        pos = []
        for i, g in enumerate(self.gtgraphs[begin + intv:end]):
            for e in g.edges():
                assert not g.is_directed()
                if gconf.debug and int(e.source()) > int(e.target()):
                    # check symmetric
                    names = g.vertex_properties['name']
                    assert g.edge(e.target(), e.source()), "{}: {} {}".format(i + begin, names[e.source()],
                                                                              names[e.target()])
                    assert g.edge_properties['weight'][e] == g.edge_properties['weight'][
                        g.edge(e.target(), e.source())]
                    continue
                pos.append([i + begin, int(e.source()), int(e.target())])
        pos = np.vstack(pos).astype('int32')

        neg = []
        vsize = self.gtgraphs['any'].num_vertices()
        nodenames = list(self.gtgraphs['any'].vp['name'])
        for i in range(negdup):
            for p in pos:
                tm, src, tgt = p
                g = self.mygraphs[tm + intv]
                assert g.out_degree(nodenames[src]) < vsize - 1 or g.out_degree(nodenames[tgt]) < vsize - 1, \
                    "We do not expect any node to connect to all other nodes"

                while True:
                    if random.randint(0, 1) == 0:  # replace source
                        # cur_range = negrange[tm][tgt]
                        # new_src = cur_range[random.randint(0, len(cur_range) - 1)]
                        new_src = random.randint(0, vsize - 1)
                        if not g.exists(nodenames[new_src], nodenames[tgt]):
                            neg.append([tm, new_src, tgt])
                            break
                    else:  # replace target
                        # cur_range = negrange[tm][src]
                        # new_tgt = cur_range[random.randint(0, len(cur_range) - 1)]
                        new_tgt = random.randint(0, vsize - 1)
                        if not g.exists(nodenames[src], nodenames[new_tgt]):
                            neg.append([tm, src, new_tgt])
                            break
        neg = np.vstack(neg).astype('int32')

        lbs = np.concatenate((np.ones(len(pos)), -np.ones(len(neg))))
        return np.concatenate((pos, neg), axis=0), lbs

    def _sample_link_prediction(self, begin, end, size=None, name=""):
        return self._sample_link_reconstruction(begin, end, size, intv=1)

    # intv is used for prediction from previous time steps
    def _sample_node_classify(self, begin, end, size=None, intv=0, name=""):
        """Return ``[time, vertex]`` samples labelled 1 / -1 from ``vertex_labels()``.

        Labels are read from steps ``begin + intv`` to ``end``; sample times
        are reported relative to ``begin``.  With ``size`` given, positives
        and negatives are subsampled proportionally.

        Raises:
            RuntimeError: if no positive sample is available.
        """
        # asarray avoids a copy when possible but, unlike
        # np.array(..., copy=False) on NumPy 2, still works when one is needed.
        lbs = np.asarray(self.vertex_labels()[begin + intv:end])
        possamp = np.transpose(np.vstack(np.nonzero(lbs == 1)))
        negsamp = np.transpose(np.vstack(np.nonzero(lbs == -1)))

        if size is not None:
            possize, negsize = self.__stratified_sample_size(size, len(possamp), len(negsamp))
            # Sample row indices rather than the array itself: random.sample
            # requires a real sequence on recent Python versions, and the
            # result keeps its 2-D shape even when nothing is selected.
            if possize < len(possamp):
                possamp = possamp[random.sample(range(len(possamp)), possize)]
            if negsize < len(negsamp):
                negsamp = negsamp[random.sample(range(len(negsamp)), negsize)]

        if len(possamp) == 0:
            raise RuntimeError("Not enough positive samples for training")
        samples = np.concatenate((possamp, negsamp), axis=0)
        samples[:, 0] += begin  # from begin-based time to 0-based time
        lbs = np.concatenate((np.ones(len(possamp)), -np.ones(len(negsamp))), axis=0)

        return [samples, lbs]

    def _sample_node_predict(self, begin, end, size=None, name=""):
        return self._sample_node_classify(begin, end, size, intv=1)

    def _sample_changed_link_classify(self, begin, end, size=None, intv=0, name=""):
        """Return edges that appear (label 1) or disappear (label -1) between consecutive graphs.

        For each ``i`` the graphs at ``i + intv`` and ``i + 1 + intv`` are
        compared and the samples are recorded with time ``i + 1``.

        Raises:
            RuntimeError: if fewer than two graphs are in ``[begin, end)``.
        """
        if end - begin < 2:
            raise RuntimeError("there must be at least 2 graphs in 'changed' sample method")

        def edge_set(g):
            ret = set()
            for e in g.edges():
                s, t = int(e.source()), int(e.target())
                if s > t:
                    s, t = t, s
                ret.add((s, t))
            return ret

        samp, lbs = [], []
        for i in range(begin, end - 1 - intv):
            prevg, curg = self.gtgraphs[i + intv:i + 2 + intv]

            cure = edge_set(curg)
            preve = edge_set(prevg)

            for s, t in cure - preve:
                # i + 1 because i enumerates all prev graphs
                samp.append([i + 1, s, t])
                lbs.append(1)
            for s, t in preve - cure:
                samp.append([i + 1, s, t])
                lbs.append(-1)

        if gconf.debug:
            # only check in debug mode because it is time consuming to call g.edge
            for k in range(len(samp)):
                # A sample recorded at time t describes the graph at
                # t + intv (the "current" graph of the comparison above).
                checked = self.gtgraphs[samp[k][0] + intv]
                if lbs[k] == 1:
                    assert checked.edge(samp[k][1], samp[k][2]) is not None
                else:
                    assert checked.edge(samp[k][1], samp[k][2]) is None

        samp = np.array(samp)
        lbs = np.array(lbs)
        if size is not None:
            sampidx = random.sample(range(len(samp)), size)
            samp = samp[sampidx]
            lbs = lbs[sampidx]

        return samp, lbs

    def _sample_changed_link_prediction(self, begin, end, size=None, name=""):
        return self._sample_changed_link_classify(begin, end, size, intv=1)

    def __none(self, begin, end, size=None, name=""):
        return [[], []]

    def __unknown(self, begin, end, size=None, name=""):
        raise NotImplementedError("Unknown sampling task {}".format(name))

    def sample_test_data(self, task, begin, end, size=None):
        """Run the sampler registered for ``task``; unknown tasks raise ``NotImplementedError``."""
        return self.__task_handler[task](begin, end, size, name=task)
class DatasetBase(DynamicGraph, Archive, Cache, TestSampler):
    """Common base of concrete datasets: dynamic graph + archive + cache + test sampling.

    Subclasses must provide ``inittime``.  The constructor arguments are
    remembered in ``initargs`` and reused as archive and cache arguments.
    """

    initarg_names = ['datafn', 'localtime', 'nsteps', 'stepsize', 'stepstride', 'offset', 'dataname']

    @property
    def inittime(self):
        raise NotImplementedError()

    def _archive_args(self):
        inherited = super(DatasetBase, self)._archive_args()
        return inherited + self.initargs

    def _cache_args(self):
        """Return the constructor arguments keyed by name, for cache matching.

        The dataset is identified by ``dataname`` when one was given,
        otherwise by the absolute path of ``datafn``.
        """
        cargs = super(DatasetBase, self)._cache_args()
        for argname, argval in zip(self.initarg_names, self.initargs):
            cargs[argname] = argval
        if cargs['dataname'] is None:
            del cargs['dataname']
            cargs['datafn'] = os.path.abspath(cargs['datafn'])  # use absolute path for cache args
        else:
            del cargs['datafn']  # use name instead of data file name
        return cargs

    def __init__(self, datafn, localtime, nsteps, stepsize=5, stepstride=1, offset=0, dataname=None):
        DynamicGraph.__init__(self, self.inittime, localtime, nsteps, stepsize, stepstride, offset)
        self.initargs = [datafn, localtime, nsteps, stepsize, stepstride, offset, dataname]
================================================
FILE: core/gconfig.py
================================================
# Global configuration flags shared across the package.

# When True, extra (slow) consistency assertions are enabled in code that
# checks ``gconfig.debug``.
debug = False
# NOTE(review): presumably selects the Cython implementation of the utils
# module over the pure-Python one; confirm against the importing code.
use_cython = True
# NOTE(review): presumably the number of parallel jobs/threads; confirm
# against the code that reads it.
njobs = 30
================================================
FILE: core/gconv.py
================================================
from __future__ import print_function
import graphtool_utils as gtutils
import utils
import mygraph
import mygraph_utils as mgutils
def graphtool2mygraph(g, **_):
    """Convert a graph-tool graph ``g`` into a ``mygraph.Graph``.

    Vertex names come from the ``'name'`` vertex property (vertex indices if
    absent) and edge weights from the ``'weight'`` edge property (constant
    1.0 if absent).  Each edge of an undirected graph is added in both
    directions.  Extra keyword arguments are ignored.
    """
    names = g.vp.get('name')
    if names:
        try:
            name_type = mgutils.format_type(names.value_type())
        except ValueError as e:
            # str(e) instead of e.message, which only exists on Python 2
            print("Auto resolving type alias failed, try resolving with graph tool type system: " + str(e))
            name_type = mgutils.python2type(gtutils.type2python(names.value_type()))
    else:
        names = range(g.num_vertices())
        name_type = 'int'

    weight = g.ep.get('weight')
    if weight:
        try:
            # Resolve the type of the weight property itself (the previous
            # code looked at the name property here by mistake).
            weight_type = mgutils.format_type(weight.value_type())
        except ValueError as e:
            print("Auto resolving type alias failed, try resolving with graph tool type system: " + str(e))
            weight_type = mgutils.python2type(gtutils.type2python(weight.value_type()))
    else:
        weight = utils.ConstantDict(1.0)
        weight_type = 'float'

    names = list(names)  # get rid of sluggish gt.vertex_properties
    mg = mygraph.Graph(name_type, weight_type)
    for n in names:
        mg.add_vertex(n)

    directed = g.is_directed()
    for e in g.edges():
        n1, n2 = names[int(e.source())], names[int(e.target())]
        # n1, n2 = names[e.source()], names[e.target()]
        mg.inc_edge(n1, n2, weight[e])
        if not directed:
            # mygraph stores directed edges, so mirror undirected ones
            mg.inc_edge(n2, n1, weight[e])

    return mg
# TODO: modify mygraph to make it support non-directed graph, and remove convert_to arg here
def mygraph2graphtool(g, convert_to=None, **_):
    """Convert a ``mygraph`` graph ``g`` into a graph-tool graph.

    The graph is handed to ``gtutils.load_mygraph_core`` as a directed graph
    together with ``convert_to``; a summary of the result is printed.
    Extra keyword arguments are ignored.
    """
    vertices = g.vertices()

    def lookup(index):
        # Resolved on demand: whatever ``g.get`` returns for the vertex at
        # this index.
        return g.get(vertices[index])

    per_vertex = utils.KeyDefaultDict(lookup)
    name_type = gtutils.python2type(mgutils.type2python(g.node_type()))
    weight_type = gtutils.python2type(mgutils.type2python(g.weight_type()))
    ret = gtutils.load_mygraph_core(vertices, per_vertex, directed=True,
                                    nametype=name_type,
                                    weighttype=weight_type,
                                    convert_to=convert_to, check=True)
    print("converting into graph {}".format(gtutils.graph_summary(ret)))
    return ret
================================================
FILE: core/graph/CMakeLists.txt
================================================
# cmake_minimum_required() must come before project() so that the required
# version and policy settings are in effect when the project is configured.
cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(graph)
#set(CMAKE_PREFIX_PATH ${LD_LIBRARY_PATH})
# Default to a release build when the caller did not choose a build type.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "RELEASE")
    #set(CMAKE_BUILD_TYPE "DEBUG")
    #set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
    #set(CMAKE_BUILD_TYPE "MINSIZEREL")
endif()
# Only GCC is supported; any other compiler aborts the configuration.
if(CMAKE_COMPILER_IS_GNUCXX)
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -ftemplate-backtrace-limit=0")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -ftemplate-backtrace-limit=0")
else()
    MESSAGE(FATAL_ERROR "CMakeLists.txt has not been tested/written for your compiler.")
endif()
#add_executable(subgraph graph.cpp nodeset.cpp utils.cpp subgraph.cpp exception.cpp)
#add_executable(graphfeat graph.cpp nodeset.cpp utils.cpp graphfeat.cpp exception.cpp)
#add_executable(centrality graph.cpp nodeset.cpp utils.cpp centrality.cpp exception.cpp)
#add_executable(_test_gfi _test_gfi.cpp graphfullindexed.cpp nodeset.cpp utils.cpp exception.cpp)
#add_executable(_test_mygraph _test_graph.cpp nodemap.cpp nodeset.cpp graph.cpp exception.cpp utils.cpp graph_pywrapper.cpp ioutils.cpp types.cpp)
# Build the shared library "mygraph"; the empty PREFIX drops the usual
# "lib" prefix from the output file name.
add_library(mygraph SHARED nodemap.cpp nodeset.cpp graph.cpp exception.cpp utils.cpp graph_pywrapper.cpp ioutils.cpp types.cpp)
set_target_properties(mygraph PROPERTIES PREFIX "" )
#if(CMAKE_STATIC_GLIB_PATH)
# find_package(Glibc 3.14 REQUIRED PATH ${CMAKE_STATIC_GLIBC_PATH}
# NO_SYSTEM_ENVIRONMENT_PATH)
# target_link_libraries(cppimpl ${GLIBC_LIBRARIES})
# target_link_libraries(mygraph ${GLIBC_LIBRARIES})
# set_target_properties(cppimpl mygraph PROPERTIES LINK_SEARCH_START_STATIC 1)
# set_target_properties(cppimpl mygraph PROPERTIES LINK_SEARCH_END_STATIC 1)
#endif()
# we MUST find python before boost_python,
# because boost python somehow sets PYTHON_EXECUTABLE in a wrong manner
# and find_package(python) will not reset this variable once it exists
# as a result, find_package(python) is broken due to wrong path
if((NOT "$ENV{PYTHON_LIBRARY}" STREQUAL "") AND (NOT
"$ENV{PYTHON_INCLUDE_DIR}" STREQUAL ""))
message("Using custom python path $ENV{PYTHON_LIBRARY} and $ENV{PYTHON_INCLUDE_DIR}")
set(PYTHON_LIBRARY $ENV{PYTHON_LIBRARY})
set(PYTHON_INCLUDE_DIR $ENV{PYTHON_INCLUDE_DIR})
endif()
set(Python_ADDITIONAL_VERSIONS 2.7)
find_package(PythonLibs 2.7 REQUIRED)
include_directories(${PYTHON_INCLUDE_DIRS})
link_directories(${PYTHON_LIBRARIES})
#target_link_libraries(_test_mygraph python2.7)
find_package(Boost 1.54.0)
if(Boost_FOUND)
include_directories("${Boost_INCLUDE_DIRS}" "/usr/include/python2.7")
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.54.0 COMPONENTS python)
if(NOT Boost_FOUND)
if(NOT "$ENV{BOOST_PYTHON_LIBNAME}" STREQUAL "")
message("boost_python not detected by cmake, trying custom lib name
$ENV{BOOST_PYTHON_LIBNAME}")
else()
message(FATAL_ERROR "boost_python not detected by cmake, try setting
environment variable $BOOST_PYTHON_LIBNAME for custom library name")
endif()
set (Boost_LIBRARIES "${Boost_LIBRARIES}-l boost_python-2.7")
endif()
ELSEIF(NOT Boost_FOUND)
MESSAGE(FATAL_ERROR "Unable to find correct Boost version. Did you set BOOST_ROOT?")
ENDIF()
#target_link_libraries(_test_mygraph ${Boost_LIBRARIES})
target_link_libraries(mygraph ${Boost_LIBRARIES})
#set_property(TARGET _test_mygraph mygraph PROPERTY CXX_STANDARD 11)
#set_property(TARGET _test_mygraph mygraph PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET mygraph PROPERTY CXX_STANDARD 11)
set_property(TARGET mygraph PROPERTY CXX_STANDARD_REQUIRED ON)
================================================
FILE: core/graph/CMakeLists.txt.user
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 4.3.0, 2017-07-16T22:01:00. -->
<qtcreator>
<data>
<variable>EnvironmentId</variable>
<value type="QByteArray">{1fbab999-ec9b-4841-8ec0-4de4f9a26238}</value>
</data>
<data>
<variable>ProjectExplorer.Project.ActiveTarget</variable>
<value type="int">0</value>
</data>
<data>
<variable>ProjectExplorer.Project.EditorSettings</variable>
<valuemap type="QVariantMap">
<value type="bool" key="EditorConfiguration.AutoIndent">true</value>
<value type="bool" key="EditorConfiguration.AutoSpacesForTabs">false</value>
<value type="bool" key="EditorConfiguration.CamelCaseNavigation">true</value>
<valuemap type="QVariantMap" key="EditorConfiguration.CodeStyle.0">
<value type="QString" key="language">Cpp</value>
<valuemap type="QVariantMap" key="value">
<value type="QByteArray" key="CurrentPreferences">CppGlobal</value>
</valuemap>
</valuemap>
<valuemap type="QVariantMap" key="EditorConfiguration.CodeStyle.1">
<value type="QString" key="language">QmlJS</value>
<valuemap type="QVariantMap" key="value">
<value type="QByteArray" key="CurrentPreferences">QmlJSGlobal</value>
</valuemap>
</valuemap>
<value type="int" key="EditorConfiguration.CodeStyle.Count">2</value>
<value type="QByteArray" key="EditorConfiguration.Codec">UTF-8</value>
<value type="bool" key="EditorConfiguration.ConstrainTooltips">false</value>
<value type="int" key="EditorConfiguration.IndentSize">4</value>
<value type="bool" key="EditorConfiguration.KeyboardTooltips">false</value>
<value type="int" key="EditorConfiguration.MarginColumn">80</value>
<value type="bool" key="EditorConfiguration.MouseHiding">true</value>
<value type="bool" key="EditorConfiguration.MouseNavigation">true</value>
<value type="int" key="EditorConfiguration.PaddingMode">1</value>
<value type="bool" key="EditorConfiguration.ScrollWheelZooming">true</value>
<value type="bool" key="EditorConfiguration.ShowMargin">false</value>
<value type="int" key="EditorConfiguration.SmartBackspaceBehavior">2</value>
<value type="bool" key="EditorConfiguration.SmartSelectionChanging">true</value>
<value type="bool" key="EditorConfiguration.SpacesForTabs">true</value>
<value type="int" key="EditorConfiguration.TabKeyBehavior">0</value>
<value type="int" key="EditorConfiguration.TabSize">8</value>
<value type="bool" key="EditorConfiguration.UseGlobal">true</value>
<value type="int" key="EditorConfiguration.Utf8BomBehavior">1</value>
<value type="bool" key="EditorConfiguration.addFinalNewLine">true</value>
<value type="bool" key="EditorConfiguration.cleanIndentation">true</value>
<value type="bool" key="EditorConfiguration.cleanWhitespace">true</value>
<value type="bool" key="EditorConfiguration.inEntireDocument">false</value>
</valuemap>
</data>
<data>
<variable>ProjectExplorer.Project.PluginSettings</variable>
<valuemap type="QVariantMap"/>
</data>
<data>
<variable>ProjectExplorer.Project.Target.0</variable>
<valuemap type="QVariantMap">
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">{ce399ac2-bee5-4fec-b60f-9ee70c227ef8}</value>
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
<valuelist type="QVariantList" key="CMake.Configuration"/>
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Default</value>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">all</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Build</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">clean</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Clean</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Default</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Default</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeBuildConfiguration</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.1">
<valuelist type="QVariantList" key="CMake.Configuration">
<value type="QString">CMAKE_BUILD_TYPE:STRING=Debug</value>
</valuelist>
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Debug</value>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString"></value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Build</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">clean</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Clean</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Debug</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Debug</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeBuildConfiguration</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.2">
<valuelist type="QVariantList" key="CMake.Configuration">
<value type="QString">CMAKE_BUILD_TYPE:STRING=Release</value>
</valuelist>
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Release</value>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString"></value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Build</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">clean</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Clean</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Release</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Release</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeBuildConfiguration</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.3">
<valuelist type="QVariantList" key="CMake.Configuration">
<value type="QString">CMAKE_BUILD_TYPE:STRING=RelWithDebInfo</value>
</valuelist>
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Release with Debug Information</value>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString"></value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Build</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">clean</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Clean</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Release with Debug Information</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Release with Debug Information</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeBuildConfiguration</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.4">
<valuelist type="QVariantList" key="CMake.Configuration">
<value type="QString">CMAKE_BUILD_TYPE:STRING=MinSizeRel</value>
</valuelist>
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Minimum Size Release</value>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString"></value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Build</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
</valuemap>
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
<value type="QString" key="CMakeProjectManager.MakeStep.AdditionalArguments"></value>
<valuelist type="QVariantList" key="CMakeProjectManager.MakeStep.BuildTargets">
<value type="QString">clean</value>
</valuelist>
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.MakeStep</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Clean</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Minimum Size Release</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Minimum Size Release</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeBuildConfiguration</value>
</valuemap>
<value type="int" key="ProjectExplorer.Target.BuildConfigurationCount">5</value>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.DeployConfiguration.0">
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">0</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Deploy</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Deploy</value>
</valuemap>
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">1</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Deploy locally</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.DefaultDeployConfiguration</value>
</valuemap>
<value type="int" key="ProjectExplorer.Target.DeployConfigurationCount">1</value>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.PluginSettings"/>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.RunConfiguration.0">
<value type="bool" key="Analyzer.QmlProfiler.AggregateTraces">false</value>
<value type="bool" key="Analyzer.QmlProfiler.FlushEnabled">false</value>
<value type="uint" key="Analyzer.QmlProfiler.FlushInterval">1000</value>
<value type="QString" key="Analyzer.QmlProfiler.LastTraceFile"></value>
<value type="bool" key="Analyzer.QmlProfiler.Settings.UseGlobalSettings">true</value>
<valuelist type="QVariantList" key="Analyzer.Valgrind.AddedSuppressionFiles"/>
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectBusEvents">false</value>
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectSystime">false</value>
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableBranchSim">false</value>
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableCacheSim">false</value>
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableEventToolTips">true</value>
<value type="double" key="Analyzer.Valgrind.Callgrind.MinimumCostRatio">0.01</value>
<value type="double" key="Analyzer.Valgrind.Callgrind.VisualisationMinimumCostRatio">10</value>
<value type="bool" key="Analyzer.Valgrind.FilterExternalIssues">true</value>
<value type="int" key="Analyzer.Valgrind.LeakCheckOnFinish">1</value>
<value type="int" key="Analyzer.Valgrind.NumCallers">25</value>
<valuelist type="QVariantList" key="Analyzer.Valgrind.RemovedSuppressionFiles"/>
<value type="int" key="Analyzer.Valgrind.SelfModifyingCodeDetection">1</value>
<value type="bool" key="Analyzer.Valgrind.Settings.UseGlobalSettings">true</value>
<value type="bool" key="Analyzer.Valgrind.ShowReachable">false</value>
<value type="bool" key="Analyzer.Valgrind.TrackOrigins">true</value>
<value type="QString" key="Analyzer.Valgrind.ValgrindExecutable">valgrind</value>
<valuelist type="QVariantList" key="Analyzer.Valgrind.VisibleErrorKinds">
<value type="int">0</value>
<value type="int">1</value>
<value type="int">2</value>
<value type="int">3</value>
<value type="int">4</value>
<value type="int">5</value>
<value type="int">6</value>
<value type="int">7</value>
<value type="int">8</value>
<value type="int">9</value>
<value type="int">10</value>
<value type="int">11</value>
<value type="int">12</value>
<value type="int">13</value>
<value type="int">14</value>
</valuelist>
<value type="QString" key="CMakeProjectManager.CMakeRunConfiguation.Title">_test_mygraph</value>
<value type="QString" key="CMakeProjectManager.CMakeRunConfiguration.Arguments"></value>
<value type="QString" key="CMakeProjectManager.CMakeRunConfiguration.UserWorkingDirectory"></value>
<value type="QString" key="CMakeProjectManager.CMakeRunConfiguration.UserWorkingDirectory.default">/home/luckiezhou/Projects/graphemb/build-graph-Desktop-Default</value>
<value type="int" key="PE.EnvironmentAspect.Base">2</value>
<valuelist type="QVariantList" key="PE.EnvironmentAspect.Changes"/>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">_test_mygraph</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">CMakeProjectManager.CMakeRunConfiguration._test_mygraph</value>
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
<value type="bool" key="RunConfiguration.UseMultiProcess">false</value>
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
</valuemap>
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">1</value>
</valuemap>
</data>
<data>
<variable>ProjectExplorer.Project.TargetCount</variable>
<value type="int">1</value>
</data>
<data>
<variable>ProjectExplorer.Project.Updater.FileVersion</variable>
<value type="int">18</value>
</data>
<data>
<variable>Version</variable>
<value type="int">18</value>
</data>
</qtcreator>
================================================
FILE: core/graph/_test_graph.cpp
================================================
#include <cassert>
#include <iostream>
#include <string>
#include <utility>
using namespace std;
#include "nodemap.h"
#include "nodeset.h"
#include "graph.h"
// Exercises NodeMap<int, string>: default value lookup, insertion, move
// construction, the three DUPMODE policies, merge and ordered iteration,
// plus the exception raised by get() on a missing key.
// Failures are reported through assert(); progress is echoed to stdout.
void test_nodemap()
{
    // Automatic storage instead of new: the maps used to be leaked.
    NodeMap<int, string> nm;
    cout << "test default value" << endl;
    cout << nm.get_value(0) << endl;
    nm.newnode(100, string("hello"));
    nm.newnode(200, string("world"));
    cout << nm[100] << endl;
    assert(nm[100] == "hello");

    // nm1 takes over the contents of nm; nm is reused further below.
    NodeMap<int, string> nm1(std::move(nm));
    cout << nm1[100] << endl;
    assert(nm1[100] == "hello");

    // DUP_WARN must throw on an existing key, so assert(0) is never reached.
    try
    {
        nm1.newnode(100, string("olleh"), DUP_WARN);
        assert(0);
    }
    catch(...)
    {
    }

    // Without an explicit mode the existing value is replaced.
    nm1.newnode(100, string("olleh"));
    cout << nm1[100] << endl;
    assert(nm1[100] == "olleh");

    // DUP_IGNORE keeps the value that is already stored.
    nm1.newnode(200, string("dlrow"), DUP_IGNORE);
    cout << nm1[200] << endl;
    assert(nm1[200] == "world");

    cout << nm1.exists(100) << endl;
    assert(nm1.exists(100));

    // Refill the moved-from map and merge it into nm1.
    nm.newnode(400, string("abcd"));
    nm.newnode(200, string("dcba"), DUP_OVERWRITE);
    nm1.merge(nm);

    int keys[] = {100, 200, 400};
    string values[] = {"olleh", "dcba", "abcd"};
    const int expected = sizeof(keys) / sizeof(keys[0]);
    int pt = 0;
    for(auto& itr : nm1)
    {
        cout << itr.first << itr.second << endl;
        assert(pt < expected); // never index past the expectation arrays
        assert(itr.first == keys[pt]);
        assert(itr.second == values[pt]);
        pt++;
    }
    assert(pt == expected); // every expected entry must have been visited

    NodeMap<int, int> nm2;
    try
    {
        nm2.get(10);
    }
    catch(NoSuchFieldException& e)
    {
        cout << e.what() << endl;
    }
}
// Exercises NodeSet<string>: insertion, membership lookup, move construction,
// the DUPMODE policies, merge and ordered iteration.
// Failures are reported through assert(); progress is echoed to stdout.
void test_nodeset()
{
    // Automatic storage instead of new: the sets used to be leaked.
    NodeSet<string> nm;
    nm.newnode(string("hello"));
    nm.newnode(string("world"));
    cout << nm[string("hello")] << endl;
    assert(nm[string("hello")]);

    // nm1 takes over the contents of nm; nm is reused further below.
    NodeSet<string> nm1(std::move(nm));
    cout << nm1[string("hello")] << endl;
    assert(nm1[string("hello")]);

    // DUP_WARN must throw on an existing element, so assert(0) is never reached.
    try
    {
        nm1.newnode(string("hello"), DUP_WARN);
        assert(0);
    }
    catch(...)
    {
    }

    nm1.newnode(string("dlrow"), DUP_IGNORE);
    cout << nm1[string("dlrow")] << endl;
    assert(nm1[string("dlrow")]);
    cout << nm1.exists(string("dlrow")) << endl;
    assert(nm1.exists(string("dlrow")));

    // Refill the moved-from set and merge it into nm1.
    nm.newnode(string("abcd"));
    nm.newnode(string("dcba"), DUP_OVERWRITE);
    nm1.merge(nm);

    string values[] = {"abcd", "dcba", "dlrow", "hello", "world"};
    const int expected = sizeof(values) / sizeof(values[0]);
    int pt = 0;
    for(auto& itr : nm1)
    {
        cout << itr << endl;
        assert(pt < expected); // never index past the expectation array
        assert(itr == values[pt]);
        pt++;
    }
    assert(pt == expected); // every expected element must have been visited
}
// Empty helper type with no members; test_graph() uses it as the negative
// case for Has_nullinst (it expects Has_nullinst<A>::value to be 0).
struct A { };
void test_graph()
{
Graph<string, float, NodeMap> g0;
cout << "test default" << endl;
cout << g0.edge_value("10", "20") << endl;
g0.newnode("0");
cout << g0.edge_value("0", "100") << endl;
Graph<int, int, NodeMap> g;
for(int i = 0; i < 10; i++)
{
g.newnode(i);
g[i].newnode(i + 1, i + 1);
}
cout << "test tostring" << endl;
g.tostring(cout);
cout << endl;
Graph<int, int, NodeMap> g1 = g.inverseEdge();
for(int i = 1; i <= 10; i++)
{
cout << g1.exists(i) << endl;
assert(g1.exists(i));
cout << g1[i].exists(i - 1) << endl;
assert(g1[i].exists(i - 1));
assert(!g1[i].exists(i) && !g1[i].exists(i + 1));
cout << g1[i][i - 1] << endl;
assert(g1[i][i - 1] == i);
}
auto ev = g1.edges();
assert(ev.end() == ev.end());
for(auto itr = ev.begin(); itr != ev.end(); itr++)
{
cout << (*itr).first << ' ' << (*itr).second.first << ' ' << (*itr).second.second << endl;
}
cout << "testing save&load" << endl;
g1.save("_test_graph.graph");
Graph<int, int, NodeMap> g2;
g2.load("_test_graph.graph");
cout << "edges loaded" << endl;
ev = g2.edges();
for(auto itr = ev.begin(); itr != ev.end(); itr++)
{
cout << (*itr).first << ' ' << (*itr).second.first << ' ' << (*itr).second.second << endl;
}
cout << "compare with original" << endl;
ev = g1.edges();
for(auto itr = ev.begin(); itr != ev.end(); itr++)
{
cout << (*itr).first << ' ' << (*itr).second.first << ' ' << (*itr).second.second << endl;
assert(g2.exists((*itr).first, (*itr).second.first));
}
g1.save("_test_graph_txt.graph", false);
cout << "A has nullinst " << Has_nullinst<A>::value << endl;
assert(Has_nullinst<A>::value == 0);
cout << "edgemap has nullinst " << Has_nullinst<EdgeMap<int, int, NodeMap> >::value << endl;
assert((Has_nullinst<EdgeMap<int, int, NodeMap> >::value) == 1);
}
// Manual check of Graph::merge_graph(): builds two string-keyed graphs with
// overlapping edges, merges the second into the first and prints the result
// next to a hand-written expectation, then merges the graph with itself.
// Nothing is asserted; the output is meant to be compared by eye.
void test_functions()
{
    Graph<string, float, NodeMap> g, g1;
    g.newnode("hello");
    g.newnode("world");
    g.newnode("WORLD");
    g.newnode("abcde");
    g1.newnode("hello");
    g1.newnode("world");
    g1.newnode("HELLO");
    g1.newnode("abcde");

    g.newedge("hello", "world", 1);
    g.newedge("WORLD", "world", 2);
    g.newedge("abcde", "WORLD", 1.2);
    g.newedge("abcde", "hello", 2.34);
    g1.newedge("hello", "world", 3);
    g1.newedge("hello", "HELLO", 4);
    g1.newedge("abcde", "hello", 2.66);

    g.merge_graph(g1);

    // Per the expectation below, edges present in both graphs end up with the
    // sum of their weights (1 + 3 = 4, 2.34 + 2.66 = 5).
    // The second line used to read "WOLRD", which does not name any node and
    // therefore could never match the real output.
    cout << "expected output (order does not matter):" << endl;
    cout << "hello world 4" << endl
         << "WORLD world 2" << endl
         << "abcde WORLD 1.2" << endl
         << "abcde hello 5" << endl
         << "hello HELLO 4" << endl;

    cout << "real output: " << endl;
    auto ev = g.edges();
    for(const auto& itr : ev)
    {
        cout << itr.first << ' ' << itr.second.first << ' ' << itr.second.second << endl;
    }

    // Also dump the merged-in graph to show the state merge_graph() left it in.
    ev = g1.edges();
    for(const auto& itr : ev)
    {
        cout << itr.first << ' ' << itr.second.first << ' ' << itr.second.second << endl;
    }

    cout << "what if merge with itself" << endl;
    g.merge_graph(g);
    ev = g.edges();
    for(const auto& itr : ev)
    {
        cout << itr.first << ' ' << itr.second.first << ' ' << itr.second.second << endl;
    }
}
// Runs every test suite in order, printing a separator line between
// consecutive suites. Always returns 0; failures surface through assert().
int main()
{
    void (*const suites[])() = { test_nodemap, test_nodeset, test_graph, test_functions };

    bool first = true;
    for(auto suite : suites)
    {
        if(!first)
            cout << "#################################" << endl;
        first = false;
        suite();
    }
    return 0;
}
================================================
FILE: core/graph/defs.h
================================================
#ifndef DEFS_H
#define DEFS_H
// Policy applied when an insertion meets a key that is already present.
// Passed as the `dupmode` argument of the newnode()/newrec()/newedge() family.
enum DUPMODE
{
DUP_IGNORE = 0, // do not insert when duplicated
DUP_OVERWRITE, // overwrite duplicated
DUP_WARN, // throw if duplicated
};
#endif // DEFS_H
================================================
FILE: core/graph/exception.cpp
================================================
================================================
FILE: core/graph/exception.h
================================================
#ifndef EXCEPTION_H_INCLUDE
#define EXCEPTION_H_INCLUDE
#include <stdexcept>
#include <exception>
#include <cerrno>
#include <cstring>
#include <sstream>
// Formats `key` with operator<< and returns the resulting text.
// T must be streamable into a std::ostream.
// The key is taken by const reference so that large keys (e.g. strings) are
// not copied just to be printed; existing calls such as _to_string<T>(key)
// keep working unchanged.
template <typename T>
std::string _to_string(const T& key)
{
    std::ostringstream oss;
    oss << key;
    return oss.str();
}
// Thrown when a key that is already present is inserted again.
// what() reads "<where>: <msg> <key>"; the three parts are also kept as
// public members so handlers can inspect them individually.
template <typename T>
class DuplicateKeyException : public std::runtime_error
{
public:
    // location - where the duplicate was detected
    // message  - human readable description
    // dupkey   - the offending key (must be streamable, see _to_string)
    DuplicateKeyException(const std::string& location, const std::string& message, T dupkey)
        : std::runtime_error(location + ": " + message + " " + _to_string<T>(dupkey)),
          key(dupkey),
          msg(message),
          where(location)
    {
    }

    T key;
    std::string msg;
    std::string where;
};
// Thrown when an operation is given a key it cannot accept.
// what() reads "<where>: <msg> <key>"; the three parts are also kept as
// public members so handlers can inspect them individually.
template <typename T>
class InvalidKeyException : public std::runtime_error
{
public:
    // location - where the invalid key was detected
    // message  - human readable description
    // badkey   - the offending key (must be streamable, see _to_string)
    InvalidKeyException(const std::string& location, const std::string& message, T badkey)
        : std::runtime_error(location + ": " + message + " " + _to_string<T>(badkey)),
          key(badkey),
          msg(message),
          where(location)
    {
    }

    T key;
    std::string msg;
    std::string where;
};
// I/O failure annotated with the description of `errno` at construction time.
// what() reads "<where>: <msg>: <strerror(errno)>".
// Not thread safe: it relies on errno and on strerror(), which is allowed to
// return a pointer to shared static storage.
class IOException : public std::runtime_error
{
public:
    // where - location of the failing operation
    // msg   - human readable description
    // errno must still hold the error code of the failed call.
    //
    // The errno text is captured once and handed to the private constructor.
    // The previous code read the member `strerr` inside the base-class
    // initializer, i.e. before that member had been constructed (bases are
    // always initialized before members), which is undefined behaviour.
    IOException(const std::string& where, const std::string& msg)
        : IOException(where, msg, std::string(strerror(errno))) {}

    std::string msg;
    std::string where;
    std::string strerr;

private:
    // Builds the exception from an already captured errno description so that
    // what() and `strerr` are guaranteed to agree.
    IOException(const std::string& where, const std::string& msg, const std::string& err)
        : std::runtime_error(where + ": " + msg + ": " + err),
          msg(msg), where(where), strerr(err) {}
};
// Raised when input does not have the expected format. Like IOException it
// appends the description of the current `errno`:
// what() reads "<where>: <msg>: <strerror(errno)>".
// Not thread safe: it relies on errno and on strerror(), which is allowed to
// return a pointer to shared static storage.
class InvalidFormatException : public std::runtime_error
{
public:
    // where - location that detected the malformed input
    // msg   - human readable description
    //
    // The errno text is captured once and handed to the private constructor.
    // The previous code read the member `strerr` inside the base-class
    // initializer, i.e. before that member had been constructed (bases are
    // always initialized before members), which is undefined behaviour.
    InvalidFormatException(const std::string& where, const std::string& msg)
        : InvalidFormatException(where, msg, std::string(strerror(errno))) {}

    std::string msg;
    std::string where;
    std::string strerr;

private:
    // Builds the exception from an already captured errno description so that
    // what() and `strerr` are guaranteed to agree.
    InvalidFormatException(const std::string& where, const std::string& msg, const std::string& err)
        : std::runtime_error(where + ": " + msg + ": " + err),
          msg(msg), where(where), strerr(err) {}
};
// Signals that a function has no implementation yet.
// what() reads "<where>: <msg>".
class NotImplementedException : public std::logic_error
{
public:
    // where - name of the unimplemented function (stored in `funcname`)
    // msg   - human readable description
    //
    // The what() text is built from the constructor parameters. The previous
    // code used the member `funcname` there, which is not yet constructed
    // while the base class is being initialized (undefined behaviour).
    NotImplementedException(const std::string& where, const std::string& msg)
        : std::logic_error(where + ": " + msg), msg(msg), funcname(where) {}

    std::string msg;
    std::string funcname;
};
// Raised when a named field cannot be found.
// what() reads "<where>: <msg>: <fieldname>"; the three parts are also kept
// as public members.
class NoSuchFieldException : public std::logic_error
{
public:
    // location - where the lookup failed
    // message  - human readable description
    // field    - name of the missing field
    NoSuchFieldException(const std::string& location, const std::string& message, const std::string field)
        : std::logic_error(location + ": " + message + ": " + field),
          msg(message),
          where(location),
          fieldname(field)
    {
    }

    std::string msg;
    std::string where;
    std::string fieldname;
};
#endif // EXCEPTION_H_INCLUDE
================================================
FILE: core/graph/graph.cpp
================================================
#include "graph.h"
//#include <list>
//#include <map>
//#include <utility>
//#include <cassert>
//#include <sstream>
//#include "exception.h"
//using namespace std;
//const Graph Graph::empty_graph;
//const list<Graph::key_type> Graph::empty_val;
//bool Graph::load(const char *fn, int fmt, bool overwrite, const NodeSet& filter, bool filter_out)
//{
// // in this case, we filter_out an empty graph
// if(&filter == &NodeSet::empty_ns)
// filter_out = true;
// FILE *fp = fopen(fn, "r");
// if(!fp)
// {
// char buf[128];
// sprintf(buf, "Cannot open graph file for reading: %s", fn);
// perror(buf);
// return false;
// }
// while(1)
// {
// Graph::key_type user = 0;
// list<Graph::key_type> arr;
// if(fmt == Graph::FMT_RAW)
// {
// user = readline(fp, &arr);
// if(user == -1) break;
// }
// else
// {
// size_t ecnt;
// if(fscanf(fp, WUID_IOFMT "%lu", &user, &ecnt) == EOF)
// break;
// for(size_t i = 0; i < ecnt; i++)
// {
// wuid_t tmp;
// fscanf(fp, WUID_IOFMT, &tmp);
// arr.push_back(tmp);
// }
// }
// //assert(!exists(user));
// if(filter_out ^ filter.exists(user))
// {
// if(!overwrite && exists(user))
// {
// ostringstream oss;
// oss << "Duplicated key: " << user;
// throw DuplicateKeyException<key_type>(oss.str(), user);
// }
// data[user] = move(arr);
// }
// }
// fclose(fp);
// return true;
//}
//bool Graph::load(FileNameInfo info, int fmt, bool overwrite, const NodeSet& filter, bool filter_out)
//{
// // we use a separate graph to load because we do not consider overwrite when loading multiple files
// Graph ng;
// for(auto itr : info)
// {
// printf("loading %s\n", itr.c_str());
// ng.load(itr.c_str(), fmt, false, filter, filter_out);
// }
// merge(ng, overwrite);
// return true;
//}
//void Graph::merge(Graph& g, bool overwrite)
//{
// if(size() < g.size()) // for efficiency
// data.swap(g.data);
// for(const auto& itr : g.data)
// {
// if(!overwrite && exists(itr.first))
// {
// ostringstream oss;
// oss << "Duplicated key: " << itr.first;
// throw DuplicateKeyException<key_type>(oss.str(), itr.first);
// }
// data[itr.first] = move(itr.second);
// }
// g.clear();
//}
//const list<Graph::key_type>& Graph::get(Graph::key_type key) const
//{
// if(exists(key))
// return find(key)->second;
// else
// return empty_val;
//}
//// TODO: return a view instead of creating a new nodeset
//NodeSet Graph::nodes() const
//{
// NodeSet ret;
// for(const auto& itr : data)
// {
// ret.newnode(itr.first);
// }
// return move(ret);
//}
//Graph Graph::subset(const NodeSet& ns)
//{
// Graph ret;
// for(const auto& itr : data)
// {
// if(ns.exists(itr.first))
// ret[itr.first] = itr.second;
// }
// return move(ret);
//}
//bool Graph::save(const char *fn, int fmt)
//{
// FILE *fp = fopen(fn, "w");
// if(!fp)
// {
// perror("Cannot open network file for writing");
// return false;
// }
// for(const auto& u : data)
// {
// fprintf(fp, WUID_IOFMT, u.first);
// if(fmt == FMT_ADVANCED)
// fprintf(fp, " %lu", u.second.size());
// for(auto fu : u.second)
// fprintf(fp, " " WUID_IOFMT, fu);
// fprintf(fp, "\n");
// }
// fclose(fp);
// return true;
//}
//bool Graph::save(const FileNameInfo& info, int fmt)
//{
// if(info.rr != info.lr)
// throw runtime_error("saving graph to multiple files is not supported");
// for(auto itr : info)
// {
// return save(itr.c_str(), fmt);
// }
// return true;
//}
//Graph Graph::inverseEdge() const
//{
//}
================================================
FILE: core/graph/graph.h
================================================
/**
Code for manipulating the graph structure.
Note: this could alternatively be implemented on top of the Boost Graph Library.
*/
#ifndef GRAPH_H_INCLUDED
#define GRAPH_H_INCLUDED
#include <map>
#include <list>
#include <utility>
#include "nodemap.h"
#include "exception.h"
#include "defs.h"
#ifdef DEBUG
#include <iostream>
#endif
// Throws NotImplementedException for the function named by `func`
// (a C string such as __FUNCTION__).
// The message is concatenated at run time: `##` cannot paste a string literal
// with another token, so the previous definition did not preprocess.
// Arguments follow the constructor's (where, msg) order.
#define NOTIMPL(func) throw NotImplementedException((func), std::string("function ") + (func) + " not implemented")
//template <typename node_type, typename rec_type,
// template <typename, typename> typename impl_type>
//class EdgeList : public impl_type<node_type, rec_type>
//{
//public:
// node_type rec2key(const rec_type& rec) { NOTIMPL(__FUNCTION__); }
//};
// when the concept of edge is introduced to NodeMap etc.
// Outgoing edges of a single node: a map from target node to edge weight,
// layered over the container template EdgeMapImpl<node_t, weight_t>.
// The underlying record type must be std::pair<const node_t, weight_t>.
// Instances are movable but not copyable.
template <typename node_t, typename weight_t, template<typename, typename> class EdgeMapImpl>
class EdgeMap : public EdgeMapImpl<node_t, weight_t>
{
public:
    using node_type = node_t;
    using weight_type = weight_t;
    using super = EdgeMapImpl<node_t, weight_t>;
    using rec_type = typename super::value_type;

    static_assert(std::is_same<rec_type, std::pair<const node_type, weight_type> >::value,
                  "Invalid inner types for EdgeMap");

    // User-provided (not defaulted) so that the const static `nullinst`
    // can be default-initialized.
    EdgeMap() {}

    // Takes over the contents of a bare implementation container.
    EdgeMap(EdgeMapImpl<node_t, weight_t>&& impl)
        : super(std::move(impl))
    {
    }

    EdgeMap(EdgeMap&& other)
        : super(std::move(other))
    {
    }

    EdgeMap& operator=(EdgeMap&& other)
    {
        super::operator=(std::move(other));
        return *this;
    }

    // Returns the key (target node) of an edge record.
    static node_type rec2key(const rec_type& rec)
    {
        return rec.first;
    }

    // Returns a copy of `rec` whose key is replaced by `new_key`; the weight
    // is kept.
    static rec_type invrec(const node_type& new_key, const rec_type& rec)
    {
        return std::make_pair(new_key, rec.second);
    }

    // Inserts a complete record; forwards to super::newnode().
    std::pair<typename super::iterator, bool> newrec(rec_type&& rec, DUPMODE dupmode = DUP_OVERWRITE)
    {
        return super::newnode(std::move(rec), dupmode);
    }

    // Inserts an edge to `key` with weight `val`; forwards to super::newnode().
    std::pair<typename super::iterator, bool> newedge(const node_type& key, weight_type&& val, DUPMODE dupmode = DUP_OVERWRITE)
    {
        return super::newnode(key, std::move(val), dupmode);
    }

    // True when an edge to `key` is stored in this map.
    bool exists(const node_type& key) const
    {
        return super::exists(key);
    }

    // Shared default-constructed instance (defined after the class).
    static const EdgeMap nullinst;

private:
    // Copying is disabled.
    EdgeMap(const EdgeMap& other) = delete;
    EdgeMap& operator=(const EdgeMap& other) = delete;
};
// Out-of-class definition of the static member EdgeMap::nullinst: one
// default-constructed, const instance per template instantiation.
template <typename node_t, typename weight_t, template<typename, typename> class EdgeMapImpl>
const EdgeMap<node_t, weight_t, EdgeMapImpl> EdgeMap<node_t, weight_t, EdgeMapImpl>::nullinst;
// Non-owning view that flattens a graph into a sequence of edges.
//
// G must provide begin()/end() over (node, edge list) pairs plus the nested
// types `iterator`, `node_type` and `edgelist_type`. The view stores only a
// pointer to the graph, so the graph must outlive the view and must not be
// modified while iterating.
template <typename G>
class EdgeView
{
public:
    EdgeView(G *g) : g(g) {}

    // Iterator over all edges, visiting nodes in the graph's order and, for
    // each node, its edges in the edge list's order. Nodes without edges are
    // skipped. Dereferencing yields (source node, edge record) by value.
    class iterator
    {
    public:
        typedef typename G::iterator outeritr;
        typedef typename G::edgelist_type::iterator inneritr;

        // Returns a copy of the current edge as (source node, edge record).
        // Must not be called on an end iterator.
        std::pair<typename G::node_type, typename G::edgelist_type::value_type> operator*()
        {
            return std::make_pair(outer->first, *inner);
        }

        iterator& operator++()
        {
            inc();
            skip_invalid();
            return *this;
        }

        iterator operator++(int)
        {
            iterator ret = *this;
            operator++();
            return ret;
        }

        // Two iterators are equal when both are at the end, or when they
        // point at the same node and the same edge of that node.
        bool operator==(const iterator& itr) const
        {
            const bool this_at_end = (outer == outerend);
            const bool that_at_end = (itr.outer == itr.outerend);
            if(this_at_end || that_at_end)
                return this_at_end && that_at_end;
            // Compare the node position first: the edge iterators belong to
            // the same edge list only when the nodes match, and comparing
            // iterators of different containers (or a value-initialized one
            // against a real one) is not allowed.
            return outer == itr.outer && inner == itr.inner;
        }

        bool operator!=(const iterator& itr) const
        {
            return !operator==(itr);
        }

        friend class EdgeView;

    private:
        iterator(outeritr outer, outeritr outerend, inneritr inner)
            : outer(outer), outerend(outerend), inner(inner) {}

        // Advances by one raw position: to the next edge of the current node
        // or, when that list is exhausted, to the first slot of the next
        // node. The new position may be an empty slot; see skip_invalid().
        iterator& inc()
        {
            if(outer == outerend) return *this;
            if(inner == outer->second.end())
            {
                ++outer;
                if(outer != outerend)
                    inner = outer->second.begin();
                else
                    inner = inneritr();
            }
            else
                ++inner;
            return *this;
        }

        // Moves forward until the iterator refers to a real edge or reaches
        // the end, skipping exhausted and empty edge lists.
        iterator& skip_invalid()
        {
            while(outer != outerend && inner == outer->second.end())
                inc();
            return *this;
        }

        outeritr outer;    // current node
        outeritr outerend; // end of the node sequence
        inneritr inner;    // current edge of `outer`; value-initialized at end
    };

    // Iterator to the first edge of the graph, or end() if there is none.
    iterator begin()
    {
        typename iterator::outeritr outer = g->begin();
        // Value-initialize so the iterator never carries an indeterminate value.
        typename iterator::inneritr inner = typename iterator::inneritr();
        if(outer != g->end())
        {
            inner = outer->second.begin();
        }
        iterator first(outer, g->end(), inner);
        first.skip_invalid();
        return first;
    }

    // Past-the-end iterator.
    iterator end()
    {
        typename iterator::outeritr outer = g->end();
        typename iterator::inneritr inner = typename iterator::inneritr();
        return iterator(outer, outer, inner);
    }
    // TODO add const version here

private:
    G *g;
};
// TODO: add directed label to graph
template <typename node_t, typename weight_t, template <class, class> class EdgeMapImpl>
class Graph : public NodeMap<node_t, EdgeMap<node_t, weight_t, EdgeMapImpl> >
{
public:
// static_assert(std::is_same<typename EdgeMap<mapped_t>::node_type, key_t>::value,
// "From and To nodes are not the same type");
typedef EdgeMap<node_t, weight_t, EdgeMapImpl> edgelist_type;
typedef NodeMap<node_t, edgelist_type> super;
typedef node_t node_type;
typedef weight_t weight_type;
// enum
// {
// FMT_RAW = 0,
// FMT_ADVANCED, // with number of edges specified in each line
// };
Graph() {}
Graph(Graph&& g) = default;
Graph& operator=(Graph&& g) = default;
//Graph(const char *fn, int fmt) { load(fn, fmt); }
/** filter_out:
true - ignore all those appeared in filter
false - reserve only those appeared in filter
*/
//bool load(const char *fn, int fmt, bool overwrite = false, const NodeSet& filter = NodeSet::empty_ns, bool filter_out = false);
//bool load(FileNameInfo info, int fmt, bool overwrite = false, const NodeSet& filter = NodeSet::empty_ns, bool filter_out = false);
//bool save(const char *fn, int fmt);
//bool save(const FileNameInfo& info, int fmt);
const edgelist_type& get_value(const node_type& key) const
{
return static_cast<const edgelist_type&>(super::get_value(key));
// return super::get(key);
}
edgelist_type& get(const node_type& key)
{
return static_cast<edgelist_type&>(super::get(key));
}
size_t out_degree(const node_type& key) const
{
return get_value(key).size();
}
const weight_type& edge_value(const node_type& key1, const node_type& key2, const weight_type& dft = Defaults<weight_type>::get()) const
{
return get_value(key1).get_value(key2, dft);
}
weight_type& edge(const node_type& key1, const node_type& key2,
bool create_default = false, weight_type&& dft = weight_type())
{
return get(key1).get(key2, create_default, std::move(dft));
}
bool exists(const node_type& from, const node_type& to) const
{
return get_value(from).exists(to);
}
bool exists(const node_type& key) const { return super::exists(key); }
// implement edge iterator as return type?
std::pair<typename edgelist_type::iterator, bool> newedge(const node_type& from, const node_type& to, ty
gitextract_6ehqdt73/
├── .gitignore
├── LICENSE
├── README.md
├── __main__.py
├── build.sh
├── core/
│ ├── __init__.py
│ ├── algorithm/
│ │ ├── CMakeLists.txt
│ │ ├── __init__.py
│ │ ├── boost_python_omp.h
│ │ ├── cmake/
│ │ │ └── FindNumPy.cmake
│ │ ├── dynamic_triad.py
│ │ ├── dynamic_triad_cimpl.cpp
│ │ ├── embutils.py
│ │ └── samplers/
│ │ ├── __init__.py
│ │ ├── pos_neg.py
│ │ ├── pos_neg_tri.py
│ │ └── sampler.py
│ ├── cython_src/
│ │ ├── README.txt
│ │ └── utils_cy.pyx
│ ├── dataset/
│ │ ├── __init__.py
│ │ ├── adjlist.py
│ │ ├── citation.py
│ │ └── dataset_utils.py
│ ├── gconfig.py
│ ├── gconv.py
│ ├── graph/
│ │ ├── CMakeLists.txt
│ │ ├── CMakeLists.txt.user
│ │ ├── _test_graph.cpp
│ │ ├── defs.h
│ │ ├── exception.cpp
│ │ ├── exception.h
│ │ ├── graph.cpp
│ │ ├── graph.h
│ │ ├── graph_pywrapper.cpp
│ │ ├── graph_pywrapper.h
│ │ ├── ioutils.cpp
│ │ ├── ioutils.h
│ │ ├── nodemap.cpp
│ │ ├── nodemap.h
│ │ ├── nodeset.cpp
│ │ ├── nodeset.h
│ │ ├── types.cpp
│ │ ├── types.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── graphtool_utils.py
│ ├── kerasext/
│ │ ├── __init__.py
│ │ ├── debug/
│ │ │ ├── __init__.py
│ │ │ └── finite_number_check.py
│ │ └── keras_backend_patches/
│ │ ├── __init__.py
│ │ ├── tensorflow_patches.py
│ │ └── theano_patches.py
│ ├── mygraph_utils.py
│ ├── utils.py
│ └── utils_py.py
├── data/
│ └── academic_toy.pickle
├── docs/
│ ├── README.md
│ ├── _config.yml
│ └── _layouts/
│ └── default.html
├── requirements.txt
└── scripts/
├── academic2adjlist.py
├── demo.sh
├── demo_raw.sh
├── stdtests.py
└── test.py
SYMBOL INDEX (389 symbols across 30 files)
FILE: core/__init__.py
function main (line 7) | def main():
FILE: core/algorithm/boost_python_omp.h
function class (line 25) | class GILRelease {
function GILAcquire (line 33) | struct GILAcquire{
FILE: core/algorithm/dynamic_triad.py
class Model (line 27) | class Model(Sampler, TrainFlow, WithData, Validator):
method __init__ (line 28) | def __init__(self, ds, pretrain_size=10, embdim=16, beta=None,
method name (line 48) | def name(self):
method dataset (line 52) | def dataset(self):
method pretrain (line 56) | def pretrain(self):
method online (line 64) | def online(self):
method verbose (line 71) | def verbose(self, inputs):
method make_pretrain (line 76) | def make_pretrain(self):
method make_online (line 173) | def make_online(self):
method save_model (line 216) | def save_model(self, copy=True):
method restore_model (line 227) | def restore_model(self, model, begin=None, end=None, copy=True):
method pretrain_begin (line 244) | def pretrain_begin(self, begin, end):
method pretrain_begin_iteration (line 252) | def pretrain_begin_iteration(self):
method pretrain_end_iteration (line 263) | def pretrain_end_iteration(self):
method pretrain_end (line 267) | def pretrain_end(self):
method online_begin (line 271) | def online_begin(self, begin, end):
method online_end (line 282) | def online_end(self):
method make_pretrain_input (line 288) | def make_pretrain_input(self, batch):
method make_online_input (line 299) | def make_online_input(self, batch):
method __emcoef_cimpl (line 308) | def __emcoef_cimpl(self, data):
method __emcoef_pyimpl (line 319) | def __emcoef_pyimpl(self, data):
method __emcoef (line 334) | def __emcoef(self, data):
method __emcoef_monitor (line 353) | def __emcoef_monitor(reportq):
method __emcoef_calculator_factory (line 365) | def __emcoef_calculator_factory(self, timestep):
FILE: core/algorithm/dynamic_triad_cimpl.cpp
type Record (line 26) | struct Record
function Tensor1D_Managed (line 45) | Tensor1D_Managed X(int a, int b, int c, cgraph_type<graph_t> g, Tensor2D...
function P (line 58) | float P(int a, int b, int c, cgraph_type<graph_t> g, Tensor2D emb, Tenso...
function translate_input (line 71) | void translate_input(const py::list py_graph, const py::list py_nodename...
function Tensor1D (line 92) | Tensor1D translate_1darray(py::object arr)
function Tensor3D (line 102) | Tensor3D translate_3darray(py::object arr)
function T (line 108) | T extract(py::object obj)
function extract_record (line 120) | void extract_record(py::object rec, Record *out)
function Tensor2D (line 135) | Tensor2D slice_tensor3d(Tensor3D t, int idx)
function _emcoef (line 145) | py::list _emcoef(py::list data, py::object py_emb, py::object py_theta, ...
function emcoef (line 252) | py::list emcoef(py::list data, py::object py_emb, py::object py_theta, p...
function BOOST_PYTHON_MODULE (line 269) | BOOST_PYTHON_MODULE(dynamic_triad_cimpl)
FILE: core/algorithm/embutils.py
class WithData (line 16) | class WithData(object):
method dataset (line 18) | def dataset(self):
class TrainFlow (line 22) | class TrainFlow(utils.Archivable, WithData):
method __init__ (line 23) | def __init__(self, **flowargs):
method __add_history (line 37) | def __add_history(self, hist):
method start_training (line 54) | def start_training(self, begin, end):
method stop_training (line 61) | def stop_training(self):
method cur_train_begin (line 66) | def cur_train_begin(self):
method cur_train_end (line 72) | def cur_train_end(self):
method init_train_begin (line 78) | def init_train_begin(self):
method init_train_end (line 84) | def init_train_end(self):
method last_train_begin (line 90) | def last_train_begin(self):
method last_train_end (line 96) | def last_train_end(self):
method flowargs (line 102) | def flowargs(self):
method is_training (line 106) | def is_training(self):
method embeddings_at (line 109) | def embeddings_at(self, step, allow_missing=False, default=None):
method make_features (line 119) | def make_features(self, samples):
method clear (line 129) | def clear(self):
method slim_storage (line 135) | def slim_storage(self, keep_size):
method export (line 141) | def export(self):
method archive (line 148) | def archive(self, name=None, copy=True):
method load_archive (line 165) | def load_archive(self, ar, copy=True, name=None):
method __check_flowargs (line 186) | def __check_flowargs(self, old_args):
method save_model (line 194) | def save_model(self, copy=True):
method restore_model (line 202) | def restore_model(self, model, begin=None, end=None, copy=True):
class TrainFlowView (line 219) | class TrainFlowView(TrainFlow, WithData):
method __init__ (line 220) | def __init__(self, **flowargs):
method start_training (line 223) | def start_training(self, begin, end):
method stop_training (line 226) | def stop_training(self):
method dataset (line 230) | def dataset(self):
class Validator (line 237) | class Validator(object):
method __task_handler (line 241) | def __task_handler(self):
method _validate_link_reconstruction (line 250) | def _validate_link_reconstruction(self, samples, lbs):
method _validate_node_classify (line 276) | def _validate_node_classify(self, samples, lbs):
method __none (line 296) | def __none(self, samples, lbs):
method __unknown (line 299) | def __unknown(self, samples, lbs):
method validate (line 302) | def validate(self, task, samples, lbs):
FILE: core/algorithm/samplers/pos_neg.py
class Sampler (line 13) | class Sampler(sampler.Sampler, WithData):
method __init__ (line 14) | def __init__(self, **kwargs):
method __make_pos (line 24) | def __make_pos(self, begin, end):
method pretrain_begin (line 51) | def pretrain_begin(self, begin, end):
method pretrain_end (line 56) | def pretrain_end(self):
method __make_neg (line 59) | def __make_neg(self, posdata, negdup=1):
method pretrain_begin_iteration (line 102) | def pretrain_begin_iteration(self):
method pretrain_end_iteration (line 106) | def pretrain_end_iteration(self):
method online_begin (line 109) | def online_begin(self, begin, end):
method online_end (line 114) | def online_end(self):
method online_begin_iteration (line 117) | def online_begin_iteration(self):
method online_end_iteration (line 121) | def online_end_iteration(self):
method make_pretrain_input (line 124) | def make_pretrain_input(self, batch):
method make_online_input (line 137) | def make_online_input(self, batch):
method _make_rep_cache (line 148) | def _make_rep_cache(self, k):
method _rep_cache (line 155) | def _rep_cache(self, k):
method shuffle_sample (line 160) | def shuffle_sample(self):
method batches (line 166) | def batches(self, batchsize):
method sample_size (line 173) | def sample_size(self):
FILE: core/algorithm/samplers/pos_neg_tri.py
class Sampler (line 21) | class Sampler(pos_neg.Sampler, WithData):
method __init__ (line 23) | def __init__(self, **kwargs):
method pretrain_begin_iteration (line 35) | def pretrain_begin_iteration(self):
method _triag_cache (line 74) | def _triag_cache(self, k, knode, onode):
method _make_triag_cache (line 85) | def _make_triag_cache(self, t):
method make_pretrain_input (line 109) | def make_pretrain_input(self, batch):
method __sample_uncached_monitor (line 115) | def __sample_uncached_monitor(reportq):
method __uncached_sampler_factory (line 142) | def __uncached_sampler_factory(self):
method __sample_one_uncached (line 178) | def __sample_one_uncached(data, nodenames, name2idx, mygraphs, localst...
method __debug_and_count_triangles (line 246) | def __debug_and_count_triangles(self, nodenames):
FILE: core/algorithm/samplers/sampler.py
class Sampler (line 4) | class Sampler(object):
method pretrain_begin (line 5) | def pretrain_begin(self, begin, end):
method pretrain_end (line 8) | def pretrain_end(self):
method pretrain_begin_iteration (line 11) | def pretrain_begin_iteration(self):
method pretrain_end_iteration (line 14) | def pretrain_end_iteration(self):
method online_begin (line 17) | def online_begin(self, begin, end):
method online_end (line 20) | def online_end(self):
method online_begin_iteration (line 23) | def online_begin_iteration(self):
method online_end_iteration (line 26) | def online_end_iteration(self):
method make_pretrain_input (line 29) | def make_pretrain_input(self, batch):
method make_online_input (line 32) | def make_online_input(self, batch):
method shuffle_sample (line 35) | def shuffle_sample(self):
method batches (line 38) | def batches(self, batchsize):
method sample_size (line 41) | def sample_size(self):
FILE: core/dataset/adjlist.py
class Dataset (line 8) | class Dataset(DatasetBase):
method inittime (line 10) | def inittime(self):
method __init__ (line 13) | def __init__(self, datafn, localtime, nsteps, stepsize, stepstride, of...
method name (line 22) | def name(self):
method _time2unit (line 26) | def _time2unit(self, tm):
method _unit2time (line 29) | def _unit2time(self, unit):
method __check_vertices (line 32) | def __check_vertices(self, vs):
method _load_unit_graph (line 38) | def _load_unit_graph(self, tm):
method _merge_unit_graphs (line 52) | def _merge_unit_graphs(self, graphs, curstep):
method archive (line 66) | def archive(self, name=None):
method load_archive (line 70) | def load_archive(self, ar, copy=False, name=None):
FILE: core/dataset/citation.py
class Dataset (line 13) | class Dataset(DatasetBase):
method inittime (line 15) | def inittime(self):
method __init__ (line 18) | def __init__(self, datafn, localyear=None, nsteps=None, stepsize=None,...
method name (line 37) | def name(self):
method _time2unit (line 41) | def _time2unit(self, tm):
method _unit2time (line 44) | def _unit2time(self, unit):
method _load_unit_graph (line 48) | def _load_unit_graph(self, tm):
method _merge_unit_graphs (line 54) | def _merge_unit_graphs(self, graphs, curstep):
method _full_archive (line 65) | def _full_archive(self, name=None):
method archive (line 69) | def archive(self, name=None):
method load_archive (line 79) | def load_archive(self, ar, copy=False, name=None):
method manual_features (line 91) | def manual_features(self):
method data (line 95) | def data(self):
method __label_vertices (line 99) | def __label_vertices(feats, featnames, confdata):
method __vertex_raw_labels (line 121) | def __vertex_raw_labels(self, return_name=False):
method vertex_labels (line 205) | def vertex_labels(self, target=4, return_name=False):
FILE: core/dataset/dataset_utils.py
class Timeline (line 13) | class Timeline(utils.Archivable):
method __init__ (line 14) | def __init__(self, inittime, stepsize, stepstride):
method time2step (line 19) | def time2step(self, tm):
method step2time (line 26) | def step2time(self, step):
method _step2unit (line 30) | def _step2unit(self, step):
method _unit2step (line 33) | def _unit2step(self, unit):
method _time2unit (line 36) | def _time2unit(self, step):
method _unit2time (line 39) | def _unit2time(self, unit):
class Archive (line 46) | class Archive(utils.Archivable):
method _archive_args (line 47) | def _archive_args(self):
method archive (line 50) | def archive(self, name=None):
method from_archive (line 61) | def from_archive(cls, ar, copy=False, name=None):
class Cache (line 73) | class Cache(utils.Archivable):
method _cache_args (line 74) | def _cache_args(self):
method is_compatible (line 77) | def is_compatible(self, args):
method _full_archive (line 81) | def _full_archive(self, name=None):
method cache (line 84) | def cache(self):
method load_cache (line 87) | def load_cache(self, args, datagen):
class DynamicGraph (line 94) | class DynamicGraph(Timeline, utils.Archivable):
method __init__ (line 95) | def __init__(self, inittime, localtime, nsteps, stepsize, stepstride, ...
method mygraphs (line 112) | def mygraphs(self):
method gtgraphs (line 116) | def gtgraphs(self):
method nsize (line 120) | def nsize(self):
method _load_graph (line 124) | def _load_graph(self, step):
method _load_unit_graph (line 131) | def _load_unit_graph(self, tm):
method _merge_unit_graphs (line 134) | def _merge_unit_graphs(self, graphs, curstep):
method archive (line 138) | def archive(self, name=None):
method load_archive (line 150) | def load_archive(self, ar, copy=False, name=None):
class TestSampler (line 161) | class TestSampler(object):
method vertex_labels (line 163) | def vertex_labels(self):
method vertex_raw_labels (line 166) | def vertex_raw_labels(self):
method vertex_static_labels (line 169) | def vertex_static_labels(self):
method __stratified_sample_size (line 174) | def __stratified_sample_size(size, possize, negsize):
method __task_handler (line 185) | def __task_handler(self):
method _sample_order_links (line 197) | def _sample_order_links(self, begin, end, size=None, intv=0, name=""):
method _sample_link_reconstruction (line 223) | def _sample_link_reconstruction(self, begin, end, size=None, negdup=1,...
method _sample_link_prediction (line 269) | def _sample_link_prediction(self, begin, end, size=None, name=""):
method _sample_node_classify (line 273) | def _sample_node_classify(self, begin, end, size=None, intv=0, name=""):
method _sample_node_predict (line 293) | def _sample_node_predict(self, begin, end, size=None, name=""):
method _sample_changed_link_classify (line 296) | def _sample_changed_link_classify(self, begin, end, size=None, intv=0,...
method _sample_changed_link_prediction (line 342) | def _sample_changed_link_prediction(self, begin, end, size, name=""):
method __none (line 345) | def __none(self, begin, end, size=None, name=""):
method __unknown (line 348) | def __unknown(self, begin, end, size=None, name=""):
method sample_test_data (line 351) | def sample_test_data(self, task, begin, end, size=None):
class DatasetBase (line 355) | class DatasetBase(DynamicGraph, Archive, Cache, TestSampler):
method inittime (line 359) | def inittime(self):
method _archive_args (line 362) | def _archive_args(self):
method _cache_args (line 365) | def _cache_args(self):
method __init__ (line 375) | def __init__(self, datafn, localtime, nsteps, stepsize=5, stepstride=1...
FILE: core/gconv.py
function graphtool2mygraph (line 9) | def graphtool2mygraph(g, **_):
function mygraph2graphtool (line 52) | def mygraph2graphtool(g, convert_to=None, **_):
FILE: core/graph/_test_graph.cpp
function test_nodemap (line 9) | void test_nodemap()
function test_nodeset (line 61) | void test_nodeset()
type A (line 97) | struct A { }
function test_graph (line 99) | void test_graph()
function test_functions (line 167) | void test_functions()
function main (line 218) | int main()
FILE: core/graph/defs.h
type DUPMODE (line 4) | enum DUPMODE
FILE: core/graph/exception.h
function string (line 11) | string _to_string(T key)
function class (line 44) | class IOException : public std::runtime_error
function class (line 55) | class InvalidFormatException : public std::runtime_error
function class (line 66) | class NotImplementedException : public std::logic_error
function class (line 75) | class NoSuchFieldException : public std::logic_error
FILE: core/graph/graph.h
type weight_t (line 36) | typedef weight_t weight_type;
type EdgeMapImpl (line 37) | typedef EdgeMapImpl<node_t, weight_t> super;
type typename (line 38) | typedef typename super::value_type rec_type;
function node_type (line 54) | static node_type rec2key(const rec_type& rec) { return rec.first; }
function rec_type (line 55) | static rec_type invrec(const node_type& new_key, const rec_type& rec)
function exists (line 67) | bool exists(const node_type& key) const
function class (line 88) | class iterator
function iterator (line 159) | iterator begin()
function iterator (line 169) | iterator end()
type EdgeMap (line 188) | typedef EdgeMap<node_t, weight_t, EdgeMapImpl> edgelist_type;
type NodeMap (line 189) | typedef NodeMap<node_t, edgelist_type> super;
type node_t (line 190) | typedef node_t node_type;
type weight_t (line 191) | typedef weight_t weight_type;
function edgelist_type (line 212) | const edgelist_type& get_value(const node_type& key) const
function out_degree (line 222) | size_t out_degree(const node_type& key) const
function exists (line 238) | bool exists(const node_type& from, const node_type& to) const
function exists (line 243) | bool exists(const node_type& key) const { return super::exists(key); }
function Graph (line 262) | Graph inverseEdge() const
FILE: core/graph/graph_pywrapper.cpp
type GraphPickleSuite (line 15) | struct GraphPickleSuite : py::pickle_suite
method getinitargs (line 17) | static py::tuple getinitargs(const T& t)
method getstate (line 22) | static py::tuple getstate(const T& t)
method setstate (line 28) | static void setstate(T& t, py::tuple state)
function T (line 79) | T* translate(uintptr_t ptr)
function transfer_object (line 85) | py::object transfer_object(T *g)
function _throw_unsupported (line 91) | void _throw_unsupported(const string& ntype, const string& wtype)
function makeGraph (line 101) | py::object makeGraph(const string& ntype, const string& wtype)
function BOOST_PYTHON_MODULE (line 145) | BOOST_PYTHON_MODULE(mygraph)
FILE: core/graph/graph_pywrapper.h
function g (line 35) | GraphPyWrapper(const GraphPyWrapper& g)
function g (line 45) | GraphPyWrapper(const GraphPyWrapper &&g)
function g (line 55) | g(g)
function GraphPyWrapper (line 71) | static GraphPyWrapper* makeGraph()
function load_graph (line 81) | void load_graph(const char *fn)
function parse_str (line 93) | void parse_str(const std::string& str)
function add_vertex (line 105) | void add_vertex(const typename CGraph::key_type& node)
function add_edge (line 110) | void add_edge(const typename CGraph::key_type& from, const typename CGra...
function inc_edge (line 115) | void inc_edge(const typename CGraph::key_type& from, const typename CGra...
function GraphPyWrapper (line 130) | GraphPyWrapper *inverseEdge()
function WeightType (line 159) | WeightType edge(typename CGraph::key_type from, typename CGraph::key_typ...
function exists (line 164) | bool exists(typename CGraph::key_type key) const
function exists (line 169) | bool exists(typename CGraph::key_type from, typename CGraph::key_type to...
FILE: core/graph/ioutils.h
function class (line 13) | class BaseArchive {}
function class (line 15) | class IArchive : public BaseArchive
function class (line 21) | class OArchive : public BaseArchive
function class (line 27) | class TxtArchive : public BaseArchive
function class (line 33) | class BinArchive : public BaseArchive
type SetDelim (line 127) | struct SetDelim
function get_delim (line 174) | char get_delim()
FILE: core/graph/nodemap.h
type mapped_t (line 31) | typedef mapped_t mapped_type;
type std (line 32) | typedef std::map<key_type, mapped_type> data_type;
type typename (line 33) | typedef typename data_type::value_type value_type;
type typename (line 34) | typedef typename data_type::iterator iterator;
type typename (line 35) | typedef typename data_type::const_iterator const_iterator;
type typename (line 36) | typedef typename data_type::size_type size_type;
function data (line 39) | NodeMap(const data_type& data) : data(data) {}
function data (line 40) | data(data) {}
function load (line 76) | bool load(const char *fn)
function load (line 87) | bool load(std::istream& in)
function fromstring (line 107) | bool fromstring(std::istream& in, DUPMODE dupmode = DUP_WARN)
function tostring (line 138) | bool tostring(std::ostream& out) const
function clear (line 197) | void clear() { data.clear(); }
function NodeMap (line 198) | NodeMap copy() const { return std::move(NodeMap(data)); }
function iterator (line 211) | iterator find(const key_type& key) { return data.find(key); }
function const_iterator (line 212) | const_iterator find(const key_type& key) const { return data.find(key); }
function exists (line 213) | bool exists(const key_type& key) const { return data.find(key) != data.e...
function iterator (line 214) | iterator begin() { return data.begin(); }
function iterator (line 216) | iterator end() { return data.end(); }
FILE: core/graph/nodeset.h
type mapped_type (line 26) | typedef nullptr_t mapped_type;
type std (line 27) | typedef std::set<key_type> data_type;
type typename (line 28) | typedef typename data_type::iterator iterator;
type typename (line 29) | typedef typename data_type::const_iterator const_iterator;
type typename (line 30) | typedef typename data_type::size_type size_type;
type FORMAT (line 32) | enum FORMAT
function data (line 47) | NodeMap(const data_type& data) : data(data) {}
function data (line 48) | data(data) {}
function load (line 56) | bool load(const char *fn, FORMAT fmt = FMT_LIST, DUPMODE dupmode = DUP_W...
function clear (line 123) | void clear() { data.clear(); }
function NodeMap (line 124) | NodeMap copy() const { return std::move(NodeMap(data)); }
function iterator (line 138) | iterator find(const key_type& key) { return data.find(key); }
function const_iterator (line 139) | const_iterator find(const key_type& key) const { return data.find(key); }
function exists (line 140) | bool exists(const key_type& key) const { return data.find(key) != data.e...
function iterator (line 141) | iterator begin() { return data.begin(); }
function iterator (line 143) | iterator end() { return data.end(); }
function get (line 163) | const bool get(const key_type& key) const
FILE: core/graph/types.h
function string (line 11) | struct type2name<std::string>
function int (line 17) | struct type2name<int>
function float (line 23) | struct type2name<float>
function int64_t (line 29) | struct type2name<int64_t>
FILE: core/graph/utils.h
type DftPrimitive (line 54) | struct DftPrimitive {}
type DftNullinst (line 55) | struct DftNullinst {}
function T (line 65) | static const T& get() { return dft; }
function T (line 81) | static const T& get() { return dft; }
type CopyConstructable (line 93) | struct CopyConstructable {}
type CopyAssignable (line 94) | struct CopyAssignable {}
function T (line 103) | static T copy(const T& from)
function T (line 112) | static T copy(const T& from)
function T (line 122) | static T copy(const T& from)
type FileList (line 130) | struct FileList
FILE: core/graphtool_utils.py
function type2python (line 13) | def type2python(tp):
function python2type (line 24) | def python2type(tp):
function graph_summary (line 35) | def graph_summary(g):
function load_edge_list (line 40) | def load_edge_list(fn, directed=True, nodename=None, nametype='string', ...
function load_mygraph_core (line 88) | def load_mygraph_core(vertices, vid2elist, directed=True, nametype=None,...
function load_mygraph (line 168) | def load_mygraph(fn, directed=True, nodename=None, nametype='string', co...
function load_graph (line 264) | def load_graph(fn, fmt='mygraph', directed=None, nodename=None, nametype...
function save_graph (line 273) | def save_graph(g, fn, fmt='adjlist', weight=None):
function save_adjlist (line 284) | def save_adjlist(g, fn, weight=None):
function save_edgelist (line 302) | def save_edgelist(g, fn, weight=None):
function save_TNE (line 315) | def save_TNE(g, fn, weight=None):
function merge_graph (line 347) | def merge_graph(graphs, rawnames, weights=None, directed=False, name_typ...
FILE: core/kerasext/keras_backend_patches/tensorflow_patches.py
function gather_nd (line 4) | def gather_nd(param, indices):
function slice (line 8) | def slice(input, begin, size):
function log_softmax (line 12) | def log_softmax(logits, axis=-1):
FILE: core/kerasext/keras_backend_patches/theano_patches.py
function choose (line 4) | def choose(a, choices, out=None, mode='raise'):
function tensordot (line 8) | def tensordot(a, b, axes=2):
function arange (line 12) | def arange(start, stop=None, step=1, dtype=None):
function logsoftmax (line 16) | def logsoftmax(c):
function var (line 20) | def var(inpt, axis=None, keepdims=False):
function eye (line 24) | def eye(n, m=None, k=0, dtype=None):
function gamma (line 28) | def gamma(a):
function pool2d_raw (line 32) | def pool2d_raw(inpt, ds, ignore_border=False, st=None, padding=(0, 0), m...
FILE: core/mygraph_utils.py
function type2python (line 7) | def type2python(tp):
function python2type (line 18) | def python2type(tp):
function format_type (line 30) | def format_type(tp):
function save_graph (line 46) | def save_graph(g, fn, fmt='adjlist'):
function save_adjlist (line 58) | def save_adjlist(g, fn):
function load_adjlist (line 72) | def load_adjlist(fn, node_type='string', weight_type='float'):
FILE: core/utils.py
function func_wrapper (line 39) | def func_wrapper(args):
class ParMap (line 44) | class ParMap(object):
method __init__ (line 50) | def __init__(self, work, monitor=None, njobs=mp.cpu_count(), maxtasksp...
method close (line 58) | def close(self):
method __del__ (line 64) | def __del__(self):
method njobs (line 68) | def njobs(self):
method njobs (line 72) | def njobs(self, n):
method default_chunk (line 76) | def default_chunk(self, dlen):
method run (line 79) | def run(self, data, chunk=None, shuffle=False):
method run_slices (line 94) | def run_slices(self, slices):
FILE: core/utils_py.py
class KeyDefaultDict (line 8) | class KeyDefaultDict(defaultdict):
method __missing__ (line 9) | def __missing__(self, key):
function slice_sample (line 17) | def slice_sample(sample, chunk=None, nslice=None):
function islice_sample (line 35) | def islice_sample(sample, chunk=None, nslice=None):
function apply_order (line 50) | def apply_order(sample, order):
function shuffle_sample (line 54) | def shuffle_sample(sample, return_order=False):
class Archivable (line 67) | class Archivable(object):
method archive (line 68) | def archive(self, name=None, copy=True):
method load_archive (line 71) | def load_archive(self, ar, copy=True, name=None):
class OffsetList (line 75) | class OffsetList(Archivable):
method __init__ (line 76) | def __init__(self, offset, length, datasrc, copy=True, managed=None):
method __len__ (line 98) | def __len__(self):
method __normalize_slice (line 101) | def __normalize_slice(self, slc):
method __normalize_neg_index (line 113) | def __normalize_neg_index(self, idx):
method __setitem__ (line 119) | def __setitem__(self, key, item):
method __getitem__ (line 139) | def __getitem__(self, key):
method __array__ (line 161) | def __array__(self):
method __iter__ (line 166) | def __iter__(self):
method __next__ (line 171) | def __next__(self):
method append (line 174) | def append(self, item):
method extend (line 180) | def extend(self, lst):
method next (line 186) | def next(self):
method _load_item (line 193) | def _load_item(self, step):
method _store_item (line 200) | def _store_item(self, step, itm):
method archive (line 204) | def archive(self, name=None, copy=True):
method load_archive (line 218) | def load_archive(self, ar, copy=False, name=None):
function group_by (line 227) | def group_by(data, key=lambda x: x):
function crandint (line 240) | def crandint(ub):
FILE: scripts/stdtests.py
class StdTests (line 28) | class StdTests(object):
method __init__ (line 29) | def __init__(self, ds, emb, **kwargs):
method __make_classifier (line 38) | def __make_classifier(self):
method __classify (line 48) | def __classify(self, feat, lbs):
method __classify_multifeat (line 79) | def __classify_multifeat(self, feat, lbs, decision_func=None):
class ResultPresenter (line 126) | class ResultPresenter(object):
method __init__ (line 127) | def __init__(self):
method add_result (line 130) | def add_result(self, res):
method show_result (line 136) | def show_result(self):
method node_classify (line 143) | def node_classify(self, ds, emb, scale=None, intv=0, repeat=1):
method all_link_predict (line 166) | def all_link_predict(self, ds, emb, intv=0, repeat=1):
method changed_link_predict (line 178) | def changed_link_predict(self, ds, emb, intv=0, repeat=1):
method run_tests (line 189) | def run_tests(self, tests, repeat=1):
function load_datamod (line 234) | def load_datamod(modname):
function load_or_update_cache (line 239) | def load_or_update_cache(ds, cacheprefix):
function load_embedding (line 257) | def load_embedding(fn, vs):
Condensed preview — 65 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,588K chars).
[
{
"path": ".gitignore",
"chars": 1157,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 15344,
"preview": "# DynamicTriad\nThis project implements the DynamicTriad algorithm proposed in [1], which is a node embedding algorithm f"
},
{
"path": "__main__.py",
"chars": 69,
"preview": "from __future__ import print_function\n\nfrom core import main\n\nmain()\n"
},
{
"path": "build.sh",
"chars": 1517,
"preview": "pushd () {\n command pushd \"$@\" > /dev/null\n}\n\npopd () {\n command popd \"$@\" > /dev/null\n}\n\nset -e\n\nfiledir=$(dirnam"
},
{
"path": "core/__init__.py",
"chars": 8373,
"preview": "from __future__ import print_function\n\nfrom os import sys, path\nsys.path.append(path.dirname(path.dirname(path.abspath(_"
},
{
"path": "core/algorithm/CMakeLists.txt",
"chars": 5031,
"preview": "cmake_minimum_required(VERSION 2.8)\n\nproject(algorithm)\n#set(CMAKE_PREFIX_PATH ${LD_LIBRARY_PATH})\n\n# use local find_pac"
},
{
"path": "core/algorithm/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "core/algorithm/boost_python_omp.h",
"chars": 1105,
"preview": "#ifndef OMP_UTILS_H\n#define OMP_UTILS_H\n\n#include <Python.h>\n#include <cmath>\n\n#define OMP_INIT_FOR(sz, deg) \\\n do { "
},
{
"path": "core/algorithm/cmake/FindNumPy.cmake",
"chars": 1217,
"preview": "# Find the native numpy includes\n# This module defines\n# PYTHON_NUMPY_INCLUDE_DIR, where to find numpy/arrayobject.h, e"
},
{
"path": "core/algorithm/dynamic_triad.py",
"chars": 18982,
"preview": "from __future__ import print_function\nfrom __future__ import print_function\n\nimport keras.backend as K\nfrom core.kerasex"
},
{
"path": "core/algorithm/dynamic_triad_cimpl.cpp",
"chars": 8604,
"preview": "#include <Python.h>\n#include <numpy/ndarraytypes.h>\n#include <numpy/ndarrayobject.h>\n\n#include <boost/python.hpp>\nnamesp"
},
{
"path": "core/algorithm/embutils.py",
"chars": 10957,
"preview": "from __future__ import print_function\n\nimport numpy as np\nfrom collections import defaultdict\nfrom sklearn.linear_model "
},
{
"path": "core/algorithm/samplers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "core/algorithm/samplers/pos_neg.py",
"chars": 7547,
"preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n\nimport numpy as np\nimport random\nfrom iter"
},
{
"path": "core/algorithm/samplers/pos_neg_tri.py",
"chars": 13976,
"preview": "# triagnle data format:\n# time step of the open triangle\n# open triangle ceter node\n# open triangle node 1\n# ope"
},
{
"path": "core/algorithm/samplers/sampler.py",
"chars": 751,
"preview": "from __future__ import print_function\n\n\nclass Sampler(object):\n def pretrain_begin(self, begin, end):\n pass\n\n "
},
{
"path": "core/cython_src/README.txt",
"chars": 267,
"preview": "TODO: this directory is needed because cython always tries to detect context\npackage, i.e., always compiles and installs"
},
{
"path": "core/cython_src/utils_cy.pyx",
"chars": 8827,
"preview": "# distutils: language=c++\n\nfrom __future__ import print_function\nimport numpy as np\n\nfrom collections import defaultdict"
},
{
"path": "core/dataset/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "core/dataset/adjlist.py",
"chars": 2286,
"preview": "from __future__ import print_function\n\nfrom dataset_utils import DatasetBase\nfrom .. import mygraph\nfrom .. import mygra"
},
{
"path": "core/dataset/citation.py",
"chars": 10394,
"preview": "from __future__ import print_function\n\n# import graphtool_utils as gtutils\nimport numpy as np\nimport re\nfrom six.moves i"
},
{
"path": "core/dataset/dataset_utils.py",
"chars": 15268,
"preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n\nfrom core import utils, gconv\nfrom core im"
},
{
"path": "core/gconfig.py",
"chars": 45,
"preview": "debug = False \nuse_cython = True \nnjobs = 30\n"
},
{
"path": "core/gconv.py",
"chars": 2174,
"preview": "from __future__ import print_function\n\nimport graphtool_utils as gtutils\nimport utils\nimport mygraph\nimport mygraph_util"
},
{
"path": "core/graph/CMakeLists.txt",
"chars": 3679,
"preview": "project(graph)\ncmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)\n\n#set(CMAKE_PREFIX_PATH ${LD_LIBRARY_PATH})\n\nif(NOT CMA"
},
{
"path": "core/graph/CMakeLists.txt.user",
"chars": 26527,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE QtCreatorProject>\n<!-- Written by QtCreator 4.3.0, 2017-07-16T22:01:00."
},
{
"path": "core/graph/_test_graph.cpp",
"chars": 6214,
"preview": "#include <string>\n#include <iostream>\nusing namespace std;\n\n#include \"nodemap.h\"\n#include \"nodeset.h\"\n#include \"graph.h\""
},
{
"path": "core/graph/defs.h",
"chars": 210,
"preview": "#ifndef DEFS_H\n#define DEFS_H\n\nenum DUPMODE\n{\n DUP_IGNORE = 0, // do not insert when duplicated\n DUP_OVERWRITE, "
},
{
"path": "core/graph/exception.cpp",
"chars": 0,
"preview": ""
},
{
"path": "core/graph/exception.h",
"chars": 2382,
"preview": "#ifndef EXCEPTION_H_INCLUDE\n#define EXCEPTION_H_INCLUDE\n\n#include <stdexcept>\n#include <exception>\n#include <cerrno>\n#in"
},
{
"path": "core/graph/graph.cpp",
"chars": 4085,
"preview": "#include \"graph.h\"\n\n//#include <list>\n//#include <map>\n//#include <utility>\n//#include <cassert>\n//#include <sstream>\n//"
},
{
"path": "core/graph/graph.h",
"chars": 10406,
"preview": "/**\n Code manipulating graph structure,\n actually we can implement this basing on boosting graph library...\n*/\n\n#ifnde"
},
{
"path": "core/graph/graph_pywrapper.cpp",
"chars": 7201,
"preview": "#include <Python.h>\n#include <boost/python.hpp>\nnamespace py = boost::python;\n\n#include \"graph_pywrapper.h\"\n\nusing names"
},
{
"path": "core/graph/graph_pywrapper.h",
"chars": 5268,
"preview": "#ifndef GRAPH_PYWRAPPER\n#define GRAPH_PYWRAPPER\n\n#include <Python.h>\n#include <boost/python.hpp>\nnamespace py = boost::p"
},
{
"path": "core/graph/ioutils.cpp",
"chars": 21,
"preview": "#include \"ioutils.h\"\n"
},
{
"path": "core/graph/ioutils.h",
"chars": 7272,
"preview": "#ifndef IOUTILS_H\n#define IOUTILS_H\n\n#include <iostream>\n#include <map>\n#include <type_traits>\n#include <string>\n#includ"
},
{
"path": "core/graph/nodemap.cpp",
"chars": 21,
"preview": "#include \"nodemap.h\"\n"
},
{
"path": "core/graph/nodemap.h",
"chars": 10311,
"preview": "#ifndef NODEMAP_H\n#define NODEMAP_H\n\n#include <map>\n#include <fstream>\n#include <string>\n#include <vector>\n#include <sst"
},
{
"path": "core/graph/nodeset.cpp",
"chars": 21,
"preview": "#include \"nodeset.h\"\n"
},
{
"path": "core/graph/nodeset.h",
"chars": 6494,
"preview": "#ifndef NodeMap_H_INCLUDED\n#define NodeMap_H_INCLUDED\n\n#include <set>\n#include <utility>\n#include <stdexcept>\n#include <"
},
{
"path": "core/graph/types.cpp",
"chars": 232,
"preview": "#include \"types.h\"\n\nconst std::string type2name<std::string>::name = \"string\";\nconst std::string type2name<int>::name = "
},
{
"path": "core/graph/types.h",
"chars": 428,
"preview": "#ifndef TYPES_H\n#define TYPES_H\n\n#include <string>\n\ntemplate <typename T>\nstruct type2name\n{};\n\ntemplate <>\nstruct type2"
},
{
"path": "core/graph/utils.cpp",
"chars": 2323,
"preview": "#include \"utils.h\"\n//#include <list>\n//#include <cstdio>\n//#include <cstdlib>\n//#include <cerrno>\n//#include <cassert>\n/"
},
{
"path": "core/graph/utils.h",
"chars": 4659,
"preview": "#ifndef UTILS_H_INCLUDED\n#define UTILS_H_INCLUDED\n\n#include <list>\n#include <string>\n#include <cstdint>\n#include <cstdli"
},
{
"path": "core/graphtool_utils.py",
"chars": 16909,
"preview": "from __future__ import print_function\n\nimport graph_tool as gt\nimport itertools\nfrom collections import defaultdict\nimpo"
},
{
"path": "core/kerasext/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "core/kerasext/debug/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "core/kerasext/debug/finite_number_check.py",
"chars": 359,
"preview": "import keras.backend as K\n\nif K._BACKEND == 'tensorflow':\n from tensorflow.python import debug as tfdbg\n sess = tf"
},
{
"path": "core/kerasext/keras_backend_patches/__init__.py",
"chars": 542,
"preview": "from __future__ import absolute_import\n\nimport warnings\n\nimport keras.backend\n\nif keras.backend._BACKEND == 'theano':\n "
},
{
"path": "core/kerasext/keras_backend_patches/tensorflow_patches.py",
"chars": 249,
"preview": "import tensorflow as tf\n\n\ndef gather_nd(param, indices):\n return tf.gather_nd(param, indices)\n\n\ndef slice(input, begi"
},
{
"path": "core/kerasext/keras_backend_patches/theano_patches.py",
"chars": 776,
"preview": "import theano\n\n\ndef choose(a, choices, out=None, mode='raise'):\n return theano.tensor.choose(a, choices, out, mode)\n\n"
},
{
"path": "core/mygraph_utils.py",
"chars": 3448,
"preview": "from __future__ import print_function\nimport itertools\nimport math\nimport mygraph\n\n\ndef type2python(tp):\n if tp == 's"
},
{
"path": "core/utils.py",
"chars": 3332,
"preview": "import sys\nimport gconfig\nfrom os import path\n\nfile_dir = path.dirname(path.abspath(__file__))\nbuild_opts = ['build_ext'"
},
{
"path": "core/utils_py.py",
"chars": 7938,
"preview": "from __future__ import print_function\nimport numpy as np\nfrom collections import defaultdict\nfrom copy import deepcopy\ni"
},
{
"path": "data/academic_toy.pickle",
"chars": 3521207,
"preview": "(dp1\nS'confmap'\np2\n(dp3\nI0\nS'INFORMS Journal on Computing'\np4\nsI1\nS'Theoretical Computer Science'\np5\nsI129748\nS\"NSDI'14 "
},
{
"path": "docs/README.md",
"chars": 7085,
"preview": "\n\n### Dynamic Network Embedding\n\nThe goal of so-called \"network embedding\" is to project each vertex in a graph to a vec"
},
{
"path": "docs/_config.yml",
"chars": 125,
"preview": "theme: jekyll-theme-slate\ntitle: DynamicTriad\ndescription: Dynamic Network Embedding by Modeling Triadic Closure Process"
},
{
"path": "docs/_layouts/default.html",
"chars": 2631,
"preview": "<!DOCTYPE html>\n<html lang=\"{{ site.lang | default: \"en-US\" }}\">\n\n <head>\n <meta charset='utf-8'>\n <meta http-equ"
},
{
"path": "requirements.txt",
"chars": 103,
"preview": "Cython>=0.25.2\nKeras>=2.0.4\ndill>=0.2.5\nsix>=1.10.0\nscipy>=0.19.0\nnumpy>=1.11.0\nscikit_learn>=0.19.1\n\n\n"
},
{
"path": "scripts/academic2adjlist.py",
"chars": 1471,
"preview": "from __future__ import print_function\n\nimport sys\nimport os\nfrom six.moves import cPickle\nimport regex as re\n\ntry:\n f"
},
{
"path": "scripts/demo.sh",
"chars": 1298,
"preview": "pushd () {\n command pushd \"$@\" > /dev/null\n}\n\npopd () {\n command popd \"$@\" > /dev/null\n}\n\nset -e\n\nMAINPATH=$(dirna"
},
{
"path": "scripts/demo_raw.sh",
"chars": 1090,
"preview": "pushd () {\n command pushd \"$@\" > /dev/null\n}\n\npopd () {\n command popd \"$@\" > /dev/null\n}\n\nset -e\n\nMAINPATH=$(dirna"
},
{
"path": "scripts/stdtests.py",
"chars": 13487,
"preview": "from __future__ import print_function\n\nimport sys\nimport os\n\ntry:\n import core\nexcept ImportError:\n mainpath = os."
},
{
"path": "scripts/test.py",
"chars": 704,
"preview": "import sys\nimport os\n\nrootpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(rootpath)\n\n"
}
]
About this extraction
This page contains the full source code of the luckiezhou/DynamicTriad GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 65 files (3.6 MB), approximately 959.4k tokens, and a symbol index with 389 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.