Repository: matenure/FastGCN Branch: master Commit: b8e6e6412d8c Files: 48 Total size: 171.8 KB Directory structure: gitextract_xo97i8qd/ ├── README.md ├── __init__.py ├── create_Graph.py ├── create_Graph_forGraphSAGE.py ├── data/ │ ├── ind.citeseer.allx │ ├── ind.citeseer.ally │ ├── ind.citeseer.graph │ ├── ind.citeseer.test.index │ ├── ind.citeseer.tx │ ├── ind.citeseer.ty │ ├── ind.citeseer.x │ ├── ind.citeseer.y │ ├── ind.cora.allx │ ├── ind.cora.ally │ ├── ind.cora.graph │ ├── ind.cora.test.index │ ├── ind.cora.tx │ ├── ind.cora.ty │ ├── ind.cora.x │ ├── ind.cora.y │ ├── ind.pubmed.allx │ ├── ind.pubmed.ally │ ├── ind.pubmed.graph │ ├── ind.pubmed.test.index │ ├── ind.pubmed.tx │ ├── ind.pubmed.ty │ ├── ind.pubmed.x │ └── ind.pubmed.y ├── inits.py ├── lanczos.py ├── layers.py ├── metrics.py ├── models.py ├── pubmed-original_inductive_FastGCN.py ├── pubmed-original_transductive_FastGCN.py ├── pubmed_Mix.py ├── pubmed_Mix_sampleA.py ├── pubmed_Mix_uniform.py ├── pubmed_inductive_appr2layers.py ├── train.py ├── train_batch_multiRank_inductive_newscheme.py ├── train_batch_multiRank_inductive_reddit_Mixlayers_sampleA.py ├── train_batch_multiRank_inductive_reddit_Mixlayers_sampleBatch.py ├── train_batch_multiRank_inductive_reddit_Mixlayers_uniform.py ├── train_batch_multiRank_inductive_reddit_appr2layers.py ├── train_batch_multiRank_inductive_reddit_onelayer.py ├── transformRedditGraph2NPZ.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # FastGCN This is the Tensorflow implementation of our ICLR2018 paper: ["**FastGCN: Fast Learning with Graph Convolutional Networks via Importance Sampling**".](https://openreview.net/forum?id=rytstxWAW&noteId=ByU9EpGSf) Instructions of the sample codes: [For Reddit dataset] train_batch_multiRank_inductive_reddit_Mixlayers_sampleA.py is the final
model. (precomputed the AH in the bottom layer) The original Reddit data should be transferred into the .npz format using this function: transferRedditDataFormat. Note: By default, this code does no sampling. To enable sampling, change `main(None)` at the bottom to `main(100)`. (The number is the sample size. You can also try other sample sizes) train_batch_multiRank_inductive_reddit_Mixlayers_uniform.py is the model for uniform sampling. train_batch_multiRank_inductive_reddit_appr2layers.py is the model for 2-layer approximation. create_Graph_forGraphSAGE.py is used to transfer the data into the GraphSAGE format, so that users can compare our method with GraphSAGE. We also include the transferred original Cora dataset in this repository (./data/cora_graphSAGE). [For pubmed or cora] train.py is the original GCN model. pubmed_Mix_sampleA.py: The dataset could be defined in the code, for example: flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.') pubmed_Mix_uniform.py and pubmed_inductive_appr2layers.py are similar to the ones for reddit. pubmed-original**.py means the code is used for original Cora or Pubmed datasets. Users could also change their datasets by changing the data load function from load_data() to load_data_original().
================================================ FILE: __init__.py ================================================ from __future__ import print_function from __future__ import division ================================================ FILE: create_Graph.py ================================================ import numpy as np import pickle as pkl import scipy.sparse as sp import sys import os import networkx as nx from utils import * import json from networkx.readwrite import json_graph # 'cora', 'citeseer', 'pubmed' if __name__=="__main__": data_name = 'cora' adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(data_name) G = nx.from_scipy_sparse_matrix(adj) val_index = np.where(val_mask)[0] test_index = np.where(test_mask)[0] y = y_train+y_val+y_test y = np.argmax(y,axis=1) for i in range(len(y)): if i in val_index: G.node[i]['val']=True G.node[i]['test']=False elif i in test_index: G.node[i]['test']=True G.node[i]['val']=False else: G.node[i]['test'] = False G.node[i]['val'] = False data = json_graph.node_link_data(G) with open("cora/cora-G.json","wb") as f: json.dump(data,f) classMap = {} idMap = {} for i in range(len(y)): classMap[i]=y[i] idMap[i] = i with open("cora/cora-id_map.json","wb") as f: json.dump(idMap,f) with open("cora/cora-class_map.json","wb") as f: json.dump(classMap,f) np.save(open("cora/cora-feats.npy","wb"), features.todense()) ================================================ FILE: create_Graph_forGraphSAGE.py ================================================ import numpy as np import pickle as pkl import scipy.sparse as sp import sys import os import networkx as nx from utils import * import json from networkx.readwrite import json_graph # 'cora', 'citeseer', 'pubmed' if __name__=="__main__": data_name = 'cora' adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_original(data_name) G = nx.from_scipy_sparse_matrix(adj) train_index = np.where(train_mask)[0] val_index = 
np.where(val_mask)[0] test_index = np.where(test_mask)[0] y = y_train+y_val+y_test y = np.argmax(y,axis=1) train_attr, val_attr, test_attr = ({i: bool(m) for i, m in enumerate(mask)} for mask in (train_mask, val_mask, test_mask)) nx.set_node_attributes(G, train_attr, 'train') nx.set_node_attributes(G, val_attr, 'val') nx.set_node_attributes(G, test_attr, 'test') data = json_graph.node_link_data(G) with open("%s/%s0-G.json" % (data_name, data_name), "wb") as f: json.dump(data,f) classMap = {} idMap = {} for i in range(len(y)): classMap[i]=y[i] idMap[i] = i with open("%s/%s0-id_map.json" % (data_name, data_name), "wb") as f: json.dump(idMap,f) with open("%s/%s0-class_map.json" % (data_name, data_name), "wb") as f: json.dump(classMap,f) np.save("%s/%s0-feats.npy" % (data_name, data_name), features.todense()) ================================================ FILE: data/ind.citeseer.test.index ================================================ 2488 2644 3261 2804 3176 2432 3310 2410 2812 2520 2994 3282 2680 2848 2670 3005 2977 2592 2967 2461 3184 2852 2768 2905 2851 3129 3164 2438 2793 2763 2528 2954 2347 2640 3265 2874 2446 2856 3149 2374 3097 3301 2664 2418 2655 2464 2596 3262 3278 2320 2612 2614 2550 2626 2772 3007 2733 2516 2476 2798 2561 2839 2685 2391 2705 3098 2754 3251 2767 2630 2727 2513 2701 3264 2792 2821 3260 2462 3307 2639 2900 3060 2672 3116 2731 3316 2386 2425 2518 3151 2586 2797 2479 3117 2580 3182 2459 2508 3052 3230 3215 2803 2969 2562 2398 3325 2343 3030 2414 2776 2383 3173 2850 2499 3312 2648 2784 2898 3056 2484 3179 3132 2577 2563 2867 3317 2355 3207 3178 2968 3319 2358 2764 3001 2683 3271 2321 2567 2502 3246 2715 3066 2390 2381 3162 2741 2498 2790 3038 3321 2481 3050 3161 3122 2801 2957 3177 2965 2621 3208 2921 2802 2357 2677 2519 2860 2696 2368 3241 2858 2419 2762 2875 3222 3064 2827 3044 2471 3062 2982 2736 2322 2709 2766 2424 2602 2970 2675 3299 2554 2964 2597 2753 2979 2523 2912 2896 2317 3167 2813 2482 2557 3043 3244 2985 2460 2363 3272 3045 3192 
2453 2656 2834 2443 3202 2926 2711 2633 2384 2752 3285 2817 2483 2919 2924 2661 2698 2361 2662 2819 3143 2316 3196 2739 2345 2578 2822 3229 2908 2917 2692 3200 2324 2522 3322 2697 3163 3093 3233 2774 2371 2835 2652 2539 2843 3231 2976 2429 2367 3144 2564 3283 3217 3035 2962 2433 2415 2387 3021 2595 2517 2468 3061 2673 2348 3027 2467 3318 2959 3273 2392 2779 2678 3004 2634 2974 3198 2342 2376 3249 2868 2952 2710 2838 2335 2524 2650 3186 2743 2545 2841 2515 2505 3181 2945 2738 2933 3303 2611 3090 2328 3010 3016 2504 2936 3266 3253 2840 3034 2581 2344 2452 2654 3199 3137 2514 2394 2544 2641 2613 2618 2558 2593 2532 2512 2975 3267 2566 2951 3300 2869 2629 2747 3055 2831 3105 3168 3100 2431 2828 2684 3269 2910 2865 2693 2884 3228 2783 3247 2770 3157 2421 2382 2331 3203 3240 2351 3114 2986 2688 2439 2996 3079 3103 3296 2349 2372 3096 2422 2551 3069 2737 3084 3304 3022 2542 3204 2949 2318 2450 3140 2734 2881 2576 3054 3089 3125 2761 3136 3111 2427 2466 3101 3104 3259 2534 2961 3191 3000 3036 2356 2800 3155 3224 2646 2735 3020 2866 2426 2448 3226 3219 2749 3183 2906 2360 2440 2946 2313 2859 2340 3008 2719 3058 2653 3023 2888 3243 2913 3242 3067 2409 3227 2380 2353 2686 2971 2847 2947 2857 3263 3218 2861 3323 2635 2966 2604 2456 2832 2694 3245 3119 2942 3153 2894 2555 3128 2703 2323 2631 2732 2699 2314 2590 3127 2891 2873 2814 2326 3026 3288 3095 2706 2457 2377 2620 2526 2674 3190 2923 3032 2334 3254 2991 3277 2973 2599 2658 2636 2826 3148 2958 3258 2990 3180 2538 2748 2625 2565 3011 3057 2354 3158 2622 3308 2983 2560 3169 3059 2480 3194 3291 3216 2643 3172 2352 2724 2485 2411 2948 2445 2362 2668 3275 3107 2496 2529 2700 2541 3028 2879 2660 3324 2755 2436 3048 2623 2920 3040 2568 3221 3003 3295 2473 3232 3213 2823 2897 2573 2645 3018 3326 2795 2915 3109 3086 2463 3118 2671 2909 2393 2325 3029 2972 3110 2870 3284 2816 2647 2667 2955 2333 2960 2864 2893 2458 2441 2359 2327 3256 3099 3073 3138 2511 2666 2548 2364 2451 2911 3237 3206 3080 3279 2934 2981 2878 3130 2830 3091 2659 
2449 3152 2413 2722 2796 3220 2751 2935 3238 2491 2730 2842 3223 2492 3074 3094 2833 2521 2883 3315 2845 2907 3083 2572 3092 2903 2918 3039 3286 2587 3068 2338 3166 3134 2455 2497 2992 2775 2681 2430 2932 2931 2434 3154 3046 2598 2366 3015 3147 2944 2582 3274 2987 2642 2547 2420 2930 2750 2417 2808 3141 2997 2995 2584 2312 3033 3070 3065 2509 3314 2396 2543 2423 3170 2389 3289 2728 2540 2437 2486 2895 3017 2853 2406 2346 2877 2472 3210 2637 2927 2789 2330 3088 3102 2616 3081 2902 3205 3320 3165 2984 3185 2707 3255 2583 2773 2742 3024 2402 2718 2882 2575 3281 2786 2855 3014 2401 2535 2687 2495 3113 2609 2559 2665 2530 3293 2399 2605 2690 3133 2799 2533 2695 2713 2886 2691 2549 3077 3002 3049 3051 3087 2444 3085 3135 2702 3211 3108 2501 2769 3290 2465 3025 3019 2385 2940 2657 2610 2525 2941 3078 2341 2916 2956 2375 2880 3009 2780 2370 2925 2332 3146 2315 2809 3145 3106 2782 2760 2493 2765 2556 2890 2400 2339 3201 2818 3248 3280 2570 2569 2937 3174 2836 2708 2820 3195 2617 3197 2319 2744 2615 2825 2603 2914 2531 3193 2624 2365 2810 3239 3159 2537 2844 2758 2938 3037 2503 3297 2885 2608 2494 2712 2408 2901 2704 2536 2373 2478 2723 3076 2627 2369 2669 3006 2628 2788 3276 2435 3139 3235 2527 2571 2815 2442 2892 2978 2746 3150 2574 2725 3188 2601 2378 3075 2632 2794 3270 3071 2506 3126 3236 3257 2824 2989 2950 2428 2405 3156 2447 2787 2805 2720 2403 2811 2329 2474 2785 2350 2507 2416 3112 2475 2876 2585 2487 3072 3082 2943 2757 2388 2600 3294 2756 3142 3041 2594 2998 3047 2379 2980 2454 2862 3175 2588 3031 3012 2889 2500 2791 2854 2619 2395 2807 2740 2412 3131 3013 2939 2651 2490 2988 2863 3225 2745 2714 3160 3124 2849 2676 2872 3287 3189 2716 3115 2928 2871 2591 2717 2546 2777 3298 2397 3187 2726 2336 3268 2477 2904 2846 3121 2899 2510 2806 2963 3313 2679 3302 2663 3053 2469 2999 3311 2470 2638 3120 3171 2689 2922 2607 2721 2993 2887 2837 2929 2829 3234 2649 2337 2759 2778 2771 2404 2589 3123 3209 2729 3252 2606 2579 2552 ================================================ 
FILE: data/ind.cora.test.index ================================================ 2692 2532 2050 1715 2362 2609 2622 1975 2081 1767 2263 1725 2588 2259 2357 1998 2574 2179 2291 2382 1812 1751 2422 1937 2631 2510 2378 2589 2345 1943 1850 2298 1825 2035 2507 2313 1906 1797 2023 2159 2495 1886 2122 2369 2461 1925 2565 1858 2234 2000 1846 2318 1723 2559 2258 1763 1991 1922 2003 2662 2250 2064 2529 1888 2499 2454 2320 2287 2203 2018 2002 2632 2554 2314 2537 1760 2088 2086 2218 2605 1953 2403 1920 2015 2335 2535 1837 2009 1905 2636 1942 2193 2576 2373 1873 2463 2509 1954 2656 2455 2494 2295 2114 2561 2176 2275 2635 2442 2704 2127 2085 2214 2487 1739 2543 1783 2485 2262 2472 2326 1738 2170 2100 2384 2152 2647 2693 2376 1775 1726 2476 2195 1773 1793 2194 2581 1854 2524 1945 1781 1987 2599 1744 2225 2300 1928 2042 2202 1958 1816 1916 2679 2190 1733 2034 2643 2177 1883 1917 1996 2491 2268 2231 2471 1919 1909 2012 2522 1865 2466 2469 2087 2584 2563 1924 2143 1736 1966 2533 2490 2630 1973 2568 1978 2664 2633 2312 2178 1754 2307 2480 1960 1742 1962 2160 2070 2553 2433 1768 2659 2379 2271 1776 2153 1877 2027 2028 2155 2196 2483 2026 2158 2407 1821 2131 2676 2277 2489 2424 1963 1808 1859 2597 2548 2368 1817 2405 2413 2603 2350 2118 2329 1969 2577 2475 2467 2425 1769 2092 2044 2586 2608 1983 2109 2649 1964 2144 1902 2411 2508 2360 1721 2005 2014 2308 2646 1949 1830 2212 2596 1832 1735 1866 2695 1941 2546 2498 2686 2665 1784 2613 1970 2021 2211 2516 2185 2479 2699 2150 1990 2063 2075 1979 2094 1787 2571 2690 1926 2341 2566 1957 1709 1955 2570 2387 1811 2025 2447 2696 2052 2366 1857 2273 2245 2672 2133 2421 1929 2125 2319 2641 2167 2418 1765 1761 1828 2188 1972 1997 2419 2289 2296 2587 2051 2440 2053 2191 1923 2164 1861 2339 2333 2523 2670 2121 1921 1724 2253 2374 1940 2545 2301 2244 2156 1849 2551 2011 2279 2572 1757 2400 2569 2072 2526 2173 2069 2036 1819 1734 1880 2137 2408 2226 2604 1771 2698 2187 2060 1756 2201 2066 2439 1844 1772 2383 2398 1708 1992 1959 1794 2426 2702 2444 1944 
1829 2660 2497 2607 2343 1730 2624 1790 1935 1967 2401 2255 2355 2348 1931 2183 2161 2701 1948 2501 2192 2404 2209 2331 1810 2363 2334 1887 2393 2557 1719 1732 1986 2037 2056 1867 2126 1932 2117 1807 1801 1743 2041 1843 2388 2221 1833 2677 1778 2661 2306 2394 2106 2430 2371 2606 2353 2269 2317 2645 2372 2550 2043 1968 2165 2310 1985 2446 1982 2377 2207 1818 1913 1766 1722 1894 2020 1881 2621 2409 2261 2458 2096 1712 2594 2293 2048 2359 1839 2392 2254 1911 2101 2367 1889 1753 2555 2246 2264 2010 2336 2651 2017 2140 1842 2019 1890 2525 2134 2492 2652 2040 2145 2575 2166 1999 2434 1711 2276 2450 2389 2669 2595 1814 2039 2502 1896 2168 2344 2637 2031 1977 2380 1936 2047 2460 2102 1745 2650 2046 2514 1980 2352 2113 1713 2058 2558 1718 1864 1876 2338 1879 1891 2186 2451 2181 2638 2644 2103 2591 2266 2468 1869 2582 2674 2361 2462 1748 2215 2615 2236 2248 2493 2342 2449 2274 1824 1852 1870 2441 2356 1835 2694 2602 2685 1893 2544 2536 1994 1853 1838 1786 1930 2539 1892 2265 2618 2486 2583 2061 1796 1806 2084 1933 2095 2136 2078 1884 2438 2286 2138 1750 2184 1799 2278 2410 2642 2435 1956 2399 1774 2129 1898 1823 1938 2299 1862 2420 2673 1984 2204 1717 2074 2213 2436 2297 2592 2667 2703 2511 1779 1782 2625 2365 2315 2381 1788 1714 2302 1927 2325 2506 2169 2328 2629 2128 2655 2282 2073 2395 2247 2521 2260 1868 1988 2324 2705 2541 1731 2681 2707 2465 1785 2149 2045 2505 2611 2217 2180 1904 2453 2484 1871 2309 2349 2482 2004 1965 2406 2162 1805 2654 2007 1947 1981 2112 2141 1720 1758 2080 2330 2030 2432 2089 2547 1820 1815 2675 1840 2658 2370 2251 1908 2029 2068 2513 2549 2267 2580 2327 2351 2111 2022 2321 2614 2252 2104 1822 2552 2243 1798 2396 2663 2564 2148 2562 2684 2001 2151 2706 2240 2474 2303 2634 2680 2055 2090 2503 2347 2402 2238 1950 2054 2016 1872 2233 1710 2032 2540 2628 1795 2616 1903 2531 2567 1946 1897 2222 2227 2627 1856 2464 2241 2481 2130 2311 2083 2223 2284 2235 2097 1752 2515 2527 2385 2189 2283 2182 2079 2375 2174 2437 1993 2517 2443 2224 2648 2171 2290 2542 
2038 1855 1831 1759 1848 2445 1827 2429 2205 2598 2657 1728 2065 1918 2427 2573 2620 2292 1777 2008 1875 2288 2256 2033 2470 2585 2610 2082 2230 1915 1847 2337 2512 2386 2006 2653 2346 1951 2110 2639 2520 1939 2683 2139 2220 1910 2237 1900 1836 2197 1716 1860 2077 2519 2538 2323 1914 1971 1845 2132 1802 1907 2640 2496 2281 2198 2416 2285 1755 2431 2071 2249 2123 1727 2459 2304 2199 1791 1809 1780 2210 2417 1874 1878 2116 1961 1863 2579 2477 2228 2332 2578 2457 2024 1934 2316 1841 1764 1737 2322 2239 2294 1729 2488 1974 2473 2098 2612 1834 2340 2423 2175 2280 2617 2208 2560 1741 2600 2059 1747 2242 2700 2232 2057 2147 2682 1792 1826 2120 1895 2364 2163 1851 2391 2414 2452 1803 1989 2623 2200 2528 2415 1804 2146 2619 2687 1762 2172 2270 2678 2593 2448 1882 2257 2500 1899 2478 2412 2107 1746 2428 2115 1800 1901 2397 2530 1912 2108 2206 2091 1740 2219 1976 2099 2142 2671 2668 2216 2272 2229 2666 2456 2534 2697 2688 2062 2691 2689 2154 2590 2626 2390 1813 2067 1952 2518 2358 1789 2076 2049 2119 2013 2124 2556 2105 2093 1885 2305 2354 2135 2601 1770 1995 2504 1749 2157 ================================================ FILE: data/ind.pubmed.test.index ================================================ 18747 19392 19181 18843 19221 18962 19560 19097 18966 19014 18756 19313 19000 19569 19359 18854 18970 19073 19661 19180 19377 18750 19401 18788 19224 19447 19017 19241 18890 18908 18965 19001 18849 19641 18852 19222 19172 18762 19156 19162 18856 18763 19318 18826 19712 19192 19695 19030 19523 19249 19079 19232 19455 18743 18800 19071 18885 19593 19394 19390 18832 19445 18838 19632 19548 19546 18825 19498 19266 19117 19595 19252 18730 18913 18809 19452 19520 19274 19555 19388 18919 19099 19637 19403 18720 19526 18905 19451 19408 18923 18794 19322 19431 18912 18841 19239 19125 19258 19565 18898 19482 19029 18778 19096 19684 19552 18765 19361 19171 19367 19623 19402 19327 19118 18888 18726 19510 18831 19490 19576 19050 18729 18896 19246 19012 18862 18873 19193 19693 19474 18953 
19115 19182 19269 19116 18837 18872 19007 19212 18798 19102 18772 19660 19511 18914 18886 19672 19360 19213 18810 19420 19512 18719 19432 19350 19127 18782 19587 18924 19488 18781 19340 19190 19383 19094 18835 19487 19230 18791 18882 18937 18928 18755 18802 19516 18795 18786 19273 19349 19398 19626 19130 19351 19489 19446 18959 19025 18792 18878 19304 19629 19061 18785 19194 19179 19210 19417 19583 19415 19443 18739 19662 18904 18910 18901 18960 18722 18827 19290 18842 19389 19344 18961 19098 19147 19334 19358 18829 18984 18931 18742 19320 19111 19196 18887 18991 19469 18990 18876 19261 19270 19522 19088 19284 19646 19493 19225 19615 19449 19043 19674 19391 18918 19155 19110 18815 19131 18834 19715 19603 19688 19133 19053 19166 19066 18893 18757 19582 19282 19257 18869 19467 18954 19371 19151 19462 19598 19653 19187 19624 19564 19534 19581 19478 18985 18746 19342 18777 19696 18824 19138 18728 19643 19199 18731 19168 18948 19216 19697 19347 18808 18725 19134 18847 18828 18996 19106 19485 18917 18911 18776 19203 19158 18895 19165 19382 18780 18836 19373 19659 18947 19375 19299 18761 19366 18754 19248 19416 19658 19638 19034 19281 18844 18922 19491 19272 19341 19068 19332 19559 19293 18804 18933 18935 19405 18936 18945 18943 18818 18797 19570 19464 19428 19093 19433 18986 19161 19255 19157 19046 19292 19434 19298 18724 19410 19694 19214 19640 19189 18963 19218 19585 19041 19550 19123 19620 19376 19561 18944 19706 19056 19283 18741 19319 19144 19542 18821 19404 19080 19303 18793 19306 19678 19435 19519 19566 19278 18946 19536 19020 19057 19198 19333 19649 19699 19399 19654 19136 19465 19321 19577 18907 19665 19386 19596 19247 19473 19568 19355 18925 19586 18982 19616 19495 19612 19023 19438 18817 19692 19295 19414 19676 19472 19107 19062 19035 18883 19409 19052 19606 19091 19651 19475 19413 18796 19369 19639 19701 19461 19645 19251 19063 19679 19545 19081 19363 18995 19549 18790 18855 18833 18899 19395 18717 19647 18768 19103 19245 18819 18779 19656 19076 18745 18971 
19197 19711 19074 19128 19466 19139 19309 19324 18814 19092 19627 19060 18806 18929 18737 18942 18906 18858 19456 19253 19716 19104 19667 19574 18903 19237 18864 19556 19364 18952 19008 19323 19700 19170 19267 19345 19238 18909 18892 19109 19704 18902 19275 19680 18723 19242 19112 19169 18956 19343 19650 19541 19698 19521 19087 18976 19038 18775 18968 19671 19412 19407 19573 19027 18813 19357 19460 19673 19481 19036 19614 18787 19195 18732 18884 19613 19657 19575 19226 19589 19234 19617 19707 19484 18740 19424 18784 19419 19159 18865 19105 19315 19480 19664 19378 18803 19605 18870 19042 19426 18848 19223 19509 19532 18752 19691 18718 19209 19362 19090 19492 19567 19687 19018 18830 19530 19554 19119 19442 19558 19527 19427 19291 19543 19422 19142 18897 18950 19425 19002 19588 18978 19551 18930 18736 19101 19215 19150 19263 18949 18974 18759 19335 19200 19129 19328 19437 18988 19429 19368 19406 19049 18811 19296 19256 19385 19602 18770 19337 19580 19476 19045 19132 19089 19120 19265 19483 18767 19227 18934 19069 18820 19006 19459 18927 19037 19280 19441 18823 19015 19114 19618 18957 19176 18853 19648 19201 19444 19279 18751 19302 19505 18733 19601 19533 18863 19708 19387 19346 19152 19206 18851 19338 19681 19380 19055 18766 19085 19591 19547 18958 19146 18840 19051 19021 19207 19235 19086 18979 19300 18939 19100 19619 19287 18980 19277 19326 19108 18920 19625 19374 19078 18734 19634 19339 18877 19423 19652 19683 19044 18983 19330 19529 19714 19468 19075 19540 18839 19022 19286 19537 19175 19463 19167 19705 19562 19244 19486 19611 18801 19178 19590 18846 19450 19205 19381 18941 19670 19185 19504 19633 18997 19113 19397 19636 19709 19289 19264 19353 19584 19126 18938 19669 18964 19276 18774 19173 19231 18973 18769 19064 19040 19668 18738 19082 19655 19236 19352 19609 19628 18951 19384 19122 18875 18992 18753 19379 19254 19301 19506 19135 19010 19682 19400 19579 19316 19553 19208 19635 19644 18891 19024 18989 19250 18850 19317 18915 19607 18799 18881 19479 19031 19365 
19164 18744 18760 19502 19058 19517 18735 19448 19243 19453 19285 18857 19439 19016 18975 19503 18998 18981 19186 18994 19240 19631 19070 19174 18900 19065 19220 19229 18880 19308 19372 19496 18771 19325 19538 19033 18874 19077 19211 18764 19458 19571 19121 19019 19059 19497 18969 19666 19297 19219 19622 19184 18977 19702 19539 19329 19095 19675 18972 19514 19703 19188 18866 18812 19314 18822 18845 19494 19411 18916 19686 18967 19294 19143 19204 18805 19689 19233 18758 18748 19011 19685 19336 19608 19454 19124 18868 18807 19544 19621 19228 19154 19141 19145 19153 18860 19163 19393 19268 19160 19305 19259 19471 19524 18783 19396 18894 19430 19690 19348 19597 19592 19677 18889 19331 18773 19137 19009 18932 19599 18816 19054 19067 19477 19191 18921 18940 19578 19183 19004 19072 19710 19005 19610 18955 19457 19148 18859 18993 19642 19047 19418 19535 19600 19312 19039 19028 18879 19003 19026 19013 19149 19177 19217 18987 19354 19525 19202 19084 19032 18749 18867 19048 18999 19260 19630 18727 19356 19083 18926 18789 19370 18861 19311 19557 19531 19436 19140 19310 19501 18721 19604 19713 19262 19563 19507 19440 19572 19513 19515 19518 19421 19470 19499 19663 19508 18871 19528 19500 19307 19288 19594 19271 ================================================ FILE: inits.py ================================================ import tensorflow as tf import numpy as np def uniform(shape, scale=0.05, name=None): """Uniform init.""" initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32) return tf.Variable(initial, name=name) def glorot(shape, name=None): """Glorot & Bengio (AISTATS 2010) init.""" init_range = np.sqrt(6.0/(shape[0]+shape[1])) initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) return tf.Variable(initial, name=name) def zeros(shape, name=None): """All zeros.""" initial = tf.zeros(shape, dtype=tf.float32) return tf.Variable(initial, name=name) def ones(shape, name=None): """All ones.""" initial = 
tf.ones(shape, dtype=tf.float32) return tf.Variable(initial, name=name) ================================================ FILE: lanczos.py ================================================ import numpy as np from numpy.linalg import norm from utils import load_data as dataload import scipy.sparse as sparse import pickle from scipy.linalg import qr, svd def lanczos(A,k,q): n = A.shape[0] Q = np.zeros((n,k+1)) Q[:,0] = q/norm(q) alpha = 0 beta = 0 for i in range(k): if i == 0: q = np.dot(A,Q[:,i]) else: q = np.dot(A, Q[:,i]) - beta*Q[:,i-1] alpha = np.dot(q.T, Q[:,i]) q = q - Q[:,i]*alpha q = q - np.dot(Q[:,:i], np.dot(Q[:,:i].T, q)) # full reorthogonalization beta = norm(q) Q[:,i+1] = q/beta print(i) Q = Q[:,:k] Sigma = np.dot(Q.T, np.dot(A, Q)) # A2 = np.dot(Q[:,:k], np.dot(Sigma[:k,:k], Q[:,:k].T)) # return A2 return Q, Sigma def dense_RandomSVD(A,K): G = np.random.randn(A.shape[0],K) B = np.dot(A,G) Q,R =qr(B,mode='economic') M = np.dot(np.dot(Q, np.dot(np.dot(Q.T, A),Q)),Q.T) return M if __name__=="__main__": adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = dataload('cora') print(adj.shape) adj = np.array(sparse.csr_matrix.todense(adj)) # np.save("ADJ_cora.npy",adj) q = np.random.randn(adj.shape[0],) Q, sigma = lanczos(adj,100,q) r = 100 A2 = np.dot(Q[:,:r], np.dot(sigma[:r,:r], Q[:,:r].T)) # u,v,a = svd(adj) err = norm(adj-A2)/norm(adj) print(err) # A = np.random.random((10000,10000)) # A = np.triu(A) + np.triu(A).T # q = np.random.random((10000,)) # K = 100 # Q, sigma = lanczos(A,K,q) # r = 100 # A2 = np.dot(Q[:,:r], np.dot(sigma[:r,:r], Q[:,:r].T)) # err = norm(A-A2)/norm(A) # print(err) ================================================ FILE: layers.py ================================================ from inits import * import tensorflow as tf flags = tf.app.flags FLAGS = flags.FLAGS # global unique layer ID dictionary for layer name assignment _LAYER_UIDS = {} def get_layer_uid(layer_name=''): """Helper function, assigns unique layer 
IDs.""" if layer_name not in _LAYER_UIDS: _LAYER_UIDS[layer_name] = 1 return 1 else: _LAYER_UIDS[layer_name] += 1 return _LAYER_UIDS[layer_name] def sparse_dropout(x, keep_prob, noise_shape): """Dropout for sparse tensors.""" random_tensor = keep_prob random_tensor += tf.random_uniform(noise_shape) dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool) pre_out = tf.sparse_retain(x, dropout_mask) return pre_out * (1./keep_prob) def dot(x, y, sparse=False): """Wrapper for tf.matmul (sparse vs dense).""" if sparse: res = tf.sparse_tensor_dense_matmul(x, y) else: res = tf.matmul(x, y) return res class Layer(object): """Base layer class. Defines basic API for all layer objects. Implementation inspired by keras (http://keras.io). # Properties name: String, defines the variable scope of the layer. logging: Boolean, switches Tensorflow histogram logging on/off # Methods _call(inputs): Defines computation graph of layer (i.e. takes input, returns output) __call__(inputs): Wrapper for _call() _log_vars(): Log all variables """ def __init__(self, **kwargs): allowed_kwargs = {'name', 'logging'} for kwarg in kwargs.keys(): assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg name = kwargs.get('name') if not name: layer = self.__class__.__name__.lower() name = layer + '_' + str(get_layer_uid(layer)) self.name = name self.vars = {} logging = kwargs.get('logging', False) self.logging = logging self.sparse_inputs = False def _call(self, inputs): return inputs def __call__(self, inputs): with tf.name_scope(self.name): if self.logging and not self.sparse_inputs: tf.summary.histogram(self.name + '/inputs', inputs) outputs = self._call(inputs) if self.logging: tf.summary.histogram(self.name + '/outputs', outputs) return outputs def _log_vars(self): for var in self.vars: tf.summary.histogram(self.name + '/vars/' + var, self.vars[var]) class Dense(Layer): """Dense layer.""" def __init__(self, input_dim, output_dim, placeholders, dropout=0., sparse_inputs=False, 
act=tf.nn.relu, bias=False, featureless=False, **kwargs): super(Dense, self).__init__(**kwargs) if dropout: self.dropout = placeholders['dropout'] else: self.dropout = 0. self.act = act self.sparse_inputs = sparse_inputs self.featureless = featureless self.bias = bias # helper variable for sparse dropout self.num_features_nonzero = placeholders['num_features_nonzero'] with tf.variable_scope(self.name + '_vars'): self.vars['weights'] = glorot([input_dim, output_dim], name='weights') if self.bias: self.vars['bias'] = zeros([output_dim], name='bias') if self.logging: self._log_vars() def _call(self, inputs): x = inputs # dropout if self.sparse_inputs: x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero) else: x = tf.nn.dropout(x, 1-self.dropout) # transform output = dot(x, self.vars['weights'], sparse=self.sparse_inputs) # bias if self.bias: output += self.vars['bias'] return self.act(output) class GraphConvolution(Layer): """Graph convolution layer.""" def __init__(self, input_dim, output_dim, placeholders, dropout=0., support=None, sparse_inputs=False, act=tf.nn.relu, bias=False, featureless=False, **kwargs): super(GraphConvolution, self).__init__(**kwargs) if dropout: self.dropout = placeholders['dropout'] else: self.dropout = 0. 
self.act = act if support is None: self.support = placeholders['support'][0] else: self.support = support self.sparse_inputs = sparse_inputs self.featureless = featureless self.bias = bias # helper variable for sparse dropout self.num_features_nonzero = placeholders['num_features_nonzero'] with tf.variable_scope(self.name + '_vars'): for i in range(1): self.vars['weights_' + str(i)] = glorot([input_dim, output_dim], name='weights_' + str(i)) if self.bias: self.vars['bias'] = zeros([output_dim], name='bias') if self.logging: self._log_vars() def _call(self, inputs): x = inputs # dropout if self.sparse_inputs: x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero) else: x = tf.nn.dropout(x, 1-self.dropout) # convolve # supports = list() # for i in range(len(self.support)): # if not self.featureless: # pre_sup = dot(x, self.vars['weights_' + str(i)], # sparse=self.sparse_inputs) # else: # pre_sup = self.vars['weights_' + str(i)] # support = dot(self.support[i], pre_sup, sparse=True) # supports.append(support) # output = tf.add_n(supports) if not self.featureless: pre_sup = dot(x, self.vars['weights_0'], sparse=self.sparse_inputs) else: pre_sup = self.vars['weights_0'] output = dot(self.support, pre_sup, sparse=True) # bias if self.bias: output += self.vars['bias'] return self.act(output) class SampledGraphConvolution(Layer): """Graph convolution layer.""" def __init__(self, input_dim, output_dim, placeholders, dropout=0., rank = 100, support=None, sparse_inputs=False, act=tf.nn.relu, bias=False, featureless=False, **kwargs): super(SampledGraphConvolution, self).__init__(**kwargs) if dropout: self.dropout = placeholders['dropout'] else: self.dropout = 0. 
self.act = act if support is None: self.support = placeholders['support'][0] else: self.support = support self.sparse_inputs = sparse_inputs self.featureless = featureless self.bias = bias # helper variable for sparse dropout self.num_features_nonzero = placeholders['num_features_nonzero'] self.rank = rank with tf.variable_scope(self.name + '_vars'): for i in range(1): self.vars['weights_' + str(i)] = glorot([input_dim, output_dim], name='weights_' + str(i)) if self.bias: self.vars['bias'] = zeros([output_dim], name='bias') if self.logging: self._log_vars() def _call(self, inputs): x = inputs norm_x = tf.nn.l2_normalize(x, axis=1) norm_support = tf.nn.l2_normalize(self.support, axis=0) norm_mix = tf.cross(norm_x, norm_support) norm_mix = norm_mix*tf.inv(tf.reduce_sum(norm_mix)) sampledIndex = tf.multinomial(tf.log(norm_mix), self.rank) new_support = dot(self.support,tf.diag(norm_mix),sparse=True) # dropout if self.sparse_inputs: x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero) else: x = tf.nn.dropout(x, 1-self.dropout) # convolve # supports = list() # for i in range(len(self.support)): # if not self.featureless: # pre_sup = dot(x, self.vars['weights_' + str(i)], # sparse=self.sparse_inputs) # else: # pre_sup = self.vars['weights_' + str(i)] # support = dot(self.support[i], pre_sup, sparse=True) # supports.append(support) # output = tf.add_n(supports) if not self.featureless: pre_sup = dot(x, self.vars['weights_0'], sparse=self.sparse_inputs) else: pre_sup = self.vars['weights_0'] output = dot(new_support, pre_sup, sparse=True) # bias if self.bias: output += self.vars['bias'] return self.act(output) ================================================ FILE: metrics.py ================================================ import tensorflow as tf def masked_softmax_cross_entropy(preds, labels, mask): """Softmax cross-entropy loss with masking.""" loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels) mask = tf.cast(mask, dtype=tf.float32) 
    # Rescale the mask so the mean loss over masked entries is unbiased
    # (continuation of masked_softmax_cross_entropy).
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)


def masked_accuracy(preds, labels, mask):
    """Accuracy with masking."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    # same unbiased rescaling as in masked_softmax_cross_entropy
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)


def softmax_cross_entropy(preds, labels):
    """Mean softmax cross-entropy over all rows (unmasked variant)."""
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    return tf.reduce_mean(loss)


def accuracy(preds, labels):
    """Mean argmax accuracy over all rows (unmasked variant)."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    return tf.reduce_mean(accuracy_all)


================================================ FILE: models.py ================================================
from layers import *
from metrics import *

flags = tf.app.flags
FLAGS = flags.FLAGS


class Model(object):
    """Base class: subclasses define _build/_loss/_accuracy; build() wires them up."""

    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            # default the variable-scope name to the lowercased class name
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.vars = {}
        self.placeholders = {}

        self.layers = []        # layer objects, in forward order
        self.activations = []   # per-layer outputs, filled by build()

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.accuracy = 0
        self.optimizer = None
        self.opt_op = None

    def _build(self):
        raise NotImplementedError

    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        # Store model variables for easy access
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics
        self._loss()
        # (continuation of Model.build)
        self._accuracy()

        self.opt_op = self.optimizer.minimize(self.loss)

    def predict(self):
        # no-op in the base class; subclasses return softmax probabilities
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def save(self, sess=None):
        """Checkpoint this model's variables to tmp/<name>.ckpt."""
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = saver.save(sess, "tmp/%s.ckpt" % self.name)
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        """Restore this model's variables from tmp/<name>.ckpt."""
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = "tmp/%s.ckpt" % self.name
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)


class MLP(Model):
    """Two Dense layers; masked loss/accuracy over the 'labels_mask' placeholder."""

    def __init__(self, placeholders, input_dim, **kwargs):
        super(MLP, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss — applied to the first layer's weights only
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        self.layers.append(Dense(input_dim=self.input_dim,
                                 output_dim=FLAGS.hidden1,
                                 placeholders=self.placeholders,
                                 act=tf.nn.relu,
                                 dropout=True,
                                 sparse_inputs=True,
                                 logging=self.logging))

        self.layers.append(Dense(input_dim=FLAGS.hidden1,
                                 output_dim=self.output_dim,
                                 placeholders=self.placeholders,
                                 act=lambda x: x,
                                 dropout=True,
                                 logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


class GCN(Model):
    """Standard two-layer GCN (Kipf-style), masked loss/accuracy."""

    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss — first layer only
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                            output_dim=FLAGS.hidden1,
                                            placeholders=self.placeholders,
                                            act=tf.nn.relu,
                                            dropout=True,
                                            sparse_inputs=True,
                                            logging=self.logging))

        self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            act=lambda x: x,
                                            dropout=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


class GCN_APPRO(Model):
    """Two-layer GCN where each layer gets its own (pre-sampled) support matrix."""

    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN_APPRO, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders
        self.supports = placeholders['support']  # one support per layer

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss — first layer only
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error (unmasked: batches are pre-selected rows)
        self.loss += softmax_cross_entropy(self.outputs,
                                           self.placeholders['labels'])

    def _accuracy(self):
        self.accuracy = accuracy(self.outputs, self.placeholders['labels'])

    def _build(self):
        # appr_support = self.placeholders['support'][0]
        self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                            output_dim=FLAGS.hidden1,
                                            placeholders=self.placeholders,
                                            support=self.supports[0],
                                            act=tf.nn.relu,
                                            dropout=True,
                                            sparse_inputs=False,
                                            logging=self.logging))

        self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            support=self.supports[1],
                                            act=lambda x: x,
                                            dropout=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


class GCN_APPRO_Mix(Model):  #mixture of dense and gcn
    """FastGCN model: Dense bottom layer fed with precomputed A*X, GCN top layer."""

    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN_APPRO_Mix, self).__init__(**kwargs)

        self.inputs = placeholders['AXfeatures']# A*X for the bottom layer, not original feature X
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders
        self.support = placeholders['support']  # single support, used by the top layer

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss — first layer only
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error (unmasked)
        self.loss += softmax_cross_entropy(self.outputs, self.placeholders['labels'])

    def _accuracy(self):
        self.accuracy = accuracy(self.outputs, self.placeholders['labels'])

    def _build(self):
        # Bottom layer is Dense because A*X is already folded into the inputs.
        self.layers.append(Dense(input_dim=self.input_dim,
                                 output_dim=FLAGS.hidden1,
                                 placeholders=self.placeholders,
                                 act=tf.nn.relu,
                                 dropout=True,
                                 sparse_inputs=False,
                                 logging=self.logging))

        self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            support=self.support,
                                            act=lambda x: x,
                                            dropout=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


class GCN_APPRO_Onelayer(Model):
    """Single-layer approximation variant with masked loss/accuracy."""

    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN_APPRO_Onelayer, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders
        self.supports = placeholders['support']

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss — first (only) layer
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        # NOTE(review): appr_support is assigned but never used below.
        appr_support = self.placeholders['support'][0]
        self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            support=self.supports[0],
                                            act=tf.nn.relu,
                                            dropout=True,
                                            sparse_inputs=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


================================================ FILE: pubmed-original_inductive_FastGCN.py ================================================
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp
import os

from utils import *
from models import GCN_APPRO_Mix

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn_mix', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial
learning rate.')
flags.DEFINE_integer('epochs', 100, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders):
    """Assemble the feed_dict for the Mix model (Dense bottom + GCN top)."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['AXfeatures']: AXfeatures})
    feed_dict.update({placeholders['support']: support})
    # NOTE(review): AXfeatures[1].shape only makes sense if AXfeatures is a
    # sparse tuple; here it is fed dense rows — presumably vestigial. TODO confirm.
    feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape})
    return feed_dict


def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices of each array in `inputs`.

    NOTE(review): the range() stops at numSamples - batchsize + 1, so a final
    partial batch is silently dropped; also `input` shadows the builtin.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1):
    """Inductive FastGCN training; rank1 = #sampled nodes (None = no sampling)."""
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_original(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    # inductive: restrict the training graph to training nodes only
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    train_val_index = np.concatenate([train_index, val_index],axis=0)
    # NOTE(review): "idnex" is a typo for "index" (used consistently below)
    train_test_idnex = np.concatenate([train_index, test_index],axis=0)

    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        # normADJ = nontuple_preprocess_adj(adj)
        normADJ_val = nontuple_preprocess_adj(adj[train_val_index,:][:,train_val_index])
        normADJ_test = nontuple_preprocess_adj(adj[train_test_idnex,:][:,train_test_idnex])
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Some preprocessing: precompute A*X so the bottom layer can be Dense
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    val_features = normADJ_val.dot(features[train_val_index])
    test_features = normADJ_test.dot(features[train_test_idnex])

    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    # Define placeholders
    placeholders = {
        'support': tf.sparse_placeholder(tf.float32) ,
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function (closure over sess/model)
    def evaluate(features, support, labels, placeholders):
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []

    # importance-sampling probabilities per column (proportional to column norm)
    p0 = column_prop(normADJ_train)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    # val/test supports: only the rows for val/test nodes (they follow the train rows)
    valSupport = sparse_to_tuple(normADJ_val[len(train_index):, :])
    testSupport = sparse_to_tuple(normADJ_test[len(train_index):, :])

    t = time.time()
    maxACC = 0.0
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=20, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            if rank1 is None:
                # no sampling: use the full batch support
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                # sample rank1 columns among the batch's non-empty neighbours
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    q1 = np.random.choice(distr, rank1, replace=False, p=p0[distr]/sum(p0[distr]))  # top layer
                # importance-weight the sampled columns (1 / (p * rank))
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p0[q1] * rank1))))
                if len(support1[1])==0:
                    continue
                features_inputs = train_features[q1, :]  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n +1

        # Validation
        cost, acc, duration = evaluate(val_features, valSupport, y_val, placeholders)
        cost_val.append(cost)

        # if epoch > 50 and acc>maxACC:
        #     maxACC = acc
        #     save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")

        # Print results
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))

        if epoch%5==0:
            # Validation
            test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test,
                                                          placeholders)
            print("training time by far=", "{:.5f}".format(time.time() - t),
                  "epoch = {}".format(epoch + 1),
                  "cost=", "{:.5f}".format(test_cost),
                  "accuracy=", "{:.5f}".format(test_acc))

        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    # if os.path.exists("tmp/pubmed_MixModel.ckpt"):
    #     saver.restore(sess, "tmp/pubmed_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test,
                                                  placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
          "test time=", "{:.5f}".format(test_duration))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    # main(None)
    main(100)
    # for k in [5, 10, 25, 50]:
    #     main(k)


================================================ FILE: pubmed-original_transductive_FastGCN.py ================================================
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp
import os

from utils import *
from models import GCN_APPRO_Mix

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn_mix', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 100, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders):
    """Assemble the feed_dict for the Mix model (same helper as inductive script)."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['AXfeatures']: AXfeatures})
    feed_dict.update({placeholders['support']: support})
    feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape})
    return feed_dict

def iterate_minibatches_listinputs(inputs, batchsize,
                                   shuffle=False):
    """Yield aligned minibatch slices; final partial batch is dropped (see range bound)."""
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1):
    """Transductive FastGCN: normalize the FULL graph, then slice rows per split."""
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_original(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    # NOTE(review): the trailing [:] is a no-op slice — rows only are selected
    # here (transductive), unlike the inductive script's [:, train_index].
    adj_train = adj[train_index, :][:]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    train_val_index = np.concatenate([train_index, val_index],axis=0)
    train_test_index = np.concatenate([train_index, test_index],axis=0)

    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    if FLAGS.model == 'gcn_mix':
        # transductive: normalize once over the whole graph, then take row slices
        normADJ = nontuple_preprocess_adj(adj)
        # normADJ_train = nontuple_preprocess_adj(adj_train)
        # normADJ_val = nontuple_preprocess_adj(adj[train_val_index,:][:])
        # normADJ_test = nontuple_preprocess_adj(adj[train_test_idnex,:][:])
        normADJ_train = normADJ[train_index,:][:]
        normADJ_val = normADJ[train_val_index, :][:]
        normADJ_test = normADJ[train_test_index, :][:]
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Some preprocessing: A*X over the full graph (transductive)
    features = nontuple_preprocess_features(features).todense()
    ax_features = normADJ.dot(features[:])
    # val_features = normADJ_val.dot(features[train_val_index])
    # test_features = normADJ_test.dot(features[train_test_idnex])

    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(ax_features)[0])

    # Define placeholders
    placeholders = {
        'support': tf.sparse_placeholder(tf.float32) ,
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'labels_mask': tf.placeholder(tf.int32),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function (closure over sess/model)
    def evaluate(features, support, labels, placeholders):
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []

    # column sampling probabilities over the full normalized adjacency
    p0 = column_prop(normADJ)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    valSupport = sparse_to_tuple(normADJ_val[len(train_index):, :])
    testSupport = sparse_to_tuple(normADJ_test[len(train_index):, :])

    t = time.time()
    maxACC = 0.0
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=20, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = ax_features
            else:
                # sample rank1 columns among the batch's non-empty neighbours
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    q1 = np.random.choice(distr, rank1, replace=False, p=p0[distr]/sum(p0[distr]))  # top layer
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p0[q1] * rank1))))
                if len(support1[1])==0:
                    continue
                features_inputs = ax_features[q1, :]  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n +1

        # Validation
        cost, acc, duration = evaluate(ax_features, valSupport, y_val, placeholders)
        cost_val.append(cost)

        # if epoch > 50 and acc>maxACC:
        #     maxACC = acc
        #     save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")

        # Print results
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))

        # if epoch%5==0:
        #     # Validation
        #     test_cost, test_acc, test_duration = evaluate(ax_features, testSupport, y_test,
        #                                                   placeholders)
        #     print("training time by far=", "{:.5f}".format(time.time() - t),
        #           "epoch = {}".format(epoch + 1),
        #           "cost=", "{:.5f}".format(test_cost),
        #           "accuracy=", "{:.5f}".format(test_acc))

        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    # if os.path.exists("tmp/pubmed_MixModel.ckpt"):
    #     saver.restore(sess, "tmp/pubmed_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(ax_features, testSupport, y_test,
                                                  placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
          "test time=", "{:.5f}".format(test_duration))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    main(400)
    # main(100)
    # for k in [5, 10, 25, 50]:
    #     main(k)


================================================ FILE: pubmed_Mix.py ================================================
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp
import os

from utils import *
from models import GCN_APPRO_Mix

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn_mix', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders):
    """Assemble the feed_dict for the Mix model (Dense bottom + GCN top)."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['AXfeatures']: AXfeatures})
    feed_dict.update({placeholders['support']: support})
    feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape})
    return feed_dict


def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices; final partial batch is dropped (see range bound)."""
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1):
    """Inductive Mix training with PER-BATCH sampling distribution (column_prop of the batch)."""
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    train_val_index = np.concatenate([train_index, val_index],axis=0)
    # NOTE(review): "idnex" is a typo for "index" (used consistently below)
    train_test_idnex = np.concatenate([train_index, test_index],axis=0)

    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        # normADJ = nontuple_preprocess_adj(adj)
        normADJ_val = nontuple_preprocess_adj(adj[train_val_index,:][:,train_val_index])
        normADJ_test = nontuple_preprocess_adj(adj[train_test_idnex,:][:,train_test_idnex])
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Some preprocessing: precompute A*X so the bottom layer can be Dense
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    val_features = normADJ_val.dot(features[train_val_index])
    test_features = normADJ_test.dot(features[train_test_idnex])

    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    # Define placeholders
    placeholders = {
        'support': tf.sparse_placeholder(tf.float32) ,
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function (closure over sess/model)
    def evaluate(features, support, labels, placeholders):
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []

    # NOTE(review): p0 is computed here but the loop below samples from the
    # per-batch p1 instead — p0 is unused in this script.
    p0 = column_prop(normADJ_train)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    valSupport = sparse_to_tuple(normADJ_val[len(train_index):, :])
    testSupport = sparse_to_tuple(normADJ_test[len(train_index):, :])

    t = time.time()
    maxACC = 0.0
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=1024, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            # per-batch sampling distribution (contrast with pubmed_Mix_sampleA's global p0)
            p1 = column_prop(normADJ_batch)
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                q1 = np.random.choice(np.arange(numNode_train), rank1, replace=False, p=p1)  # top layer
                # importance-weight the sampled columns (1 / (p * rank))
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))
                features_inputs = train_features[q1, :]  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n +1

        # Validation
        cost, acc, duration = evaluate(val_features, valSupport, y_val, placeholders)
        cost_val.append(cost)

        # if epoch > 50 and acc>maxACC:
        #     maxACC = acc
        #     save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")

        # Print results
        # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
        #       "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
        #       "val_acc=", "{:.5f}".format(acc), "time per batch=", "{:.5f}".format((time.time() - t1)/n))

        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    # if os.path.exists("tmp/pubmed_MixModel.ckpt"):
    #     saver.restore(sess, "tmp/pubmed_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test,
                                                  placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
          "test time=", "{:.5f}".format(test_duration))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    for k in [25, 50, 100, 200, 400]:
        main(k)


================================================ FILE: pubmed_Mix_sampleA.py ================================================
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp
import os

from utils import *
from models import GCN_APPRO_Mix

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn_mix', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders):
    """Assemble the feed_dict for the Mix model (Dense bottom + GCN top)."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['AXfeatures']: AXfeatures})
    feed_dict.update({placeholders['support']: support})
    feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape})
    return feed_dict


def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices of each array in `inputs`."""
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    # final partial batch is dropped (range stops at numSamples - batchsize + 1)
    for start_idx in range(0, numSamples -
                           batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1):
    """Inductive Mix training sampling from the GLOBAL distribution p0 restricted to batch neighbours."""
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    train_val_index = np.concatenate([train_index, val_index],axis=0)
    # NOTE(review): "idnex" is a typo for "index" (used consistently below)
    train_test_idnex = np.concatenate([train_index, test_index],axis=0)

    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        # normADJ = nontuple_preprocess_adj(adj)
        normADJ_val = nontuple_preprocess_adj(adj[train_val_index,:][:,train_val_index])
        normADJ_test = nontuple_preprocess_adj(adj[train_test_idnex,:][:,train_test_idnex])
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Some preprocessing: precompute A*X so the bottom layer can be Dense
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    val_features = normADJ_val.dot(features[train_val_index])
    test_features = normADJ_test.dot(features[train_test_idnex])

    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    # Define placeholders
    placeholders = {
        'support': tf.sparse_placeholder(tf.float32) ,
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function (closure over sess/model)
    def evaluate(features, support, labels, placeholders):
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []

    # global importance-sampling probabilities per column of the training support
    p0 = column_prop(normADJ_train)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    valSupport = sparse_to_tuple(normADJ_val[len(train_index):, :])
    testSupport = sparse_to_tuple(normADJ_test[len(train_index):, :])

    t = time.time()
    maxACC = 0.0
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=1024, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                # sample rank1 columns among the batch's non-empty neighbours
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    q1 = np.random.choice(distr, rank1, replace=False, p=p0[distr]/sum(p0[distr]))  # top layer
                # importance-weight the sampled columns (1 / (p * rank))
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p0[q1] * rank1))))
                if len(support1[1])==0:
                    continue
                features_inputs = train_features[q1, :]  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n +1

        # Validation
        cost, acc, duration = evaluate(val_features, valSupport, y_val, placeholders)
        cost_val.append(cost)

        # if epoch > 50 and acc>maxACC:
        #     maxACC = acc
        #     save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")

        # Print results
        # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
        #       "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
        #       "val_acc=", "{:.5f}".format(acc), "time per batch=", "{:.5f}".format((time.time() - t1)/n))

        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    # if os.path.exists("tmp/pubmed_MixModel.ckpt"):
    #     saver.restore(sess, "tmp/pubmed_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test,
                                                  placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
          "test time=", "{:.5f}".format(test_duration))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    # main(None)
    main(50)
    # for k in [25, 50, 100, 200, 400]:
    #     main(k)


================================================ FILE: pubmed_Mix_uniform.py ================================================
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp
import os

from utils import *
from models import GCN_APPRO_Mix

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn_mix', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders):
    """Assemble the feed_dict for the mixed-layers model.

    AXfeatures is the precomputed A*X product; support is the sampled
    adjacency as a sparse tuple.
    """
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['AXfeatures']: AXfeatures})
    feed_dict.update({placeholders['support']: support})
    feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape})
    return feed_dict


def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices from a list of equally-sized inputs.

    Only full batches are produced; a trailing partial batch is dropped.
    NOTE(review): the loop variable `input` shadows the Python builtin.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1):
    """Train/evaluate mixed-layers FastGCN on Pubmed with UNIFORM sampling.

    rank1: number of nodes uniformly sampled per batch, or None for no sampling.
    """
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    # NOTE(review): 'train_test_idnex' is a typo for 'train_test_index'
    # (local variable only, used consistently below).
    train_val_index = np.concatenate([train_index, val_index],axis=0)
    train_test_idnex = np.concatenate([train_index, test_index],axis=0)

    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        # normADJ = nontuple_preprocess_adj(adj)
        normADJ_val = nontuple_preprocess_adj(adj[train_val_index,:][:,train_val_index])
        normADJ_test = nontuple_preprocess_adj(adj[train_test_idnex,:][:,train_test_idnex])
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Some preprocessing: precompute A*X for train/val/test node sets.
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    val_features = normADJ_val.dot(features[train_val_index])
    test_features = normADJ_test.dot(features[train_test_idnex])

    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    # Define placeholders
    placeholders = {
        'support': tf.sparse_placeholder(tf.float32) ,
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function
    def evaluate(features, support, labels, placeholders):
        """Run loss/accuracy on the given (features, support, labels) and time it."""
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []

    p0 = column_prop(normADJ_train)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    valSupport = sparse_to_tuple(normADJ_val[len(train_index):, :])
    testSupport = sparse_to_tuple(normADJ_test[len(train_index):, :])

    t = time.time()
    maxACC = 0.0
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=1024, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            # NOTE(review): p1 is computed every batch but never used in this
            # uniform-sampling variant (sampling below ignores probabilities).
            p1 = column_prop(normADJ_batch)
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    # Uniform sampling without replacement over the active columns.
                    q1 = np.random.choice(distr, rank1, replace=False)  # top layer

                # q1 = np.random.choice(np.arange(numNode_train), rank1)  # top layer
                # Scale by N/|q1| to keep the uniform estimator unbiased.
                support1 = sparse_to_tuple(normADJ_batch[:, q1] * numNode_train / len(q1))
                features_inputs = train_features[q1, :]  # selected nodes for approximation
            # Construct feed dictionary
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n +1

        # Validation
        cost, acc, duration = evaluate(val_features, valSupport, y_val, placeholders)
        cost_val.append(cost)
        # if epoch > 50 and acc>maxACC:
        #     maxACC = acc
        #     save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")

        # Print results
        # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
        #       "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
        #       "val_acc=", "{:.5f}".format(acc), "time per batch=", "{:.5f}".format((time.time() - t1)/n))

        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    # if os.path.exists("tmp/pubmed_MixModel.ckpt"):
    #     saver.restore(sess, "tmp/pubmed_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test, placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc), "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
          "test time=", "{:.5f}".format(test_duration))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    main(5)
    # for k in [25, 50, 100, 200, 400]:
    #     main(k)

================================================
FILE: pubmed_inductive_appr2layers.py
================================================
# FastGCN on Pubmed: inductive training with TWO approximated (sampled) layers.
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp

from utils import *
from models import GCN, MLP, GCN_APPRO

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_appr', 'Model string.')  # 'gcn', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


# Load data
def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices from a list of equally-sized inputs.

    Only full batches are produced; a trailing partial batch is dropped.
    NOTE(review): the loop variable `input` shadows the Python builtin.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1, rank0):
    """Train/evaluate two-sampled-layer FastGCN (GCN_APPRO) on Pubmed.

    rank1: sample size for the top (output-side) layer.
    rank0: sample size for the bottom (input-side) layer.
    """
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    # adj_val = adj[val_index, :][:, val_index]
    val_mask = val_mask[val_index]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    # adj_test = adj[test_index, :][:, test_index]
    test_mask = test_mask[test_index]
    y_test = y_test[test_index]
    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    # Some preprocessing
    features = nontuple_preprocess_features(features).todense()
    train_features = features[train_index]

    if FLAGS.model == 'gcn_appr':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        # normADJ_val = nontuple_preprocess_adj(adj_val)
        # normADJ_test = nontuple_preprocess_adj(adj_test)
        num_supports = 2
        model_func = GCN_APPRO
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function
    def evaluate(features, support, labels, mask, placeholders):
        """Run masked loss/accuracy on the given inputs and time it."""
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())

    cost_val = []

    p0 = column_prop(normADJ_train)

    # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    # Val/test use the full normalized adjacency (transductive evaluation).
    valSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ[val_index, :])]
    testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ[test_index, :])]

    t = time.time()
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        # NOTE(review): n is initialized but never incremented in this script.
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train, train_mask], batchsize=256, shuffle=True):
            [normADJ_batch, y_train_batch, train_mask_batch] = batch
            if sum(train_mask_batch) < 1:
                # No labeled node in this batch — skip it.
                continue
            p1 = column_prop(normADJ_batch)
            q1 = np.random.choice(np.arange(numNode_train), rank1, p=p1)  # top layer

            # q0 = np.random.choice(np.arange(numNode_train), rank0, p=p0)  # bottom layer
            # Rescale sampled columns by 1/(p*rank) to keep the estimator unbiased.
            support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))

            # Bottom-layer sampling conditioned on the q1 rows.
            p2 = column_prop(normADJ_train[q1, :])
            q0 = np.random.choice(np.arange(numNode_train), rank0, p=p2)
            support0 = sparse_to_tuple(normADJ_train[q1, :][:, q0])

            features_inputs = sp.diags(1.0 / (p2[q0] * rank0)).dot(train_features[q0, :])  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feed_dict(features_inputs, [support0, support1], y_train_batch, train_mask_batch,
                                            placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

        # Validation
        cost, acc, duration = evaluate(features, valSupport, y_val, val_mask, placeholders)
        cost_val.append(cost)

        # # Print results
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t1))

        if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, test_mask, placeholders)
    # NOTE(review): train_duration/epoch divides by zero if the loop exits at
    # epoch 0 (cannot happen with early_stopping=30, but fragile).
    print("rank1 = {}".format(rank1), "rank0 = {}".format(rank0), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc), "training time per epoch=", "{:.5f}".format(train_duration/epoch))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    for k in [5, 10, 25, 50]:
        main(k, k)
    # main(50,50)
    # for k in [50, 100, 200, 400]:
    #     main(k, k)

================================================
FILE: train.py
================================================
# Baseline (original Kipf & Welling style) GCN training script, transductive.
from __future__ import division
from
__future__ import print_function

import time
import tensorflow as tf

from utils import *
from models import GCN, MLP

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn', 'Model string.')  # 'gcn', 'gcn_cheby', 'dense'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)

# Some preprocessing: features become a sparse tuple (coords, values, shape).
features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model; features[2][1] is the feature dimension from the sparse tuple.
model = model_func(placeholders, input_dim=features[2][1], logging=True)
print(adj.shape[0])

# Initialize session
sess = tf.Session()


# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    """Run masked loss/accuracy on the given inputs and time it."""
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)


# Init variables
sess.run(tf.global_variables_initializer())

cost_val = []
t_start = time.time()
# Train model: full-batch gradient steps over the whole graph.
for epoch in range(FLAGS.epochs):
    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders)
    cost_val.append(cost)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

    # if epoch % 5 == 0:
    #     # Validation
    #     test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
    #     print("training time by far=", "{:.5f}".format(time.time() - t_start),
    #           "epoch = {}".format(epoch + 1),
    #           "cost=", "{:.5f}".format(test_cost),
    #           "accuracy=", "{:.5f}".format(test_acc))

    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break

# print("Optimization Finished!")
train_duration = time.time()-t_start
# Testing
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
print("Original test set results:", "cost=", "{:.5f}".format(test_cost), "accuracy=",
"{:.5f}".format(test_acc), "training time =", "{:.5f}".format(train_duration),
      "training time per epoch=", "{:.5f}".format(train_duration/(epoch+1)),
      "test time=", "{:.5f}".format(test_duration))

================================================
FILE: train_batch_multiRank_inductive_newscheme.py
================================================
# Experimental FastGCN variant ("new scheme"): fixed global sampling
# distributions p0/p1 computed once before training.
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp

from utils import *
from models import GCN, MLP, GCN_APPRO

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_appr', 'Model string.')  # 'gcn', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 300, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# Module-level defaults; __main__ below overrides them via main(k, k).
rank1 = 300
rank0 = 300


# Load data
def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices from a list of equally-sized inputs.

    Only full batches are produced; a trailing partial batch is dropped.
    NOTE(review): the loop variable `input` shadows the Python builtin.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def main(rank1, rank0):
    """Train/evaluate GCN_APPRO with fixed global sampling distributions.

    rank1/rank0: sample sizes for the top and bottom layers.
    """
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
    train_index = np.where(train_mask)[0]
    adj_train = adj[train_index, :][:, train_index]
    train_mask = train_mask[train_index]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    # adj_val = adj[val_index, :][:, val_index]
    # val_mask = val_mask[val_index]
    # y_val = y_val[val_index]
    # test_index = np.where(test_mask)[0]
    # adj_test = adj[test_index, :][:, test_index]
    # test_mask = test_mask[test_index]
    # y_test = y_test[test_index]
    numNode_train = adj_train.shape[0]

    # print("numNode", numNode)

    # Some preprocessing
    features = nontuple_preprocess_features(features).todense()
    train_features = features[train_index]

    if FLAGS.model == 'gcn_appr':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        # normADJ_val = nontuple_preprocess_adj(adj_val)
        # normADJ_test = nontuple_preprocess_adj(adj_test)
        num_supports = 2
        model_func = GCN_APPRO
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)

    # Initialize session
    sess = tf.Session()

    # Define model evaluation function
    def evaluate(features, support, labels, mask, placeholders):
        """Run masked loss/accuracy on the given inputs and time it."""
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())

    cost_val = []

    # Global (fixed) distributions: p0 from adjacency columns, p1 mixing in
    # the features (see utils.mix_prop).
    p0 = column_prop(normADJ_train)
    p1 = mix_prop(normADJ_train, features[train_index, :])

    testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)]
    # valSupport = [sparse_to_tuple(normADJ_val), sparse_to_tuple(normADJ_val)]
    # testSupport = [sparse_to_tuple(normADJ_test), sparse_to_tuple(normADJ_test)]

    t = time.time()
    # Train model
    for epoch in range(FLAGS.epochs):
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train, train_mask], batchsize=50, shuffle=True):
            [normADJ_batch, y_train_batch, train_mask_batch] = batch
            if sum(train_mask_batch) < 1:
                continue
            # p1 = column_prop(normADJ_batch)
            q1 = np.random.choice(np.arange(numNode_train), rank1, p=p0)  # top layer
            q0 = np.random.choice(np.arange(numNode_train), rank0, p=p0)  # bottom layer
            # NOTE(review): samples are drawn with p=p0 but rescaled below with
            # p1[...] — looks inconsistent for an unbiased estimator; verify
            # against the intended "new scheme" derivation.
            support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))
            support0 = sparse_to_tuple(normADJ_train[q1, :][:, q0])
            # support1 = sparse_to_tuple(normADJ_batch)
            # support0 = sparse_to_tuple(normADJ[:, q0])

            features_inputs = sp.diags(1.0 / (p1[q0] * rank0)).dot(train_features[q0, :])  # selected nodes for approximation

            # Construct feed dictionary
            feed_dict = construct_feed_dict(features_inputs, [support0, support1], y_train_batch, train_mask_batch,
                                            placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

        # Validation (full graph, full label set — masks select val nodes).
        cost, acc, duration = evaluate(features, testSupport, y_val, val_mask, placeholders)
        cost_val.append(cost)

        # # Print results
        # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
        #       "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
        #       "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

        if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t
    # Testing
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, test_mask, placeholders)
    print("rank1 = {}".format(rank1), "rank0 = {}".format(rank0), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc), "training time per epoch=", "{:.5f}".format(train_duration/epoch))


if __name__=="__main__":
    print("DATASET:", FLAGS.dataset)
    for k in range(100, 1000, 200):
        main(k, k)

================================================
FILE: train_batch_multiRank_inductive_reddit_Mixlayers_sampleA.py
================================================
# FastGCN on Reddit: mixed-layers model with importance sampling ("sampleA"),
# the paper's final inductive Reddit configuration.
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
import scipy.sparse as sp

from utils import *
from models import GCN_APPRO_Mix
import json
from networkx.readwrite import json_graph
import os

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_string('model', 'gcn_mix', 'Model string.')  # 'gcn', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 1e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')


# Load data
def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatch slices from a list of equally-sized inputs.

    Only full batches are produced; a trailing partial batch is dropped.
    NOTE(review): the loop variable `input` shadows the Python builtin.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield [input[excerpt] for input in inputs]


def
loadRedditFromG(dataset_dir, inputfile): f= open(dataset_dir+inputfile) objects = [] for _ in range(pkl.load(f)): objects.append(pkl.load(f)) adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(objects) feats = np.load(dataset_dir + "/reddit-feats.npy") return sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels, test_labels, train_index, val_index, test_index def loadRedditFromNPZ(dataset_dir): adj = sp.load_npz(dataset_dir+"reddit_adj.npz") data = np.load(dataset_dir+"reddit.npz") return adj, data['feats'], data['y_train'], data['y_val'], data['y_test'], data['train_index'], data['val_index'], data['test_index'] def transferRedditDataFormat(dataset_dir, output_file): G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json"))) labels = json.load(open(dataset_dir + "/reddit-class_map.json")) train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']] test_ids = [n for n in G.nodes() if G.node[n]['test']] val_ids = [n for n in G.nodes() if G.node[n]['val']] train_labels = [labels[i] for i in train_ids] test_labels = [labels[i] for i in test_ids] val_labels = [labels[i] for i in val_ids] feats = np.load(dataset_dir + "/reddit-feats.npy") ## Logistic gets thrown off by big counts, so log transform num comments and score feats[:, 0] = np.log(feats[:, 0] + 1.0) feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1)) feat_id_map = json.load(open(dataset_dir + "reddit-id_map.json")) feat_id_map = {id: val for id, val in feat_id_map.iteritems()} # train_feats = feats[[feat_id_map[id] for id in train_ids]] # test_feats = feats[[feat_id_map[id] for id in test_ids]] # numNode = len(feat_id_map) # adj = sp.lil_matrix(np.zeros((numNode,numNode))) # for edge in G.edges(): # adj[feat_id_map[edge[0]], feat_id_map[edge[1]]] = 1 train_index = [feat_id_map[id] for id in train_ids] val_index = [feat_id_map[id] for id in val_ids] test_index = [feat_id_map[id] for id in test_ids] 
np.savez(output_file, feats = feats, y_train=train_labels, y_val=val_labels, y_test = test_labels, train_index = train_index, val_index=val_index, test_index = test_index) def transferLabel2Onehot(labels, N): y = np.zeros((len(labels),N)) for i in range(len(labels)): pos = labels[i] y[i,pos] =1 return y def construct_feeddict_forMixlayers(AXfeatures, support, labels, placeholders): feed_dict = dict() feed_dict.update({placeholders['labels']: labels}) feed_dict.update({placeholders['AXfeatures']: AXfeatures}) feed_dict.update({placeholders['support']: support}) feed_dict.update({placeholders['num_features_nonzero']: AXfeatures[1].shape}) return feed_dict def main(rank1): # config = tf.ConfigProto(device_count={"CPU": 4}, # limit to num_cpu_core CPU usage # inter_op_parallelism_threads = 1, # intra_op_parallelism_threads = 4, # log_device_placement=False) adj, features, y_train, y_val, y_test,train_index, val_index, test_index = loadRedditFromNPZ("data/") adj = adj+adj.T y_train = transferLabel2Onehot(y_train, 41) y_val = transferLabel2Onehot(y_val, 41) y_test = transferLabel2Onehot(y_test, 41) features = sp.lil_matrix(features) adj_train = adj[train_index, :][:, train_index] numNode_train = adj_train.shape[0] # print("numNode", numNode) if FLAGS.model == 'gcn_mix': normADJ_train = nontuple_preprocess_adj(adj_train) normADJ = nontuple_preprocess_adj(adj) # normADJ_val = nontuple_preprocess_adj(adj_val) # normADJ_test = nontuple_preprocess_adj(adj_test) num_supports = 2 model_func = GCN_APPRO_Mix else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) # Some preprocessing features = nontuple_preprocess_features(features).todense() train_features = normADJ_train.dot(features[train_index]) features = normADJ.dot(features) nonzero_feature_number = len(np.nonzero(features)[0]) nonzero_feature_number_train = len(np.nonzero(train_features)[0]) # Define placeholders placeholders = { 'support': tf.sparse_placeholder(tf.float32) , 'AXfeatures': 
def main(rank1):
    """Train and evaluate the Mix-layers FastGCN model on Reddit with
    importance sampling driven by a fixed column distribution p0.

    rank1: number of columns (neighbor nodes) sampled for the top layer per
        minibatch; None disables sampling and feeds the full support.
    """
    # config = tf.ConfigProto(device_count={"CPU": 4},  # limit to num_cpu_core CPU usage
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=4,
    #                         log_device_placement=False)
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj+adj.T  # symmetrize the adjacency
    # Reddit labels arrive as int ids over 41 classes.
    y_train = transferLabel2Onehot(y_train, 41)
    y_val = transferLabel2Onehot(y_val, 41)
    y_test = transferLabel2Onehot(y_test, 41)
    features = sp.lil_matrix(features)

    # Inductive setting: training graph restricted to training nodes only.
    adj_train = adj[train_index, :][:, train_index]
    numNode_train = adj_train.shape[0]

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Precompute A*X once so the bottom layer never needs sampling.
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    features = normADJ.dot(features)
    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    placeholders = {
        'support': tf.sparse_placeholder(tf.float32),
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()

    def evaluate(features, support, labels, placeholders):
        # Returns (loss, accuracy, wall time) for one full forward pass.
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []
    p0 = column_prop(normADJ_train)  # fixed importance distribution over train columns

    valSupport = sparse_to_tuple(normADJ[val_index, :])
    testSupport = sparse_to_tuple(normADJ[test_index, :])

    t = time.time()
    maxACC = 0.0
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=256, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            # p1 = column_prop(normADJ_batch)
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                # Columns that actually carry mass in this minibatch.
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    # Importance-sample rank1 columns with the global p0
                    # renormalized over this batch's nonzero columns.
                    q1 = np.random.choice(distr, rank1, replace=False, p=p0[distr]/sum(p0[distr]))  # top layer
                # q1 = np.random.choice(np.arange(numNode_train), rank1, p=p0)  # top layer
                # Rescale sampled columns by 1/(p*rank1) for an unbiased estimator.
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p0[q1] * rank1))))
                if len(support1[1]) == 0:
                    continue  # all sampled columns empty for this batch
                features_inputs = train_features[q1, :]  # selected nodes for approximation
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n+1
        # Validation once per epoch on the full (unsampled) support.
        cost, acc, duration = evaluate(features, valSupport, y_val, placeholders)
        cost_val.append(cost)
        # Checkpoint the best validation accuracy after a 20-epoch warm-up.
        if epoch > 20 and acc > maxACC:
            maxACC = acc
            saver.save(sess, "tmp/tmp_MixModel_sampleA_full.ckpt")
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))
        if epoch % 5 == 0:
            # Periodic test-set progress report.
            test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, placeholders)
            print("training time by far=", "{:.5f}".format(time.time() - t),
                  "epoch = {}".format(epoch + 1),
                  "cost=", "{:.5f}".format(test_cost),
                  "accuracy=", "{:.5f}".format(test_acc))
        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break
    train_duration = time.time() - t

    # Restore the best checkpoint if one was saved; V2 checkpoints are
    # detected via their ".index" companion file.
    if os.path.exists("tmp/tmp_MixModel_sampleA_full.ckpt.index"):
        saver.restore(sess, "tmp/tmp_MixModel_sampleA_full.ckpt")
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "epoch = {}".format(epoch+1),
          "test time=", "{:.5f}".format(test_duration))
def test(rank1=None):
    """Evaluate a previously trained Mix-layers model on the Reddit test split.

    rank1: number of columns to importance-sample for the single support;
        None evaluates with the full, unsampled test support.
    """
    # config = tf.ConfigProto(device_count={"CPU": 4},  # limit to num_cpu_core CPU usage
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=4,
    #                         log_device_placement=False)
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj + adj.T
    y_train = transferLabel2Onehot(y_train, 41)
    y_test = transferLabel2Onehot(y_test, 41)
    features = sp.lil_matrix(features)

    numNode_train = y_train.shape[0]

    if FLAGS.model == 'gcn_mix':
        normADJ = nontuple_preprocess_adj(adj)
        normADJ_test = normADJ[test_index, :]
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Precompute A*X for the bottom layer.
    features = nontuple_preprocess_features(features).todense()
    features = normADJ.dot(features)

    placeholders = {
        'support': tf.sparse_placeholder(tf.float32),
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()

    def evaluate(features, support, labels, placeholders):
        # Returns (loss, accuracy, wall time) for one forward pass.
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # NOTE(review): this restores "tmp_MixModel_sampleA.ckpt", but main() in
    # this same file saves "tmp_MixModel_sampleA_full.ckpt" — confirm the
    # intended checkpoint prefix before relying on this entry point.
    saver.restore(sess, "tmp/tmp_MixModel_sampleA.ckpt")

    cost_val = []
    p0 = column_prop(normADJ_test)
    t = time.time()
    if rank1 is None:
        support1 = sparse_to_tuple(normADJ_test)
        features_inputs = features
    else:
        # Sample only among columns that are nonzero in the test support.
        distr = np.nonzero(np.sum(normADJ_test, axis=0))[1]
        if rank1 > len(distr):
            q1 = distr
        else:
            q1 = np.random.choice(distr, rank1, replace=False, p=p0[distr] / sum(p0[distr]))  # top layer
        # q1 = np.random.choice(np.arange(numNode_train), rank1, p=p0)  # top layer
        # Rescale by 1/(p*rank1) to keep the estimator unbiased.
        support1 = sparse_to_tuple(normADJ_test[:, q1].dot(sp.diags(1.0 / (p0[q1] * rank1))))
        features_inputs = features[q1, :]  # selected nodes for approximation
    test_cost, test_acc, test_duration = evaluate(features_inputs, support1, y_test, placeholders)
    test_duration = time.time() - t
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "test time=", "{:.5f}".format(test_duration))
def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned fixed-size minibatches from several equal-length arrays.

    Args:
        inputs: list of row-indexable arrays/matrices; all are assumed to
            share the same first-dimension length (only inputs[0] is checked).
        batchsize: number of rows per yielded batch.
        shuffle: if True, iterate in one random permutation of the rows.

    Yields:
        Lists with one excerpt per input, each of exactly `batchsize` rows.
        NOTE: the trailing remainder (numSamples % batchsize rows) is
        dropped, matching the original behavior callers rely on.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        # `arr`, not `input`: the original name shadowed the builtin.
        yield [arr[excerpt] for arr in inputs]


def loadRedditFromG(dataset_dir, inputfile):
    """Load the pickled Reddit split objects plus the .npy feature matrix.

    The pickle stream starts with an object count followed by that many
    objects: adj, the three label arrays, and the three index arrays.
    """
    # `with` closes the handle even on error (the original leaked it).
    with open(dataset_dir + inputfile) as f:
        objects = [pkl.load(f) for _ in range(pkl.load(f))]
    adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(objects)
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    return sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels, test_labels, train_index, val_index, test_index


def loadRedditFromNPZ(dataset_dir):
    """Load the preprocessed Reddit adjacency (.npz sparse) and data bundle."""
    adj = sp.load_npz(dataset_dir + "reddit_adj.npz")
    data = np.load(dataset_dir + "reddit.npz")
    return adj, data['feats'], data['y_train'], data['y_val'], data['y_test'], data['train_index'], data['val_index'], data['test_index']
def main(rank1):
    """Train the Mix-layers FastGCN on Reddit with per-batch importance
    sampling (column probabilities recomputed from each minibatch).

    rank1: number of columns sampled per minibatch for the top layer;
        None disables sampling and feeds the full support.
    """
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj+adj.T  # symmetrize the adjacency
    y_train = transferLabel2Onehot(y_train, 41)
    y_val = transferLabel2Onehot(y_val, 41)
    y_test = transferLabel2Onehot(y_test, 41)
    features = sp.lil_matrix(features)

    # Inductive setting: training graph restricted to training nodes.
    adj_train = adj[train_index, :][:, train_index]
    numNode_train = adj_train.shape[0]

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        num_supports = 2
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Precompute A*X once so the bottom layer never needs sampling.
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    features = normADJ.dot(features)

    placeholders = {
        'support': tf.sparse_placeholder(tf.float32),
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()

    def evaluate(features, support, labels, placeholders):
        # Returns (loss, accuracy, wall time) for one full forward pass.
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []
    # (Removed an unused `p0 = column_prop(normADJ_train)`: this variant
    # recomputes the distribution per batch as p1 and never read p0.)
    valSupport = sparse_to_tuple(normADJ[val_index, :])
    testSupport = sparse_to_tuple(normADJ[test_index, :])

    t = time.time()
    maxACC = 0.0
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=256, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            # Per-batch importance distribution over columns.
            p1 = column_prop(normADJ_batch)
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                q1 = np.random.choice(np.arange(numNode_train), rank1, replace=False, p=p1)  # top layer
                # Rescale sampled columns by 1/(p*rank1) for unbiasedness.
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))
                features_inputs = train_features[q1, :]  # selected nodes for approximation
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n + 1
        cost, acc, duration = evaluate(features, valSupport, y_val, placeholders)
        cost_val.append(cost)
        # Checkpoint the best validation accuracy after a 50-epoch warm-up.
        if epoch > 50 and acc > maxACC:
            maxACC = acc
            save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt")
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))
        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break
    train_duration = time.time() - t

    # BUG FIX: tf.train.Saver (V2 format) writes "<prefix>.index"/".data-*",
    # never a file named exactly "tmp/tmp_MixModel.ckpt", so the original
    # exists-check was always False and the best checkpoint was never
    # restored. Check the ".index" companion instead (as the sampleA
    # variant of this script already does).
    if os.path.exists("tmp/tmp_MixModel.ckpt.index"):
        saver.restore(sess, "tmp/tmp_MixModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "epoch = {}".format(epoch+1),
          "test time=", "{:.5f}".format(test_duration))
import print_function import time import tensorflow as tf import scipy.sparse as sp from utils import * from models import GCN, MLP, GCN_APPRO_Mix import json from networkx.readwrite import json_graph import os # Set random seed seed = 123 np.random.seed(seed) tf.set_random_seed(seed) # Settings flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_string('dataset', 'pubmed', 'Dataset string.') # 'cora', 'citeseer', 'pubmed' flags.DEFINE_string('model', 'gcn_mix', 'Model string.') # 'gcn', 'gcn_appr' flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.') flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.') flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.') flags.DEFINE_float('dropout', 0.0, 'Dropout rate (1 - keep probability).') flags.DEFINE_float('weight_decay', 1e-4, 'Weight for L2 loss on embedding matrix.') flags.DEFINE_integer('early_stopping', 100, 'Tolerance for early stopping (# of epochs).') flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.') # Load data def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False): assert inputs is not None numSamples = inputs[0].shape[0] if shuffle: indices = np.arange(numSamples) np.random.shuffle(indices) for start_idx in range(0, numSamples - batchsize + 1, batchsize): if shuffle: excerpt = indices[start_idx:start_idx + batchsize] else: excerpt = slice(start_idx, start_idx + batchsize) yield [input[excerpt] for input in inputs] def loadRedditFromG(dataset_dir, inputfile): f= open(dataset_dir+inputfile) objects = [] for _ in range(pkl.load(f)): objects.append(pkl.load(f)) adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(objects) feats = np.load(dataset_dir + "/reddit-feats.npy") return sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels, test_labels, train_index, val_index, test_index def loadRedditFromNPZ(dataset_dir): adj = sp.load_npz(dataset_dir+"reddit_adj.npz") data = 
def transferRedditDataFormat(dataset_dir, output_file):
    """Convert the raw GraphSAGE Reddit release (JSON graph + .npy features)
    into the single .npz bundle consumed by loadRedditFromNPZ.

    Args:
        dataset_dir: directory holding reddit-G.json, reddit-class_map.json,
            reddit-id_map.json and reddit-feats.npy.
        output_file: path handed to np.savez for the output bundle.
    """
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    labels = json.load(open(dataset_dir + "/reddit-class_map.json"))

    # Split membership is stored per node as 'val'/'test' boolean attributes.
    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n]['test']]
    val_ids = [n for n in G.nodes() if G.node[n]['val']]
    train_labels = [labels[i] for i in train_ids]
    test_labels = [labels[i] for i in test_ids]
    val_labels = [labels[i] for i in val_ids]

    feats = np.load(dataset_dir + "/reddit-feats.npy")
    ## Logistic gets thrown off by big counts, so log transform num comments and score
    feats[:, 0] = np.log(feats[:, 0] + 1.0)
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))

    feat_id_map = json.load(open(dataset_dir + "reddit-id_map.json"))
    # FIX: dict.iteritems() is Python-2-only; items() behaves identically here
    # and also runs under Python 3. Loop vars renamed so the builtin `id` is
    # no longer shadowed.
    feat_id_map = {node: row for node, row in feat_id_map.items()}

    train_index = [feat_id_map[node] for node in train_ids]
    val_index = [feat_id_map[node] for node in val_ids]
    test_index = [feat_id_map[node] for node in test_ids]
    np.savez(output_file, feats=feats, y_train=train_labels, y_val=val_labels,
             y_test=test_labels, train_index=train_index, val_index=val_index,
             test_index=test_index)
def main(rank1):
    """Train the Mix-layers FastGCN on Reddit with UNIFORM column sampling
    (no importance weights) — the uniform-sampling baseline.

    rank1: number of columns sampled uniformly per minibatch for the top
        layer; None disables sampling and feeds the full support.
    """
    # config = tf.ConfigProto(device_count={"CPU": 4},  # limit to num_cpu_core CPU usage
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=4,
    #                         log_device_placement=False)
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj+adj.T  # symmetrize the adjacency
    y_train = transferLabel2Onehot(y_train, 41)
    y_val = transferLabel2Onehot(y_val, 41)
    y_test = transferLabel2Onehot(y_test, 41)
    features = sp.lil_matrix(features)

    # Inductive setting: training graph restricted to training nodes.
    adj_train = adj[train_index, :][:, train_index]
    numNode_train = adj_train.shape[0]

    if FLAGS.model == 'gcn_mix':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        model_func = GCN_APPRO_Mix
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Precompute A*X once; the bottom layer is then a plain dense lookup.
    features = nontuple_preprocess_features(features).todense()
    train_features = normADJ_train.dot(features[train_index])
    features = normADJ.dot(features)
    nonzero_feature_number = len(np.nonzero(features)[0])
    nonzero_feature_number_train = len(np.nonzero(train_features)[0])

    placeholders = {
        'support': tf.sparse_placeholder(tf.float32),
        'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()
    saver = tf.train.Saver()

    def evaluate(features, support, labels, placeholders):
        # Returns (loss, accuracy, wall time) for one full forward pass.
        t_test = time.time()
        feed_dict_val = construct_feeddict_forMixlayers(features, support, labels, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    sess.run(tf.global_variables_initializer())

    cost_val = []
    valSupport = sparse_to_tuple(normADJ[val_index, :])
    testSupport = sparse_to_tuple(normADJ[test_index, :])

    t = time.time()
    maxACC = 0.0
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=256, shuffle=True):
            [normADJ_batch, y_train_batch] = batch
            if rank1 is None:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = train_features
            else:
                # Uniformly sample among columns nonzero in this minibatch.
                distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1]
                if rank1 > len(distr):
                    q1 = distr
                else:
                    q1 = np.random.choice(distr, rank1, replace=False)  # top layer
                # q1 = np.random.choice(np.arange(numNode_train), rank1)  # top layer
                # Uniform estimator: scale the kept columns by N/len(q1).
                support1 = sparse_to_tuple(normADJ_batch[:, q1]*numNode_train/len(q1))
                features_inputs = train_features[q1, :]  # selected nodes for approximation
            feed_dict = construct_feeddict_forMixlayers(features_inputs, support1, y_train_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n+1
        cost, acc, duration = evaluate(features, valSupport, y_val, placeholders)
        cost_val.append(cost)
        # Checkpoint the best validation accuracy after a 50-epoch warm-up.
        if epoch > 50 and acc > maxACC:
            maxACC = acc
            save_path = saver.save(sess, "tmp/tmp_MixModel_uniform.ckpt")
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))
        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break
    train_duration = time.time() - t

    # NOTE(review): V2 Saver checkpoints only create "<prefix>.index"/
    # ".data-*" files, so this exact-path check is likely always False and the
    # best checkpoint is never restored — confirm; checking for
    # "tmp/tmp_MixModel_uniform.ckpt.index" would match what Saver writes
    # (the sampleA variant of this script checks the ".index" file).
    if os.path.exists("tmp/tmp_MixModel_uniform.ckpt"):
        saver.restore(sess, "tmp/tmp_MixModel_uniform.ckpt")
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, placeholders)
    print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "epoch = {}".format(epoch + 1),
          "test time=", "{:.5f}".format(test_duration))
def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Walk several parallel arrays in lockstep, one fixed-size batch at a
    time; rows that do not fill a complete batch are skipped."""
    assert inputs is not None
    total = inputs[0].shape[0]
    if shuffle:
        order = np.arange(total)
        np.random.shuffle(order)
    stop = total - batchsize + 1
    for begin in range(0, stop, batchsize):
        end = begin + batchsize
        picks = order[begin:end] if shuffle else slice(begin, end)
        yield [chunk[picks] for chunk in inputs]


def loadRedditFromG(dataset_dir, inputfile):
    """Read the pickled Reddit objects (count-prefixed stream) plus the
    feature matrix; returns (csr adj, lil feats, labels..., indices...)."""
    handle = open(dataset_dir + inputfile)
    count = pkl.load(handle)
    loaded = [pkl.load(handle) for _ in range(count)]
    adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(loaded)
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    return (sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels,
            test_labels, train_index, val_index, test_index)


def loadRedditFromNPZ(dataset_dir):
    """Read the preprocessed Reddit adjacency and tensor bundle from disk."""
    adjacency = sp.load_npz(dataset_dir + "reddit_adj.npz")
    bundle = np.load(dataset_dir + "reddit.npz")
    return (adjacency, bundle['feats'], bundle['y_train'], bundle['y_val'],
            bundle['y_test'], bundle['train_index'], bundle['val_index'],
            bundle['test_index'])
def main(rank1, rank0):
    """Train the two-layer sampled GCN (GCN_APPRO) on Reddit.

    rank1: sample size for the top support layer.
    rank0: sample size for the bottom support layer.
    """
    # config = tf.ConfigProto(device_count={"CPU": 4},  # limit to num_cpu_core CPU usage
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=4,
    #                         log_device_placement=False)
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj+adj.T  # symmetrize the adjacency
    y_train = transferLabel2Onehot(y_train, 41)
    y_val = transferLabel2Onehot(y_val, 41)
    y_test = transferLabel2Onehot(y_test, 41)
    features = sp.lil_matrix(features)

    adj_train = adj[train_index, :][:, train_index]
    numNode_train = adj_train.shape[0]
    # Every node in each split is labeled, so the masks are all ones.
    train_mask = np.ones((numNode_train,))
    val_mask = np.ones((y_val.shape[0],))
    test_mask = np.ones((y_test.shape[0],))

    # Some preprocessing: raw features only (no precomputed A*X here — this
    # variant samples both layers).
    features = nontuple_preprocess_features(features).todense()
    train_features = features[train_index]

    if FLAGS.model == 'gcn_appr':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        num_supports = 2
        model_func = GCN_APPRO
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()

    def evaluate(features, support, labels, mask, placeholders):
        # Returns (loss, accuracy, wall time) for one full forward pass.
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    cost_val = []
    p0 = column_prop(normADJ_train)

    # Validation/test use the full two supports (no sampling).
    valSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ[val_index, :])]
    testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ[test_index, :])]

    t = time.time()
    maxACC = 0.0
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        n = 0
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train, train_mask], batchsize=256, shuffle=True):
            [normADJ_batch, y_train_batch, train_mask_batch] = batch
            if sum(train_mask_batch) < 1:
                continue  # nothing to learn from a fully-masked batch
            # Importance-sample the top layer from this batch's columns...
            p1 = column_prop(normADJ_batch)
            q1 = np.random.choice(np.arange(numNode_train), rank1, replace=False, p=p1)  # top layer
            support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))
            # ...then the bottom layer conditioned on the rows picked above.
            p2 = column_prop(normADJ_train[q1, :])
            q0 = np.random.choice(np.arange(numNode_train), rank0, replace=False, p=p2)
            support0 = sparse_to_tuple(normADJ_train[q1, :][:, q0])
            # Rescale bottom-layer features by 1/(p*rank0) for unbiasedness.
            features_inputs = np.diag(1.0 / (p2[q0] * rank0)).dot(train_features[q0, :])  # selected nodes for approximation
            # (A commented-out alternative that sampled only nonzero columns
            # was removed here for readability.)
            feed_dict = construct_feed_dict(features_inputs, [support0, support1], y_train_batch, train_mask_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
            n = n+1
        cost, acc, duration = evaluate(features, valSupport, y_val, val_mask, placeholders)
        cost_val.append(cost)
        # Checkpoint the best validation accuracy after a 50-epoch warm-up.
        if epoch > 50 and acc > maxACC:
            maxACC = acc
            save_path = saver.save(sess, "tmp/tmp_redditModel.ckpt")
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time per batch=", "{:.5f}".format((time.time() - t1)/n))
        if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean(cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break
    train_duration = time.time() - t

    # NOTE(review): tf.train.Saver (V2) writes "<prefix>.index"/".data-*", not
    # a file named exactly "tmp/tmp_redditModel.ckpt", so this check is likely
    # always False and the best checkpoint is never restored — confirm, and
    # consider testing for "tmp/tmp_redditModel.ckpt.index" instead (the
    # sampleA script in this repository checks the ".index" file).
    if os.path.exists("tmp/tmp_redditModel.ckpt"):
        saver.restore(sess, "tmp/tmp_redditModel.ckpt")
    test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, test_mask, placeholders)
    print("rank1 = {}".format(rank1), "rank0 = {}".format(rank0),
          "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration),
          "epoch = {}".format(epoch + 1),
          "test time=", "{:.5f}".format(test_duration))
flags.DEFINE_string('model', 'gcn_appr', 'Model string.')  # 'gcn', 'gcn_appr'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 300, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 64, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.1, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 1e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 30, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# Importance-sampling ranks (number of nodes sampled per layer).
rank1 = 300
rank0 = 300


def iterate_minibatches_listinputs(inputs, batchsize, shuffle=False):
    """Yield aligned minibatches from a list of equally-sized array-likes.

    Each yielded item is a list with one slice per input. A trailing partial
    batch (fewer than `batchsize` rows) is dropped, matching the original
    behavior.
    """
    assert inputs is not None
    numSamples = inputs[0].shape[0]
    if shuffle:
        indices = np.arange(numSamples)
        np.random.shuffle(indices)
    for start_idx in range(0, numSamples - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        # FIX: renamed loop variable so it no longer shadows builtin `input`.
        yield [inp[excerpt] for inp in inputs]


def loadRedditFromG(dataset_dir, inputfile):
    """Load the pickled Reddit graph data plus the .npy feature matrix."""
    # FIX: open the pickle in binary mode and close it deterministically;
    # the original used a text-mode handle that was never closed.
    objects = []
    with open(dataset_dir + inputfile, 'rb') as f:
        for _ in range(pkl.load(f)):
            objects.append(pkl.load(f))
    adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(objects)
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    return (sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels,
            test_labels, train_index, val_index, test_index)


def loadRedditFromNPZ(dataset_dir):
    """Load the preprocessed Reddit adjacency (.npz) and data bundle."""
    adj = sp.load_npz(dataset_dir + "reddit_adj.npz")
    data = np.load(dataset_dir + "reddit.npz")
    return (adj, data['feats'], data['y_train'], data['y_val'], data['y_test'],
            data['train_index'], data['val_index'], data['test_index'])


def transferRedditDataFormat(dataset_dir, output_file):
    """Convert the GraphSAGE Reddit JSON dump into a single .npz bundle."""
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    labels = json.load(open(dataset_dir + "/reddit-class_map.json"))
    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n]['test']]
    val_ids = [n for n in G.nodes() if G.node[n]['val']]
    train_labels = [labels[i] for i in train_ids]
    test_labels = [labels[i] for i in test_ids]
    val_labels = [labels[i] for i in val_ids]
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    # Logistic gets thrown off by big counts, so log transform num comments and score
    feats[:, 0] = np.log(feats[:, 0] + 1.0)
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))
    feat_id_map = json.load(open(dataset_dir + "reddit-id_map.json"))
    # FIX: dict.iteritems() is Python-2-only; a plain copy is equivalent.
    feat_id_map = dict(feat_id_map.items())
    # FIX: renamed comprehension variable so it no longer shadows builtin `id`.
    train_index = [feat_id_map[node_id] for node_id in train_ids]
    val_index = [feat_id_map[node_id] for node_id in val_ids]
    test_index = [feat_id_map[node_id] for node_id in test_ids]
    np.savez(output_file, feats=feats, y_train=train_labels, y_val=val_labels,
             y_test=test_labels, train_index=train_index, val_index=val_index,
             test_index=test_index)


def transferLabel2Onehot(labels, N):
    """Convert a vector of integer class labels into a (len(labels), N) one-hot matrix."""
    y = np.zeros((len(labels), N))
    for i in range(len(labels)):
        pos = labels[i]
        y[i, pos] = 1
    return y


def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Baseline: SGD logistic regression on fixed embeddings vs. a dummy classifier."""
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import accuracy_score
    dummy = DummyClassifier()
    dummy.fit(train_embeds, train_labels)
    log = SGDClassifier(loss="log", n_jobs=55)
    log.fit(train_embeds, train_labels)
    print("Test scores")
    print(accuracy_score(test_labels, log.predict(test_embeds)))
    print("Train scores")
    print(accuracy_score(train_labels, log.predict(train_embeds)))
    print("Random baseline")
    print(accuracy_score(test_labels, dummy.predict(test_embeds)))


def main(rank1):
    """Train the one-layer FastGCN (importance-sampled) model on Reddit.

    rank1: number of nodes importance-sampled for the single GCN layer, or
    None to use the full (unsampled) batch support.
    """
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    # Symmetrize the adjacency (stored one-directional).
    adj = adj + adj.T
    y_train = transferLabel2Onehot(y_train, 50)
    y_val = transferLabel2Onehot(y_val, 50)
    y_test = transferLabel2Onehot(y_test, 50)
    features = sp.lil_matrix(features)

    # Inductive setting: training only ever sees the train-train subgraph.
    adj_train = adj[train_index, :][:, train_index]
    adj_val = adj[val_index, :][:, val_index]
    adj_test = adj[test_index, :][:, test_index]
    numNode_train = adj_train.shape[0]
    train_mask = np.ones((numNode_train,))
    val_mask = np.ones((adj_val.shape[0],))
    test_mask = np.ones((adj_test.shape[0],))

    # Some preprocessing
    features = nontuple_preprocess_features(features)
    train_features = features[train_index]

    if FLAGS.model == 'gcn_appr':
        normADJ_train = nontuple_preprocess_adj(adj_train)
        normADJ = nontuple_preprocess_adj(adj)
        num_supports = 2
        model_func = GCN_APPRO_Onelayer
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.sparse_placeholder(tf.float32),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model and session
    model = model_func(placeholders, input_dim=features.shape[-1], logging=True)
    sess = tf.Session()

    def evaluate(features, support, labels, mask, placeholders):
        """Run loss/accuracy on a fixed (unsampled) support; returns timing too."""
        t_test = time.time()
        feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
        outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
        return outs_val[0], outs_val[1], (time.time() - t_test)

    # Init variables
    sess.run(tf.global_variables_initializer())
    cost_val = []

    # Validation/test use the full normalized adjacency rows (no sampling).
    valSupport = [sparse_to_tuple(normADJ[val_index, :])]
    testSupport = [sparse_to_tuple(normADJ[test_index, :])]

    t = time.time()
    # Train model
    for epoch in range(FLAGS.epochs):
        t1 = time.time()
        for batch in iterate_minibatches_listinputs([normADJ_train, y_train, train_mask],
                                                    batchsize=5120, shuffle=True):
            [normADJ_batch, y_train_batch, train_mask_batch] = batch
            if sum(train_mask_batch) < 1:
                continue
            # Column-norm importance distribution for this batch.
            p1 = column_prop(normADJ_batch)
            if rank1 is not None:
                q1 = np.random.choice(np.arange(numNode_train), rank1, p=p1)  # top layer
                # Rescale sampled columns by 1/(p*rank) so the sampled support
                # is an unbiased estimate of the full product.
                support1 = sparse_to_tuple(normADJ_batch[:, q1].dot(sp.diags(1.0 / (p1[q1] * rank1))))
                features_inputs = sparse_to_tuple(train_features[q1, :])  # selected nodes for approximation
            else:
                support1 = sparse_to_tuple(normADJ_batch)
                features_inputs = sparse_to_tuple(train_features)

            # Construct feed dictionary
            feed_dict = construct_feed_dict(features_inputs, [support1], y_train_batch,
                                            train_mask_batch, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            # Training step
            outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

        # Validation
        cost, acc, duration = evaluate(sparse_to_tuple(features), valSupport, y_val,
                                       val_mask, placeholders)
        cost_val.append(cost)

        # Print results
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]),
              "val_loss=", "{:.5f}".format(cost),
              "val_acc=", "{:.5f}".format(acc),
              "time=", "{:.5f}".format(time.time() - t1))

        if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(
                cost_val[-(FLAGS.early_stopping + 1):-1]):
            # print("Early stopping...")
            break

    train_duration = time.time() - t

    # Testing
    test_cost, test_acc, test_duration = evaluate(sparse_to_tuple(features), testSupport,
                                                  y_test, test_mask, placeholders)
    print("rank1 = {}".format(rank1), "rank0 = {}".format(rank0),
          "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "training time=", "{:.5f}".format(train_duration))


def transferG2ADJ():
    """Convert the raw Reddit JSON graph into a sparse CSR adjacency on disk."""
    G = json_graph.node_link_graph(json.load(open("reddit/reddit-G.json")))
    feat_id_map = json.load(open("reddit/reddit-id_map.json"))
    # FIX: dict.iteritems() is Python-2-only; a plain copy is equivalent.
    feat_id_map = dict(feat_id_map.items())
    numNode = len(feat_id_map)
    # Hoist edge list so the graph is traversed only once.
    edges = list(G.edges())
    newEdges0 = [feat_id_map[edge[0]] for edge in edges]
    newEdges1 = [feat_id_map[edge[1]] for edge in edges]
    # FIX: removed the dense np.zeros((numNode, numNode)) buffer that was
    # allocated (O(n^2) memory) and then immediately discarded.
    adj = sp.csr_matrix((np.ones((len(newEdges0),)), (newEdges0, newEdges1)),
                        shape=(numNode, numNode))
    sp.save_npz("reddit_adj.npz", adj)


def original():
    """Logistic-regression baseline on one-hop aggregated raw features."""
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ("data/")
    adj = adj + adj.T
    # NOTE: the normalized adjacency computed here in the original was unused
    # and has been removed (nontuple_preprocess_adj over the full graph is
    # expensive).
    features = adj.dot(features)
    train_feats = features[train_index, :]
    test_feats = features[test_index, :]
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    scaler.fit(train_feats)
    train_feats = scaler.transform(train_feats)
    test_feats = scaler.transform(test_feats)
    run_regression(train_feats, y_train, test_feats, y_test)


if __name__ == "__main__":
    # transferRedditDataFormat("reddit/","data/reddit.npz")
    # original()
    main(50)


# ================================================
# FILE: transformRedditGraph2NPZ.py
# ================================================
#### Please first download original Reddit Graph Data:
#### http://snap.stanford.edu/graphsage/reddit.zip ####
import json
from networkx.readwrite import json_graph
import scipy.sparse as sp
import numpy as np
import pickle as pkl


def loadRedditFromG(dataset_dir, inputfile):
    """Load the pickled Reddit graph data plus the .npy feature matrix."""
    # FIX: open the pickle in binary mode and close it deterministically;
    # the original used a text-mode handle that was never closed.
    objects = []
    with open(dataset_dir + inputfile, 'rb') as f:
        for _ in range(pkl.load(f)):
            objects.append(pkl.load(f))
    adj, train_labels, val_labels, test_labels, train_index, val_index, test_index = tuple(objects)
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    return (sp.csr_matrix(adj), sp.lil_matrix(feats), train_labels, val_labels,
            test_labels, train_index, val_index, test_index)


def loadRedditFromNPZ(dataset_dir):
    """Load the preprocessed Reddit adjacency (.npz) and data bundle."""
    adj = sp.load_npz(dataset_dir + "reddit_adj.npz")
    data = np.load(dataset_dir + "reddit.npz")
    return (adj, data['feats'], data['y_train'], data['y_val'], data['y_test'],
            data['train_index'], data['val_index'], data['test_index'])


def transferRedditData2AdjNPZ(dataset_dir):
    """Build the Reddit adjacency matrix from the JSON dump and save as reddit_adj.npz."""
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    feat_id_map = json.load(open(dataset_dir + "/reddit-id_map.json"))
    # FIX: dict.iteritems() is Python-2-only; the original comprehension was
    # also an identity copy that shadowed the builtin `id`.
    feat_id_map = dict(feat_id_map.items())
    numNode = len(feat_id_map)
    # NOTE: leftover debug prints removed; lil_matrix supports efficient
    # incremental assignment, hence the format choice before conversion.
    adj = sp.lil_matrix((numNode, numNode))
    for edge in G.edges():
        adj[feat_id_map[edge[0]], feat_id_map[edge[1]]] = 1
    sp.save_npz("reddit_adj.npz", sp.coo_matrix(adj))


def transferRedditDataFormat(dataset_dir, output_file):
    """Convert the GraphSAGE Reddit JSON dump into a single .npz bundle."""
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    labels = json.load(open(dataset_dir + "/reddit-class_map.json"))
    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n]['test']]
    val_ids = [n for n in G.nodes() if G.node[n]['val']]
    train_labels = [labels[i] for i in train_ids]
    test_labels = [labels[i] for i in test_ids]
    val_labels = [labels[i] for i in val_ids]
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    ## Logistic gets thrown off by big counts, so log transform num comments and score
    feats[:, 0] = np.log(feats[:, 0] + 1.0)
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))
    feat_id_map = json.load(open(dataset_dir + "reddit-id_map.json"))
    # FIX: dict.iteritems() is Python-2-only; a plain copy is equivalent.
    feat_id_map = dict(feat_id_map.items())
    # FIX: renamed comprehension variable so it no longer shadows builtin `id`.
    train_index = [feat_id_map[node_id] for node_id in train_ids]
    val_index = [feat_id_map[node_id] for node_id in val_ids]
    test_index = [feat_id_map[node_id] for node_id in test_ids]
    np.savez(output_file, feats=feats, y_train=train_labels, y_val=val_labels,
             y_test=test_labels, train_index=train_index, val_index=val_index,
             test_index=test_index)


if __name__ == "__main__":
    # transferRedditData2AdjNPZ("reddit")
    transferRedditDataFormat("reddit", "reddit.npz")


# ================================================
# FILE: utils.py
# ================================================
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys
from scipy.sparse.linalg import norm as sparsenorm
from scipy.linalg import qr
# from sklearn.metrics import f1_score


def parse_index_file(filename):
    """Parse index file: one integer node index per line."""
    # FIX: close the file deterministically (the original leaked the handle).
    with open(filename) as f:
        return [int(line.strip()) for line in f]


def sample_mask(idx, l):
    """Create a boolean mask of length l that is True at positions idx."""
    mask = np.zeros(l)
    mask[idx] = 1
    # FIX: np.bool is a deprecated alias removed in NumPy >= 1.24; the builtin
    # bool produces an identical boolean array.
    return np.array(mask, dtype=bool)

# NOTE(review): a large block of commented-out dead code (calc_f1 and an older
# copy of load_data) was deleted here; see load_data_original below for the
# preserved original split.
def load_data(dataset_str):
    """Load a citation dataset (cora/citeseer/pubmed) in Planetoid format.

    Uses the FastGCN split: all labeled nodes except the last 500 for
    training, the last 500 of ally for validation (cf. load_data_original).

    Returns (adj, features, y_train, y_val, y_test, train_mask, val_mask,
    test_mask).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    # FastGCN split: train on all of ally except the last 500, validate on those 500.
    idx_train = range(len(ally) - 500)
    idx_val = range(len(ally) - 500, len(ally))

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask


def load_data_original(dataset_str):
    """Load a citation dataset with the original Planetoid/GCN split.

    Same as load_data but trains on only len(y) labeled nodes and validates
    on the following 500.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask


def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx


def nontuple_preprocess_features(features):
    """Row-normalize a feature matrix; rows summing to zero are left at zero."""
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features


def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation."""
    # FIX: the original duplicated nontuple_preprocess_features line-for-line;
    # delegate so the two cannot drift apart.
    return sparse_to_tuple(nontuple_preprocess_features(features))


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix: D^-1/2 A D^-1/2."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()


def nontuple_preprocess_adj(adj):
    """Symmetrically normalize (A + I) and return CSR (renormalization trick)."""
    adj_normalized = normalize_adj(sp.eye(adj.shape[0]) + adj)
    return adj_normalized.tocsr()


def column_prop(adj):
    """Importance-sampling distribution proportional to column L2 norms."""
    column_norm = sparsenorm(adj, axis=0)
    norm_sum = sum(column_norm)
    return column_norm / norm_sum


def mix_prop(adj, features, sparseinputs=False):
    """Sampling distribution mixing adjacency column norms with feature row norms."""
    adj_column_norm = sparsenorm(adj, axis=0)
    if sparseinputs:
        features_row_norm = sparsenorm(features, axis=1)
    else:
        features_row_norm = np.linalg.norm(features, axis=1)
    mix_norm = adj_column_norm * features_row_norm
    norm_sum = sum(mix_norm)
    return mix_norm / norm_sum


def preprocess_adj(adj):
    """Preprocessing of adjacency matrix for simple GCN model and conversion to
    tuple representation."""
    # NOTE(review): commented-out low-rank (lanczos/SVD) experiments removed.
    adj_normalized = normalize_adj(sp.eye(adj.shape[0]) + adj)
    return sparse_to_tuple(adj_normalized)


def dense_lanczos(A, K):
    """Rank-K approximation of dense A via Lanczos tridiagonalization."""
    # FIX: project-local import made lazy so utils stays importable when the
    # lanczos module is absent and unused.
    from lanczos import lanczos
    q = np.random.randn(A.shape[0], )
    Q, sigma = lanczos(A, K, q)
    A2 = np.dot(Q[:, :K], np.dot(sigma[:K, :K], Q[:, :K].T))
    return sp.csr_matrix(A2)


def sparse_lanczos(A, k):
    """Rank-k approximation of sparse A via Lanczos with full reorthogonalization."""
    q = sp.random(A.shape[0], 1)
    n = A.shape[0]
    Q = sp.lil_matrix(np.zeros((n, k + 1)))
    A = sp.lil_matrix(A)
    Q[:, 0] = q / sparsenorm(q)

    alpha = 0
    beta = 0
    for i in range(k):
        if i == 0:
            q = A * Q[:, i]
        else:
            q = A * Q[:, i] - beta * Q[:, i - 1]
        alpha = q.T * Q[:, i]
        q = q - Q[:, i] * alpha
        q = q - Q[:, :i] * Q[:, :i].T * q  # full reorthogonalization
        beta = sparsenorm(q)
        Q[:, i + 1] = q / beta
        # FIX: leftover per-iteration debug print removed.
    Q = Q[:, :k]
    Sigma = Q.T * A * Q
    A2 = Q[:, :k] * Sigma[:k, :k] * Q[:, :k].T
    return A2


def dense_RandomSVD(A, K):
    """Rank-K approximation of A via randomized range-finding (Q Q^T A)."""
    G = np.random.randn(A.shape[0], K)
    B = np.dot(A, G)
    Q, R = qr(B, mode='economic')
    M = np.dot(Q, np.dot(Q.T, A))
    return sp.csr_matrix(M)


def construct_feed_dict(features, support, labels, labels_mask, placeholders):
    """Construct feed dictionary."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['labels_mask']: labels_mask})
    feed_dict.update({placeholders['features']: features})
    feed_dict.update({placeholders['support'][i]: support[i] for i in range(len(support))})
    # features is a (coords, values, shape) tuple; values count feeds sparse dropout.
    feed_dict.update({placeholders['num_features_nonzero']: features[1].shape})
    return feed_dict


def chebyshev_polynomials(adj, k):
    """Calculate Chebyshev polynomials up to order k. Return a list of sparse
    matrices (tuple representation)."""
    print("Calculating Chebyshev polynomials up to order {}...".format(k))

    adj_normalized = normalize_adj(adj)
    laplacian = sp.eye(adj.shape[0]) - adj_normalized
    largest_eigval, _ = eigsh(laplacian, 1, which='LM')
    # Rescale the Laplacian spectrum into [-1, 1] for the Chebyshev recurrence.
    scaled_laplacian = (2. / largest_eigval[0]) * laplacian - sp.eye(adj.shape[0])

    t_k = list()
    t_k.append(sp.eye(adj.shape[0]))
    t_k.append(scaled_laplacian)

    def chebyshev_recurrence(t_k_minus_one, t_k_minus_two, scaled_lap):
        # T_k(x) = 2x T_{k-1}(x) - T_{k-2}(x)
        s_lap = sp.csr_matrix(scaled_lap, copy=True)
        return 2 * s_lap.dot(t_k_minus_one) - t_k_minus_two

    for i in range(2, k + 1):
        t_k.append(chebyshev_recurrence(t_k[-1], t_k[-2], scaled_laplacian))

    return sparse_to_tuple(t_k)