Repository: kevin031060/RL_TSP_4static
Branch: master
Commit: 4cb6d1d23c5e
Files: 436
Total size: 75.4 KB

Directory structure:
gitextract_7k7g2yn_/

├── Post_process/
│   ├── convet_kro_dataloader.py
│   ├── data/
│   │   ├── obj1_4_100.mat
│   │   ├── obj1_4_150.mat
│   │   ├── obj1_4_200.mat
│   │   ├── obj1_4_40.mat
│   │   ├── obj1_4_500.mat
│   │   ├── obj1_4_70.mat
│   │   ├── obj2_4_100.mat
│   │   ├── obj2_4_150.mat
│   │   ├── obj2_4_200.mat
│   │   ├── obj2_4_40.mat
│   │   ├── obj2_4_500.mat
│   │   ├── obj2_4_70.mat
│   │   ├── rl4_100.mat
│   │   ├── rl4_150.mat
│   │   ├── rl4_200.mat
│   │   ├── rl4_40.mat
│   │   ├── rl4_500.mat
│   │   ├── rl4_70.mat
│   │   ├── tour4_100.mat
│   │   └── tour4_200.mat
│   ├── dis_matrix.py
│   ├── krodata/
│   │   ├── kroA100.tsp
│   │   ├── kroA150.tsp
│   │   ├── kroA200.tsp
│   │   ├── kroB100.tsp
│   │   ├── kroB150.tsp
│   │   └── kroB200.tsp
│   ├── load_all_reward.py
│   ├── obj1.mat
│   ├── obj2.mat
│   └── rl.mat
├── README.md
├── model.py
├── parameter_transfer.py
├── tasks/
│   ├── motsp.py
│   ├── tsp.py
│   └── vrp.py
├── trainer_motsp_no_transfer.py
├── trainer_motsp_transfer.py
├── tsp_transfer_100run_500000_5epoch_20city/
│   └── 20/
│       ├── w_0.04_0.96/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.05_0.95/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.06_0.94/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.07_0.93/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.08_0.92/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.09_0.91/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.10_0.90/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.11_0.89/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.12_0.88/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.13_0.87/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.14_0.86/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.15_0.85/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.16_0.84/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.17_0.83/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.18_0.82/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.19_0.81/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.20_0.80/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.21_0.79/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.22_0.78/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.23_0.77/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.24_0.76/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.25_0.75/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.26_0.74/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.27_0.73/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.28_0.72/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.29_0.71/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.30_0.70/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.31_0.69/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.32_0.68/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.33_0.67/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.34_0.66/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.35_0.65/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.36_0.64/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.37_0.63/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.38_0.62/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.39_0.61/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.40_0.60/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.41_0.59/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.42_0.58/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.43_0.57/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.44_0.56/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.45_0.55/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.46_0.54/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.47_0.53/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.48_0.52/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.49_0.51/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.50_0.50/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.51_0.49/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.52_0.48/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.53_0.47/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.54_0.46/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.55_0.45/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.56_0.44/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.57_0.43/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.58_0.42/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.59_0.41/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.60_0.40/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.61_0.39/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.62_0.38/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.63_0.37/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.64_0.36/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.65_0.35/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.66_0.34/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.67_0.33/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.68_0.32/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.69_0.31/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.70_0.30/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.71_0.29/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.72_0.28/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.73_0.27/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.74_0.26/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.75_0.25/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.76_0.24/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.77_0.23/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.78_0.22/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.79_0.21/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.80_0.20/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.81_0.19/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.82_0.18/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.83_0.17/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.84_0.16/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.85_0.15/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.86_0.14/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.87_0.13/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.88_0.12/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.89_0.11/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.90_0.10/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.91_0.09/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.92_0.08/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.93_0.07/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.94_0.06/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.95_0.05/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.96_0.04/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.97_0.03/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.98_0.02/
│       │   ├── actor.pt
│       │   └── critic.pt
│       ├── w_0.99_0.01/
│       │   ├── actor.pt
│       │   └── critic.pt
│       └── w_1.00_0.00/
│           ├── actor.pt
│           └── critic.pt
└── tsp_transfer_100run_500000_5epoch_40city/
    └── 40/
        ├── w_0.00_1.00/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.01_0.99/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.02_0.98/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.03_0.97/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.04_0.96/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.05_0.95/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.06_0.94/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.07_0.93/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.08_0.92/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.09_0.91/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.10_0.90/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.11_0.89/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.12_0.88/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.13_0.87/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.14_0.86/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.15_0.85/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.16_0.84/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.17_0.83/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.18_0.82/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.19_0.81/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.20_0.80/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.21_0.79/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.22_0.78/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.23_0.77/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.24_0.76/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.25_0.75/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.26_0.74/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.27_0.73/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.28_0.72/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.29_0.71/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.30_0.70/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.31_0.69/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.32_0.68/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.33_0.67/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.34_0.66/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.35_0.65/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.36_0.64/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.37_0.63/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.38_0.62/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.39_0.61/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.40_0.60/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.41_0.59/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.42_0.58/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.43_0.57/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.44_0.56/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.45_0.55/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.46_0.54/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.47_0.53/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.48_0.52/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.49_0.51/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.50_0.50/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.51_0.49/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.52_0.48/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.53_0.47/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.54_0.46/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.55_0.45/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.56_0.44/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.57_0.43/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.58_0.42/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.59_0.41/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.60_0.40/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.61_0.39/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.62_0.38/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.63_0.37/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.64_0.36/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.65_0.35/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.66_0.34/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.67_0.33/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.68_0.32/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.69_0.31/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.70_0.30/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.71_0.29/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.72_0.28/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.73_0.27/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.74_0.26/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.75_0.25/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.76_0.24/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.77_0.23/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.78_0.22/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.79_0.21/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.80_0.20/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.81_0.19/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.82_0.18/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.83_0.17/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.84_0.16/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.85_0.15/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.86_0.14/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.87_0.13/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.88_0.12/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.89_0.11/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.90_0.10/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.91_0.09/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.92_0.08/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.93_0.07/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.94_0.06/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.95_0.05/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.96_0.04/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.97_0.03/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.98_0.02/
        │   ├── actor.pt
        │   └── critic.pt
        ├── w_0.99_0.01/
        │   ├── actor.pt
        │   └── critic.pt
        └── w_1.00_0.00/
            ├── actor.pt
            └── critic.pt

================================================
FILE CONTENTS
================================================

================================================
FILE: Post_process/convet_kro_dataloader.py
================================================
import numpy as np
import torch
from torch.utils.data import Dataset
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt


class Kro_dataset(Dataset):
    """Single-instance bi-objective TSP dataset built from kroA/kroB files.

    The (x, y) coordinates of ``kroA<N>.tsp`` and ``kroB<N>.tsp`` are each
    scaled by their per-column maximum and stacked into one static tensor of
    shape ``[1, 4, N]`` (rows: kroA x, kroA y, kroB x, kroB y). The dynamic
    tensor is an all-zero ``[1, 1, N]`` tensor.
    """

    def __init__(self, num_nodes, data_dir='krodata'):
        """Load and normalise the kroA/kroB instance with ``num_nodes`` cities.

        Args:
            num_nodes: instance size N; selects ``kroA<N>.tsp`` / ``kroB<N>.tsp``.
            data_dir: directory containing the ``.tsp`` files. Defaults to
                ``'krodata'`` resolved against the current working directory,
                which is what the original hard-coded path did.
        """
        # Function-scope import: this module does not import os at the top.
        import os

        super(Kro_dataset, self).__init__()

        x1 = self._load_normalized(os.path.join(data_dir, 'kroA%d.tsp' % num_nodes))
        x2 = self._load_normalized(os.path.join(data_dir, 'kroB%d.tsp' % num_nodes))

        # [N, 2] + [N, 2] -> [N, 4] -> [4, N] -> [1, 4, N]
        x = np.concatenate((x1, x2), axis=1)
        x = x.T
        x = x.reshape(1, 4, num_nodes)

        self.dataset = torch.from_numpy(x).float()
        self.dynamic = torch.zeros(1, 1, num_nodes)
        self.num_nodes = num_nodes
        self.size = 1

    @staticmethod
    def _load_normalized(path):
        """Read the two coordinate columns of a .tsp file, scaled by column max."""
        # skiprows=6 skips the six header lines up to and including
        # NODE_COORD_SECTION; column 0 is the node id and is dropped.
        coords = np.loadtxt(path, skiprows=6, usecols=(1, 2), delimiter=' ', dtype=float)
        return coords / (np.max(coords, 0))

    def __len__(self):
        """Return the number of instances held (always 1)."""
        return self.size

    def __getitem__(self, idx):
        """Return ``(static, dynamic, start_loc)``; ``start_loc`` is an empty list."""
        # (static, dynamic, start_loc)
        return (self.dataset[idx], self.dynamic[idx], [])

================================================
FILE: Post_process/dis_matrix.py
================================================
import numpy as np
import torch

def _pairwise_euclidean(points):
    """Return the [N, N] Euclidean distance matrix for ``points`` of shape [d, N]."""
    # Broadcasting [d, N, 1] against [d, 1, N] gives every pairwise difference.
    diff = points.unsqueeze(2) - points.unsqueeze(1)
    return torch.sqrt(torch.sum(torch.pow(diff, 2), dim=0))


def dis_matrix(static, s_size):
    """Build the pairwise distance matrices of both objectives.

    Args:
        static: tensor of city features; a leading dimension of size 1 is
            squeezed away, leaving ``[s_size, N]``. Rows 0-1 are the
            coordinates of objective 1, the remaining rows belong to
            objective 2.
        s_size: number of feature rows. With ``s_size == 3`` objective 2 is a
            single scalar per city and its distance is the absolute
            difference; otherwise it is the Euclidean distance over rows 2+.

    Returns:
        ``(obj1_matrix, obj2_matrix)``: two ``float64`` NumPy arrays of shape
        ``[N, N]`` with zeros on the diagonal.
    """
    static = static.detach().squeeze(0)

    # [2, N]
    obj1 = static[:2, :]
    # [s_size - 2, N]
    obj2 = static[2:, :]

    obj1_dist = _pairwise_euclidean(obj1)
    if s_size == 3:
        # obj2 is [1, N]: take its only row before comparing cities. The
        # original indexed obj2[i], which is out of range for any i >= 1.
        values = obj2[0]
        obj2_dist = torch.abs(values.unsqueeze(1) - values.unsqueeze(0))
    else:
        obj2_dist = _pairwise_euclidean(obj2)

    # Same dtype the original np.zeros((l, l)) buffers had.
    obj1_matrix = obj1_dist.cpu().numpy().astype(np.float64)
    obj2_matrix = obj2_dist.cpu().numpy().astype(np.float64)

    return obj1_matrix, obj2_matrix

================================================
FILE: Post_process/krodata/kroA100.tsp
================================================
NAME: kroA100
TYPE: TSP
COMMENT: 100-city problem A (Krolak/Felts/Nelson)
DIMENSION: 100
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 1380 939
2 2848 96
3 3510 1671
4 457 334
5 3888 666
6 984 965
7 2721 1482
8 1286 525
9 2716 1432
10 738 1325
11 1251 1832
12 2728 1698
13 3815 169
14 3683 1533
15 1247 1945
16 123 862
17 1234 1946
18 252 1240
19 611 673
20 2576 1676
21 928 1700
22 53 857
23 1807 1711
24 274 1420
25 2574 946
26 178 24
27 2678 1825
28 1795 962
29 3384 1498
30 3520 1079
31 1256 61
32 1424 1728
33 3913 192
34 3085 1528
35 2573 1969
36 463 1670
37 3875 598
38 298 1513
39 3479 821
40 2542 236
41 3955 1743
42 1323 280
43 3447 1830
44 2936 337
45 1621 1830
46 3373 1646
47 1393 1368
48 3874 1318
49 938 955
50 3022 474
51 2482 1183
52 3854 923
53 376 825
54 2519 135
55 2945 1622
56 953 268
57 2628 1479
58 2097 981
59 890 1846
60 2139 1806
61 2421 1007
62 2290 1810
63 1115 1052
64 2588 302
65 327 265
66 241 341
67 1917 687
68 2991 792
69 2573 599
70 19 674
71 3911 1673
72 872 1559
73 2863 558
74 929 1766
75 839 620
76 3893 102
77 2178 1619
78 3822 899
79 378 1048
80 1178 100
81 2599 901
82 3416 143
83 2961 1605
84 611 1384
85 3113 885
86 2597 1830
87 2586 1286
88 161 906
89 1429 134
90 742 1025
91 1625 1651
92 1187 706
93 1787 1009
94 22 987
95 3640 43
96 3756 882
97 776 392
98 1724 1642
99 198 1810
100 3950 1558


================================================
FILE: Post_process/krodata/kroA150.tsp
================================================
NAME: kroA150
TYPE: TSP
COMMENT: 150-city problem A (Krolak/Felts/Nelson)
DIMENSION: 150
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 1380 939
2 2848 96
3 3510 1671
4 457 334
5 3888 666
6 984 965
7 2721 1482
8 1286 525
9 2716 1432
10 738 1325
11 1251 1832
12 2728 1698
13 3815 169
14 3683 1533
15 1247 1945
16 123 862
17 1234 1946
18 252 1240
19 611 673
20 2576 1676
21 928 1700
22 53 857
23 1807 1711
24 274 1420
25 2574 946
26 178 24
27 2678 1825
28 1795 962
29 3384 1498
30 3520 1079
31 1256 61
32 1424 1728
33 3913 192
34 3085 1528
35 2573 1969
36 463 1670
37 3875 598
38 298 1513
39 3479 821
40 2542 236
41 3955 1743
42 1323 280
43 3447 1830
44 2936 337
45 1621 1830
46 3373 1646
47 1393 1368
48 3874 1318
49 938 955
50 3022 474
51 2482 1183
52 3854 923
53 376 825
54 2519 135
55 2945 1622
56 953 268
57 2628 1479
58 2097 981
59 890 1846
60 2139 1806
61 2421 1007
62 2290 1810
63 1115 1052
64 2588 302
65 327 265
66 241 341
67 1917 687
68 2991 792
69 2573 599
70 19 674
71 3911 1673
72 872 1559
73 2863 558
74 929 1766
75 839 620
76 3893 102
77 2178 1619
78 3822 899
79 378 1048
80 1178 100
81 2599 901
82 3416 143
83 2961 1605
84 611 1384
85 3113 885
86 2597 1830
87 2586 1286
88 161 906
89 1429 134
90 742 1025
91 1625 1651
92 1187 706
93 1787 1009
94 22 987
95 3640 43
96 3756 882
97 776 392
98 1724 1642
99 198 1810
100 3950 1558
101 3477 949
102 91 1732
103 3972 329
104 198 1632
105 1806 733
106 538 1023
107 3430 1088
108 2186 766
109 1513 1646
110 2143 1611
111 53 1657
112 3404 1307
113 1034 1344
114 2823 376
115 3104 1931
116 3232 324
117 2790 1457
118 374 9
119 741 146
120 3083 1938
121 3502 1067
122 1280 237
123 3326 1846
124 217 38
125 2503 1172
126 3527 41
127 739 1850
128 3548 1999
129 48 154
130 1419 872
131 1689 1223
132 3468 1404
133 1628 253
134 382 872
135 3029 1242
136 3646 1758
137 285 1029
138 1782 93
139 1067 371
140 2849 1214
141 920 1835
142 1741 712
143 876 220
144 2753 283
145 2609 1286
146 3941 258
147 3613 523
148 1754 559
149 2916 1724
150 2445 1820


================================================
FILE: Post_process/krodata/kroA200.tsp
================================================
NAME: kroA200
TYPE: TSP
COMMENT: 200-city problem A (Krolak/Felts/Nelson)
DIMENSION: 200
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 1357 1905
2 2650 802
3 1774 107
4 1307 964
5 3806 746
6 2687 1353
7 43 1957
8 3092 1668
9 185 1542
10 834 629
11 40 462
12 1183 1391
13 2048 1628
14 1097 643
15 1838 1732
16 234 1118
17 3314 1881
18 737 1285
19 779 777
20 2312 1949
21 2576 189
22 3078 1541
23 2781 478
24 705 1812
25 3409 1917
26 323 1714
27 1660 1556
28 3729 1188
29 693 1383
30 2361 640
31 2433 1538
32 554 1825
33 913 317
34 3586 1909
35 2636 727
36 1000 457
37 482 1337
38 3704 1082
39 3635 1174
40 1362 1526
41 2049 417
42 2552 1909
43 3939 640
44 219 898
45 812 351
46 901 1552
47 2513 1572
48 242 584
49 826 1226
50 3278 799
51 86 1065
52 14 454
53 1327 1893
54 2773 1286
55 2469 1838
56 3835 963
57 1031 428
58 3853 1712
59 1868 197
60 1544 863
61 457 1607
62 3174 1064
63 192 1004
64 2318 1925
65 2232 1374
66 396 828
67 2365 1649
68 2499 658
69 1410 307
70 2990 214
71 3646 1018
72 3394 1028
73 1779 90
74 1058 372
75 2933 1459
76 3099 173
77 2178 978
78 138 1610
79 2082 1753
80 2302 1127
81 805 272
82 22 1617
83 3213 1085
84 99 536
85 1533 1780
86 3564 676
87 29 6
88 3808 1375
89 2221 291
90 3499 1885
91 3124 408
92 781 671
93 1027 1041
94 3249 378
95 3297 491
96 213 220
97 721 186
98 3736 1542
99 868 731
100 960 303
101 1380 939
102 2848 96
103 3510 1671
104 457 334
105 3888 666
106 984 965
107 2721 1482
108 1286 525
109 2716 1432
110 738 1325
111 1251 1832
112 2728 1698
113 3815 169
114 3683 1533
115 1247 1945
116 123 862
117 1234 1946
118 252 1240
119 611 673
120 2576 1676
121 928 1700
122 53 857
123 1807 1711
124 274 1420
125 2574 946
126 178 24
127 2678 1825
128 1795 962
129 3384 1498
130 3520 1079
131 1256 61
132 1424 1728
133 3913 192
134 3085 1528
135 2573 1969
136 463 1670
137 3875 598
138 298 1513
139 3479 821
140 2542 236
141 3955 1743
142 1323 280
143 3447 1830
144 2936 337
145 1621 1830
146 3373 1646
147 1393 1368
148 3874 1318
149 938 955
150 3022 474
151 2482 1183
152 3854 923
153 376 825
154 2519 135
155 2945 1622
156 953 268
157 2628 1479
158 2097 981
159 890 1846
160 2139 1806
161 2421 1007
162 2290 1810
163 1115 1052
164 2588 302
165 327 265
166 241 341
167 1917 687
168 2991 792
169 2573 599
170 19 674
171 3911 1673
172 872 1559
173 2863 558
174 929 1766
175 839 620
176 3893 102
177 2178 1619
178 3822 899
179 378 1048
180 1178 100
181 2599 901
182 3416 143
183 2961 1605
184 611 1384
185 3113 885
186 2597 1830
187 2586 1286
188 161 906
189 1429 134
190 742 1025
191 1625 1651
192 1187 706
193 1787 1009
194 22 987
195 3640 43
196 3756 882
197 776 392
198 1724 1642
199 198 1810
200 3950 1558


================================================
FILE: Post_process/krodata/kroB100.tsp
================================================
NAME: kroB100
TYPE: TSP
COMMENT: 100-city problem B (Krolak/Felts/Nelson)
DIMENSION: 100
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 3140 1401
2 556 1056
3 3675 1522
4 1182 1853
5 3595 111
6 962 1895
7 2030 1186
8 3507 1851
9 2642 1269
10 3438 901
11 3858 1472
12 2937 1568
13 376 1018
14 839 1355
15 706 1925
16 749 920
17 298 615
18 694 552
19 387 190
20 2801 695
21 3133 1143
22 1517 266
23 1538 224
24 844 520
25 2639 1239
26 3123 217
27 2489 1520
28 3834 1827
29 3417 1808
30 2938 543
31 71 1323
32 3245 1828
33 731 1741
34 2312 1270
35 2426 1851
36 380 478
37 2310 635
38 2830 775
39 3829 513
40 3684 445
41 171 514
42 627 1261
43 1490 1123
44 61 81
45 422 542
46 2698 1221
47 2372 127
48 177 1390
49 3084 748
50 1213 910
51 3 1817
52 1782 995
53 3896 742
54 1829 812
55 1286 550
56 3017 108
57 2132 1432
58 2000 1110
59 3317 1966
60 1729 1498
61 2408 1747
62 3292 152
63 193 1210
64 782 1462
65 2503 352
66 1697 1924
67 3821 147
68 3370 791
69 3162 367
70 3938 516
71 2741 1583
72 2330 741
73 3918 1088
74 1794 1589
75 2929 485
76 3453 1998
77 896 705
78 399 850
79 2614 195
80 2800 653
81 2630 20
82 563 1513
83 1090 1652
84 2009 1163
85 3876 1165
86 3084 774
87 1526 1612
88 1612 328
89 1423 1322
90 3058 1276
91 3782 1865
92 347 252
93 3904 1444
94 2191 1579
95 3220 1454
96 468 319
97 3611 1968
98 3114 1629
99 3515 1892
100 3060 155


================================================
FILE: Post_process/krodata/kroB150.tsp
================================================
NAME: kroB150
TYPE: TSP
COMMENT: 150-city problem B (Krolak/Felts/Nelson)
DIMENSION: 150
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 1357 1905
2 2650 802
3 1774 107
4 1307 964
5 3806 746
6 2687 1353
7 43 1957
8 3092 1668
9 185 1542
10 834 629
11 40 462
12 1183 1391
13 2048 1628
14 1097 643
15 1838 1732
16 234 1118
17 3314 1881
18 737 1285
19 779 777
20 2312 1949
21 2576 189
22 3078 1541
23 2781 478
24 705 1812
25 3409 1917
26 323 1714
27 1660 1556
28 3729 1188
29 693 1383
30 2361 640
31 2433 1538
32 554 1825
33 913 317
34 3586 1909
35 2636 727
36 1000 457
37 482 1337
38 3704 1082
39 3635 1174
40 1362 1526
41 2049 417
42 2552 1909
43 3939 640
44 219 898
45 812 351
46 901 1552
47 2513 1572
48 242 584
49 826 1226
50 3278 799
51 86 1065
52 14 454
53 1327 1893
54 2773 1286
55 2469 1838
56 3835 963
57 1031 428
58 3853 1712
59 1868 197
60 1544 863
61 457 1607
62 3174 1064
63 192 1004
64 2318 1925
65 2232 1374
66 396 828
67 2365 1649
68 2499 658
69 1410 307
70 2990 214
71 3646 1018
72 3394 1028
73 1779 90
74 1058 372
75 2933 1459
76 3099 173
77 2178 978
78 138 1610
79 2082 1753
80 2302 1127
81 805 272
82 22 1617
83 3213 1085
84 99 536
85 1533 1780
86 3564 676
87 29 6
88 3808 1375
89 2221 291
90 3499 1885
91 3124 408
92 781 671
93 1027 1041
94 3249 378
95 3297 491
96 213 220
97 721 186
98 3736 1542
99 868 731
100 960 303
101 3825 1101
102 2779 435
103 201 693
104 2502 1274
105 765 833
106 3105 1823
107 1937 1400
108 3364 1498
109 3702 1624
110 2164 1874
111 3019 189
112 3098 1594
113 3239 1376
114 3359 1693
115 2081 1011
116 1398 1100
117 618 1953
118 1878 59
119 3803 886
120 397 1217
121 3035 152
122 2502 146
123 3230 380
124 3479 1023
125 958 1670
126 3423 1241
127 78 1066
128 96 691
129 3431 78
130 2053 1461
131 3048 1
132 571 1711
133 3393 782
134 2835 1472
135 144 1185
136 923 108
137 989 1997
138 3061 1211
139 2977 39
140 1668 658
141 878 715
142 678 1599
143 1086 868
144 640 110
145 3551 1673
146 106 1267
147 2243 1332
148 3796 1401
149 2643 1320
150 48 267


================================================
FILE: Post_process/krodata/kroB200.tsp
================================================
NAME: kroB200
TYPE: TSP
COMMENT: 200-city problem B (Krolak/Felts/Nelson)
DIMENSION: 200
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 3140 1401
2 556 1056
3 3675 1522
4 1182 1853
5 3595 111
6 962 1895
7 2030 1186
8 3507 1851
9 2642 1269
10 3438 901
11 3858 1472
12 2937 1568
13 376 1018
14 839 1355
15 706 1925
16 749 920
17 298 615
18 694 552
19 387 190
20 2801 695
21 3133 1143
22 1517 266
23 1538 224
24 844 520
25 2639 1239
26 3123 217
27 2489 1520
28 3834 1827
29 3417 1808
30 2938 543
31 71 1323
32 3245 1828
33 731 1741
34 2312 1270
35 2426 1851
36 380 478
37 2310 635
38 2830 775
39 3829 513
40 3684 445
41 171 514
42 627 1261
43 1490 1123
44 61 81
45 422 542
46 2698 1221
47 2372 127
48 177 1390
49 3084 748
50 1213 910
51 3 1817
52 1782 995
53 3896 742
54 1829 812
55 1286 550
56 3017 108
57 2132 1432
58 2000 1110
59 3317 1966
60 1729 1498
61 2408 1747
62 3292 152
63 193 1210
64 782 1462
65 2503 352
66 1697 1924
67 3821 147
68 3370 791
69 3162 367
70 3938 516
71 2741 1583
72 2330 741
73 3918 1088
74 1794 1589
75 2929 485
76 3453 1998
77 896 705
78 399 850
79 2614 195
80 2800 653
81 2630 20
82 563 1513
83 1090 1652
84 2009 1163
85 3876 1165
86 3084 774
87 1526 1612
88 1612 328
89 1423 1322
90 3058 1276
91 3782 1865
92 347 252
93 3904 1444
94 2191 1579
95 3220 1454
96 468 319
97 3611 1968
98 3114 1629
99 3515 1892
100 3060 155
101 2995 264
102 202 233
103 981 848
104 1346 408
105 781 670
106 1009 1001
107 2927 1777
108 2982 949
109 555 1121
110 464 1302
111 3452 637
112 571 1982
113 2656 128
114 1623 1723
115 2067 694
116 1725 927
117 3600 459
118 1109 1196
119 366 339
120 778 1282
121 386 1616
122 3918 1217
123 3332 1049
124 2597 349
125 811 1295
126 241 1069
127 2658 360
128 394 1944
129 3786 1862
130 264 36
131 2050 1833
132 3538 125
133 1646 1817
134 2993 624
135 547 25
136 3373 1902
137 460 267
138 3060 781
139 1828 456
140 1021 962
141 2347 388
142 3535 1112
143 1529 581
144 1203 385
145 1787 1902
146 2740 1101
147 555 1753
148 47 363
149 3935 540
150 3062 329
151 387 199
152 2901 920
153 931 512
154 1766 692
155 401 980
156 149 1629
157 2214 1977
158 3805 1619
159 1179 969
160 1017 333
161 2834 1512
162 634 294
163 1819 814
164 1393 859
165 1768 1578
166 3023 871
167 3248 1906
168 1632 1742
169 2223 990
170 3868 697
171 1541 354
172 2374 1944
173 1962 389
174 3007 1524
175 3220 1945
176 2356 1568
177 1604 706
178 2028 1736
179 2581 121
180 2221 1578
181 2944 632
182 1082 1561
183 997 942
184 2334 523
185 1264 1090
186 1699 1294
187 235 1059
188 2592 248
189 3642 699
190 3599 514
191 1766 678
192 240 619
193 1272 246
194 3503 301
195 80 1533
196 1677 1238
197 3766 154
198 3946 459
199 1994 1852
200 278 165


================================================
FILE: Post_process/load_all_reward.py
================================================
import torch
from tasks import motsp
from tasks.motsp import TSPDataset, reward
from torch.utils.data import DataLoader
from model import DRL4TSP
from trainer_motsp_transfer import StateCritic
import numpy as np
import os
import matplotlib.pyplot as plt
import scipy.io as scio
from Post_process.dis_matrix import dis_matrix
import time

# Load the trained model and convert the obtained Pareto Front to the .mat file.
# It is convenient to visualize it in matlab

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# "../tsp_transfer_100run_500000_5epoch_20city/20" gives mediocre results; it should be trained again.
save_dir = "../tsp_transfer_100run_500000_5epoch_40city/40"
# save_dir = "../tsp_transfer/100"
# Model hyper-parameters; they must match the ones the checkpoints were trained with.
update_fn = None
STATIC_SIZE = 4  # four static features per city (two coordinate pairs, one per objective)
DYNAMIC_SIZE = 1  # dummy for compatibility

# Build the actor and critic networks; their weights are loaded from save_dir below.
# Positional arguments: hidden size 128, one GRU layer, dropout 0.1.
actor = DRL4TSP(STATIC_SIZE,
                DYNAMIC_SIZE,
                128,
                update_fn,
                motsp.update_mask,
                1,
                0.1).to(device)
critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, 128).to(device)

# Test instance: either a Kro benchmark problem or a single seeded random instance.
from Post_process.convet_kro_dataloader import Kro_dataset
kro = 1
D = 200  # number of cities
if kro:
    D = 200
    Test_data = Kro_dataset(D)
else:
    # Seeds noted by the author for the random instances
    # (40city_train: city20 13, city40 143, city70 2523).
    Test_data = TSPDataset(D, 1, 2523)
Test_loader = DataLoader(Test_data, 1, False, num_workers=0)

# Pull the single test batch. Use the built-in next(): the Python-2 style
# `.next()` method is not available on current DataLoader iterators.
iter_data = iter(Test_loader)
static, dynamic, x0 = next(iter_data)
static = static.to(device)
dynamic = dynamic.to(device)
# The datasets return an empty start location, which the actor expects as None.
x0 = x0.to(device) if len(x0) > 0 else None

# Evaluate one trained model per weight pair (w1, w2) = (1 - w, w); N + 1 models in total.
N = 100
w = np.arange(N + 1) / N
objs = np.zeros((N + 1, 2))  # row i holds (obj1, obj2) of the tour found by model i
start = time.time()
t1_all = 0  # accumulated checkpoint-loading time
t2_all = 0  # accumulated forward-pass time
tours = []
for i in range(0, N + 1):
    w1, w2 = 1 - w[i], w[i]
    model_dir = os.path.join(save_dir, "w_%2.2f_%2.2f" % (w1, w2))

    t1 = time.time()
    ac = os.path.join(model_dir, "actor.pt")
    cri = os.path.join(model_dir, "critic.pt")
    actor.load_state_dict(torch.load(ac, device))
    critic.load_state_dict(torch.load(cri, device))
    t1_all = t1_all + time.time() - t1

    # NOTE(review): the actor is never switched to eval mode, so DRL4TSP.forward
    # takes its sampling branch and dropout stays active -- confirm this is intended.
    with torch.no_grad():
        # Time the forward pass so the "time_predict_model" report below is meaningful
        # (it was always 0 while these lines were commented out).
        t2 = time.time()
        tour_indices, _ = actor(static, dynamic, x0)
        t2_all = t2_all + time.time() - t2

    _, obj1, obj2 = reward(static, tour_indices, w1, w2)
    tours.append(tour_indices.cpu().numpy())
    # The batch holds one instance; .item() yields Python floats, so the
    # assignment also works when the tensors live on the GPU.
    objs[i, :] = [obj1.item(), obj2.item()]

print("time_load_model:%2.4f"%t1_all)
print("time_predict_model:%2.4f"%t2_all)
print(time.time()-start)

# Dump the tours and show the objective values of all models as a scatter plot.
print(tours)
plt.figure()
plt.plot(objs[:,0],objs[:,1],"ro")
plt.show()

# Convert to .mat
# dis_matrix is defined in Post_process/dis_matrix.py; presumably it returns one
# pairwise distance matrix per objective -- verify against that module.
obj1_matrix, obj2_matrix = dis_matrix(static, STATIC_SIZE)
# Output files are written relative to the current working directory ("data/...").
scio.savemat("data/obj1_%d_%d.mat"%(STATIC_SIZE, D), {'obj1':obj1_matrix})
scio.savemat("data/obj2_%d_%d.mat"%(STATIC_SIZE, D), {'obj2':obj2_matrix})
scio.savemat("data/rl%d_%d.mat"%(STATIC_SIZE, D),{'rl':objs})
scio.savemat("data/tour%d_%d.mat"%(STATIC_SIZE, D),{'tour':np.array(tours)})


# from load_test_plot import show
# show_if = 1
# if show_if:
#     i = 0
#     ac = os.path.join(save_dir, "w_%2.2f_%2.2f" % (1-w[i], w[i]),"actor.pt")
#     cri = os.path.join(save_dir, "w_%2.2f_%2.2f" % (1-w[i], w[i]),"critic.pt")
#     actor.load_state_dict(torch.load(ac, device))
#     critic.load_state_dict(torch.load(cri, device))
#
#     show(Test_loader, actor)


================================================
FILE: README.md
================================================
# Using a Deep Reinforcement Learning method and an Attention model to solve the Multiobjective TSP.
## This code is the model with four-dimension input (Euclidean-type).
### The model with three-dimension input (Mixed-type) is in the RL_3static_MOTSP.zip.
### The Matlab code for the visualization and the comparisons in the paper is in MOTSP_compare_EMO.zip.

+ Trained model is available in the tsp_transfer_... dirs.
+ To test the model, use load_all_reward.py in the Post_process dir.
+ To train the model, run trainer_motsp_transfer.py
+ To visualize the obtained Pareto Front, the result should be visualized using Matlab.
+ matlab code is in the .zip file. It is in the " MOTSP_compare_EMO/Problems/Combinatorial MOPs/compare.m ". It is used to produce the figures in batch. 
    
    > First you need to run trainer_motsp_transfer.py to train the model.
    
    > Run load_all_reward.py to load and test the model. It also converts the obtained Pareto Front to a .mat file
    
    > Run the Matlab code to visualize the Pareto Front and compare with NSGA-II and MOEA/D
    
    
### Much of the code is inherited from https://github.com/mveres01/pytorch-drl4vrp


================================================
FILE: model.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

# Module-wide default device: the GPU when CUDA is available, otherwise the CPU.
# The attention parameters, the decoder start token and the mask below are created on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')


class Encoder(nn.Module):
    """Per-element feature embedding built from a kernel-size-1 Conv1d.

    Every position of the input sequence is projected independently from
    ``input_size`` channels to ``hidden_size`` channels.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.conv = nn.Conv1d(in_channels=input_size,
                              out_channels=hidden_size,
                              kernel_size=1)

    def forward(self, input):
        # (batch, input_size, seq_len) -> (batch, hidden_size, seq_len)
        return self.conv(input)


class Attention(nn.Module):
    """Additive attention over the input nodes, conditioned on the decoder state.

    For every node the static embedding, the dynamic embedding and the
    (broadcast) decoder state are concatenated, projected by ``W``, squashed
    with tanh and scored by ``v``; the scores are soft-maxed over the nodes.
    """

    def __init__(self, hidden_size):
        super().__init__()

        # Scoring vector; registered before W so the parameter order is unchanged.
        v_init = torch.zeros((1, 1, hidden_size),
                             device=device, requires_grad=True)
        self.v = nn.Parameter(v_init)

        # Projection applied to the concatenated [static; dynamic; decoder] features.
        w_init = torch.zeros((1, hidden_size, 3 * hidden_size),
                             device=device, requires_grad=True)
        self.W = nn.Parameter(w_init)

    def forward(self, static_hidden, dynamic_hidden, decoder_hidden):
        n_batch, n_hidden, _ = static_hidden.size()

        # Repeat the decoder state for every node, then stack along the channel axis.
        query = decoder_hidden.unsqueeze(2).expand_as(static_hidden)
        stacked = torch.cat((static_hidden, dynamic_hidden, query), 1)

        # Expand the shared parameters over the batch so bmm can be used.
        v = self.v.expand(n_batch, 1, n_hidden)
        W = self.W.expand(n_batch, n_hidden, -1)

        scores = torch.bmm(v, torch.tanh(torch.bmm(W, stacked)))
        return F.softmax(scores, dim=2)  # (batch, 1, seq_len)


class Pointer(nn.Module):
    """One decoding step: advance the GRU, attend over the nodes and score them.

    ``forward`` returns unnormalised per-node scores (the caller applies the
    softmax) together with the updated GRU hidden state.
    """

    def __init__(self, hidden_size, num_layers=1, dropout=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Parameters of the final scoring layer over [static; context] features.
        v_init = torch.zeros((1, 1, hidden_size),
                             device=device, requires_grad=True)
        self.v = nn.Parameter(v_init)

        w_init = torch.zeros((1, hidden_size, 2 * hidden_size),
                             device=device, requires_grad=True)
        self.W = nn.Parameter(w_init)

        # Summarises the decoder inputs seen so far.
        # GRU(input dim, hidden dim, number of layers); the built-in inter-layer
        # dropout is only enabled when there is more than one layer.
        gru_dropout = dropout if num_layers > 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers,
                          batch_first=True,
                          dropout=gru_dropout)
        self.encoder_attn = Attention(hidden_size)

        self.drop_rnn = nn.Dropout(p=dropout)
        self.drop_hh = nn.Dropout(p=dropout)

    def forward(self, static_hidden, dynamic_hidden, decoder_hidden, last_hh):

        # decoder_hidden arrives channel-first; the batch_first GRU wants (B, 1, hidden).
        gru_in = decoder_hidden.transpose(2, 1)
        rnn_out, last_hh = self.gru(gru_in, last_hh)

        # The RNN output always goes through dropout ...
        rnn_out = self.drop_rnn(rnn_out.squeeze(1))
        # ... the hidden state only for a single layer (multi-layer GRUs already
        # apply their own dropout).
        if self.num_layers == 1:
            last_hh = self.drop_hh(last_hh)

        # Attention-weighted summary of the static embeddings.
        weights = self.encoder_attn(static_hidden, dynamic_hidden, rnn_out)
        context = weights.bmm(static_hidden.permute(0, 2, 1))  # (B, 1, num_feats)

        # Pair every node embedding with the shared context vector.
        context = context.transpose(1, 2).expand_as(static_hidden)
        energy = torch.cat((static_hidden, context), dim=1)  # (B, num_feats, seq_len)

        n_batch = static_hidden.size(0)
        v = self.v.expand(n_batch, -1, -1)
        W = self.W.expand(n_batch, -1, -1)

        projected = torch.tanh(torch.bmm(W, energy))
        probs = torch.bmm(v, projected).squeeze(1)

        return probs, last_hh


class DRL4TSP(nn.Module):
    """Defines the main Encoder, Decoder, and Pointer combinatorial models.

    Parameters
    ----------
    static_size: int
        Defines how many features are in the static elements of the model
        (e.g. 2 for (x, y) coordinates)
    dynamic_size: int >= 1
        Defines how many features are in the dynamic elements of the model
        (e.g. 2 for the VRP which has (load, demand) attributes. The TSP doesn't
        have dynamic elements, but to ensure compatibility with other
        optimization problems, assume we just pass in a vector of zeros.
    hidden_size: int
        Defines the number of units in the hidden layer for all static, dynamic,
        and decoder output units.
    update_fn: function or None
        If provided, this method is used to calculate how the input dynamic
        elements are updated, and is called after each 'point' to the input element.
    mask_fn: function or None
        Allows us to specify which elements of the input sequence are allowed to
        be selected. This is useful for speeding up training of the networks,
        by providing a sort of 'rules' guidelines to the algorithm. If no mask
        is provided, we terminate the search after a fixed number of iterations
        to avoid tours that stretch forever
    num_layers: int
        Specifies the number of hidden layers to use in the decoder RNN
    dropout: float
        Defines the dropout rate for the decoder

    Raises
    ------
    ValueError
        If ``dynamic_size`` is smaller than 1.
    """

    def __init__(self, static_size, dynamic_size, hidden_size,
                 update_fn=None, mask_fn=None, num_layers=1, dropout=0.):
        super(DRL4TSP, self).__init__()

        if dynamic_size < 1:
            raise ValueError(':param dynamic_size: must be > 0, even if the '
                             'problem has no dynamic elements')

        self.update_fn = update_fn
        self.mask_fn = mask_fn

        # Define the encoder & decoder models
        self.static_encoder = Encoder(static_size, hidden_size)
        self.dynamic_encoder = Encoder(dynamic_size, hidden_size)
        self.decoder = Encoder(static_size, hidden_size)
        self.pointer = Pointer(hidden_size, num_layers, dropout)

        # Xavier-initialise every weight matrix; 1-d parameters (biases) keep
        # their default initialisation.
        for p in self.parameters():
            if len(p.shape) > 1:
                nn.init.xavier_uniform_(p)

        # Used as a proxy initial state in the decoder when not specified.
        # It is a plain tensor (neither Parameter nor buffer), so it is absent
        # from state_dict() and is not moved by Module.to().
        self.x0 = torch.zeros((1, static_size, 1), requires_grad=True, device=device)

    def forward(self, static, dynamic, decoder_input=None, last_hh=None):
        """Decode a tour by repeatedly pointing at one input element.

        Parameters
        ----------
        static: Array of size (batch_size, feats, num_cities)
            Defines the elements to consider as static. For the TSP, this could be
            things like the (x, y) coordinates, which won't change
        dynamic: Array of size (batch_size, feats, num_cities)
            Defines the elements to consider as dynamic. For the VRP, this can be
            things like the (load, demand) of each city. It is always passed
            through the dynamic encoder, so problems without dynamic elements
            must still supply a tensor (e.g. zeros).
        decoder_input: Array of size (batch_size, num_feats)
            Defines the outputs for the decoder. Currently, we just use the
            static elements (e.g. (x, y) coordinates), but this can technically
            be other things as well. When None, a zero start token is used.
        last_hh: Array of size (batch_size, num_hidden)
            Defines the last hidden state for the RNN

        Returns
        -------
        tour_idx: indices of the chosen elements, one column per decoding step
        tour_logp: log-probability of each choice, same layout as tour_idx
        """

        batch_size, input_size, sequence_size = static.size()

        # Follow the device of the inputs instead of the module-global default, so
        # the model also works when it has been placed on a different device.
        run_device = static.device

        if decoder_input is None:
            decoder_input = self.x0.to(run_device).expand(batch_size, -1, -1)

        # Always use a mask - if no function is provided, we don't update it
        mask = torch.ones(batch_size, sequence_size, device=run_device)

        # Structures for holding the output sequences
        tour_idx, tour_logp = [], []
        max_steps = sequence_size if self.mask_fn is None else 1000

        # Static elements only need to be processed once, and can be used across
        # all 'pointing' iterations. When / if the dynamic elements change,
        # their representations will need to get calculated again.
        static_hidden = self.static_encoder(static)
        dynamic_hidden = self.dynamic_encoder(dynamic)

        for _ in range(max_steps):

            # Stop as soon as no element may be selected any more
            if not mask.byte().any():
                break

            # ... but compute a hidden rep for each element added to sequence
            decoder_hidden = self.decoder(decoder_input)

            probs, last_hh = self.pointer(static_hidden,
                                          dynamic_hidden,
                                          decoder_hidden, last_hh)
            # log(0) = -inf removes masked elements from the softmax
            probs = F.softmax(probs + mask.log(), dim=1)

            # When training, sample the next step according to its probability.
            # During testing, we can take the greedy approach and choose highest
            if self.training:
                m = torch.distributions.Categorical(probs)

                # Sometimes an issue with Categorical & sampling on GPU; See:
                # https://github.com/pemami4911/neural-combinatorial-rl-pytorch/issues/5
                ptr = m.sample()
                while not torch.gather(mask, 1, ptr.data.unsqueeze(1)).byte().all():
                    ptr = m.sample()
                logp = m.log_prob(ptr)
            else:
                prob, ptr = torch.max(probs, 1)  # Greedy
                logp = prob.log()

            # After visiting a node update the dynamic representation
            if self.update_fn is not None:
                dynamic = self.update_fn(dynamic, ptr.data)
                dynamic_hidden = self.dynamic_encoder(dynamic)

                # Since we compute the VRP in minibatches, some tours may have
                # a different number of stops. We force the vehicles to remain
                # at the depot in these cases, and logp := 0.
                # (Reads dynamic feature 1, so this branch needs >= 2 dynamic features.)
                is_done = dynamic[:, 1].sum(1).eq(0).float()
                logp = logp * (1. - is_done)

            # And update the mask so we don't re-visit if we don't need to
            if self.mask_fn is not None:
                mask = self.mask_fn(mask, dynamic, ptr.data).detach()

            tour_logp.append(logp.unsqueeze(1))
            tour_idx.append(ptr.data.unsqueeze(1))

            # The features of the element just chosen become the next decoder input
            decoder_input = torch.gather(static, 2,
                                         ptr.view(-1, 1, 1)
                                         .expand(-1, input_size, 1)).detach()

        tour_idx = torch.cat(tour_idx, dim=1)  # (batch_size, seq_len)
        tour_logp = torch.cat(tour_logp, dim=1)  # (batch_size, seq_len)

        return tour_idx, tour_logp


# This module only defines the networks; running it directly is an error.
if __name__ == '__main__':
    raise Exception('Cannot be called from main')


================================================
FILE: parameter_transfer.py
================================================
import torch
import os
from model import DRL4TSP, Encoder
import argparse
from tasks import motsp
from trainer_motsp_transfer import StateCritic

'''
This file was used for testing and is now obsolete.
This file is used to convert the trained single-TSP PN model to the parameters from which we can transfer.
The trained single-TSP PN model can be found here: https://github.com/mveres01/pytorch-drl4vrp. Save it as "tsp20".
Then the start-up parameters for the first subproblem of the MOTSP to transfer can be obtained.
'''


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
STATIC_SIZE_original = 2  # (x, y) input of the pre-trained single-objective model
STATIC_SIZE = 3  # widened input size: the original two features plus one extra
DYNAMIC_SIZE = 1  # dummy for compatibility
update_fn = None
hidden_size = 128
num_layers = 1
dropout = 0.1
checkpoint = "tsp20"  # directory holding the pre-trained actor.pt / critic.pt


def _widen_static_conv(encoder):
    """Return a STATIC_SIZE-input Encoder initialised from ``encoder``.

    The conv weight gains one extra input channel, copied from input channel 1;
    every other entry of the state dict is reused unchanged.
    """
    params = encoder.state_dict()
    weight = params['conv.weight']
    # Append a copy of input channel 1 along the input-channel axis
    params['conv.weight'] = torch.cat([weight, weight[:, 1, :].unsqueeze(1)], dim=1)
    widened = Encoder(STATIC_SIZE, hidden_size)
    widened.load_state_dict(params)
    return widened


actor = DRL4TSP(STATIC_SIZE_original,
                DYNAMIC_SIZE,
                hidden_size,
                update_fn,
                motsp.update_mask,
                num_layers,
                dropout).to(device)

critic = StateCritic(STATIC_SIZE_original, DYNAMIC_SIZE, hidden_size).to(device)
# Load the original model (hidden size 128, two static input features)
path = os.path.join(checkpoint, 'actor.pt')
actor.load_state_dict(torch.load(path, device))

path = os.path.join(checkpoint, 'critic.pt')
critic.load_state_dict(torch.load(path, device))

# The actor's static_encoder and decoder and the critic's static_encoder take the
# static features as input, so their input dimension has to be widened.
actor.static_encoder = _widen_static_conv(actor.static_encoder)
actor.decoder = _widen_static_conv(actor.decoder)
critic.static_encoder = _widen_static_conv(critic.static_encoder)

# torch.save does not create missing directories, so make sure the target exists.
save_dir = "modified_checkpoint_3obj"
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, 'actor.pt')
torch.save(actor.state_dict(), save_path)
save_path = os.path.join(save_dir, 'critic.pt')
torch.save(critic.state_dict(), save_path)

print(actor,critic)


================================================
FILE: tasks/motsp.py
================================================
"""Defines the main task for the TSP

The TSP is defined by the following traits:
    1. Each city in the list must be visited once and only once
    2. The salesman must return to the original node at the end of the tour

Since the TSP doesn't have dynamic elements, we return an empty list on
__getitem__, which gets processed in trainer.py to be None

"""

import os
import numpy as np
import torch
from torch.utils.data import Dataset
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt


class TSPDataset(Dataset):
    """Random multi-objective TSP instances with four static features per city.

    Parameters
    ----------
    size: int
        Number of cities per instance.
    num_samples: int or float
        Number of instances to generate. Integral floats such as the default
        ``1e6`` are accepted and converted to ``int``.
    seed: int or None
        Seed applied to both the NumPy and the torch global RNGs; a random
        seed is drawn when None.
    """

    def __init__(self, size=50, num_samples=1e6, seed=None):
        super(TSPDataset, self).__init__()

        # torch.rand / torch.zeros only accept integer dimensions, so the float
        # default (1e6) has to be converted before use.
        num_samples = int(num_samples)

        if seed is None:
            seed = np.random.randint(123456789)

        # NOTE: this reseeds the global NumPy and torch generators.
        np.random.seed(seed)
        torch.manual_seed(seed)
        # Static features are uniform in [0, 1); the dynamic part is an all-zero dummy.
        self.dataset = torch.rand((num_samples, 4, size))
        self.dynamic = torch.zeros(num_samples, 1, size)
        self.num_nodes = size
        self.size = num_samples

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # (static, dynamic, start_loc); the TSP has no fixed start location
        return (self.dataset[idx], self.dynamic[idx], [])


def update_mask(mask, dynamic, chosen_idx):
    """Zero the mask entry of the city just chosen in every batch row.

    The mask is modified in place and also returned; ``dynamic`` is unused.
    """
    picked = chosen_idx[:, None]
    return mask.scatter_(1, picked, 0)


def reward(static, tour_indices, w1=1, w2=0):
    """Weighted bi-objective length of closed tours.

    Parameters
    ----------
    static: torch.FloatTensor of size (batch_size, feats, num_cities)
        Features 0-1 are the coordinates used for the first objective, the
        remaining features (2 onwards) are the coordinates of the second one.
    tour_indices: torch.IntTensor of size (batch_size, num_cities)
    w1, w2: weights of the first and second objective

    Returns
    -------
    (obj, obj1, obj2), each of size (batch_size,): the weighted sum
    ``w1 * obj1 + w2 * obj2`` and the two individual closed-tour lengths.
    """

    def closed_length(points):
        # Sum of Euclidean distances between consecutive points
        hops = points[:, :-1] - points[:, 1:]
        return hops.pow(2).sum(dim=2).sqrt().sum(1).detach()

    # Reorder the cities by visiting order -> (batch_size, num_cities, feats)
    gather_idx = tour_indices.unsqueeze(1).expand_as(static)
    visited = torch.gather(static.data, 2, gather_idx).permute(0, 2, 1)

    # Close the tour by appending the first city again
    closed = torch.cat((visited, visited[:, :1]), dim=1)

    obj1 = closed_length(closed[:, :, :2])
    obj2 = closed_length(closed[:, :, 2:])

    return w1 * obj1 + w2 * obj2, obj1, obj2


def render(static, tour_indices, save_path):
    """Plot the found tours and save the figure to ``save_path``.

    A 3x3 grid is drawn when at least nine tours are given, otherwise only the
    first tour is shown. Only the first two static features are plotted.
    """

    plt.close('all')

    grid = 3 if int(np.sqrt(len(tour_indices))) >= 3 else 1

    _, axes = plt.subplots(nrows=grid, ncols=grid,
                           sharex='col', sharey='row')

    # Normalise to a flat list of axes regardless of the grid size
    if grid == 1:
        axes = [[axes]]
    panels = [panel for row in axes for panel in row]

    for sample, panel in enumerate(panels):

        # Visiting order of this sample, as a (1, num_cities) row
        order = tour_indices[sample]
        if len(order.size()) == 1:
            order = order.unsqueeze(0)

        # One copy of the order per feature, closed by repeating the first city
        order = order.expand(static.size(1), -1)
        order = torch.cat((order, order[:, 0:1]), dim=1)

        coords = torch.gather(static[sample].data, 1, order).cpu().numpy()
        xs, ys = coords[0], coords[1]

        panel.plot(xs, ys, zorder=1)
        panel.scatter(xs, ys, s=4, c='r', zorder=2)
        # Mark the starting city
        panel.scatter(xs[0], ys[0], s=20, c='k', marker='*', zorder=3)

        panel.set_xlim(0, 1)
        panel.set_ylim(0, 1)

    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=400)


================================================
FILE: tasks/tsp.py
================================================
"""Defines the main task for the TSP

The TSP is defined by the following traits:
    1. Each city in the list must be visited once and only once
    2. The salesman must return to the original node at the end of the tour

Since the TSP doesn't have dynamic elements, we return an empty list on
__getitem__, which gets processed in trainer.py to be None

"""

import os
import numpy as np
import torch
from torch.utils.data import Dataset
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt


class TSPDataset(Dataset):
    """Random TSP instances with (x, y) coordinates drawn uniformly from [0, 1).

    Parameters
    ----------
    size: int
        Number of cities per instance.
    num_samples: int or float
        Number of instances to generate. Integral floats such as the default
        ``1e6`` are accepted and converted to ``int``.
    seed: int or None
        Seed applied to both the NumPy and the torch global RNGs; a random
        seed is drawn when None.
    """

    def __init__(self, size=50, num_samples=1e6, seed=None):
        super(TSPDataset, self).__init__()

        # torch.rand / torch.zeros only accept integer dimensions, so the float
        # default (1e6) has to be converted before use.
        num_samples = int(num_samples)

        if seed is None:
            seed = np.random.randint(123456789)

        # NOTE: this reseeds the global NumPy and torch generators.
        np.random.seed(seed)
        torch.manual_seed(seed)
        self.dataset = torch.rand((num_samples, 2, size))
        # The TSP has no dynamic elements; an all-zero dummy is kept for compatibility.
        self.dynamic = torch.zeros(num_samples, 1, size)
        self.num_nodes = size
        self.size = num_samples

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # (static, dynamic, start_loc); the TSP has no fixed start location
        return (self.dataset[idx], self.dynamic[idx], [])


def update_mask(mask, dynamic, chosen_idx):
    """Zero the mask entry of the city just chosen in every batch row.

    The mask is modified in place and also returned; ``dynamic`` is unused.
    """
    picked = chosen_idx[:, None]
    return mask.scatter_(1, picked, 0)


def reward(static, tour_indices):
    """Length of the closed tour described by ``tour_indices``.

    Parameters
    ----------
    static: torch.FloatTensor of size (batch_size, feats, num_cities)
        containing the static (e.g. x, y) data
    tour_indices: torch.IntTensor of size (batch_size, num_cities)

    Returns
    -------
    Tensor of size (batch_size,): the summed Euclidean distance between
    consecutive cities, including the leg back to the first city.
    """

    # Reorder the cities by visiting order -> (batch_size, num_cities, feats)
    gather_idx = tour_indices.unsqueeze(1).expand_as(static)
    visited = torch.gather(static.data, 2, gather_idx).permute(0, 2, 1)

    # Close the tour by appending the first city again
    closed = torch.cat((visited, visited[:, :1]), dim=1)

    # Per-leg Euclidean distance, summed over the tour
    hops = closed[:, :-1] - closed[:, 1:]
    leg_len = hops.pow(2).sum(dim=2).sqrt()

    return leg_len.sum(1).detach()


def render(static, tour_indices, save_path):
    """Plot the found tours and save the figure to ``save_path``.

    A 3x3 grid is drawn when at least nine tours are given, otherwise only the
    first tour is shown.
    """

    plt.close('all')

    grid = 3 if int(np.sqrt(len(tour_indices))) >= 3 else 1

    _, axes = plt.subplots(nrows=grid, ncols=grid,
                           sharex='col', sharey='row')

    # Normalise to a flat list of axes regardless of the grid size
    if grid == 1:
        axes = [[axes]]
    panels = [panel for row in axes for panel in row]

    for sample, panel in enumerate(panels):

        # Visiting order of this sample, as a (1, num_cities) row
        order = tour_indices[sample]
        if len(order.size()) == 1:
            order = order.unsqueeze(0)

        # One copy of the order per feature, closed by repeating the first city
        order = order.expand(static.size(1), -1)
        order = torch.cat((order, order[:, 0:1]), dim=1)

        coords = torch.gather(static[sample].data, 1, order).cpu().numpy()
        xs, ys = coords[0], coords[1]

        panel.plot(xs, ys, zorder=1)
        panel.scatter(xs, ys, s=4, c='r', zorder=2)
        # Mark the starting city
        panel.scatter(xs[0], ys[0], s=20, c='k', marker='*', zorder=3)

        panel.set_xlim(0, 1)
        panel.set_ylim(0, 1)

    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=400)


================================================
FILE: tasks/vrp.py
================================================
"""Defines the main task for the VRP.

The VRP is defined by the following traits:
    1. Each city has a demand in [1, 9], which must be serviced by the vehicle
    2. Each vehicle has a capacity (depends on problem), the must visit all cities
    3. When the vehicle load is 0, it __must__ return to the depot to refill
"""

import os
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.autograd import Variable
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


class VehicleRoutingDataset(Dataset):
    """Random capacitated VRP instances plus the mask / state update rules.

    Node 0 of every instance is the depot. The static part holds the (x, y)
    locations; the dynamic part holds two features per node: the vehicle load
    (broadcast to all nodes) and the node's demand, both scaled by ``max_load``.

    Parameters
    ----------
    num_samples: int
        Number of instances to generate.
    input_size: int
        Number of customer nodes (the depot is added on top).
    max_load: int
        Vehicle capacity used to scale the demands.
    max_demand: int
        Largest integer demand of a customer.
    seed: int or None
        Seed applied to both the NumPy and the torch global RNGs; a random
        seed is drawn when None.

    Raises
    ------
    ValueError
        If ``max_load`` is smaller than ``max_demand``.
    """

    def __init__(self, num_samples, input_size, max_load=20, max_demand=9,
                 seed=None):
        super(VehicleRoutingDataset, self).__init__()

        if max_load < max_demand:
            raise ValueError(':param max_load: must be >= max_demand')

        if seed is None:
            seed = np.random.randint(1234567890)
        # NOTE: this reseeds the global NumPy and torch generators.
        np.random.seed(seed)
        torch.manual_seed(seed)

        self.num_samples = num_samples
        self.max_load = max_load
        self.max_demand = max_demand

        # Depot location will be the first node in each
        locations = torch.rand((num_samples, 2, input_size + 1))
        self.static = locations

        # All states will broadcast the drivers current load
        # Note that we only use a load between [0, 1] to prevent large
        # numbers entering the neural network
        dynamic_shape = (num_samples, 1, input_size + 1)
        loads = torch.full(dynamic_shape, 1.)

        # Every node gets an integer demand in [1, max_demand], which is then
        # divided by the maximum load. E.g. with max_load=10 and max_demand=30
        # the scaled demands lie in [0.1, 3].
        demands = torch.randint(1, max_demand + 1, dynamic_shape).float()
        demands = demands / float(max_load)

        demands[:, 0, 0] = 0  # depot starts with a demand of 0
        # (num_samples, 2, input_size + 1): feature 0 = load, feature 1 = demand
        self.dynamic = torch.cat((loads, demands), dim=1)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # (static, dynamic, start_loc); the start location is the depot
        return (self.static[idx], self.dynamic[idx], self.static[idx, :, 0:1])

    def update_mask(self, mask, dynamic, chosen_idx=None):
        """Updates the mask used to hide non-valid states.

        Parameters
        ----------
        mask: unused; a new mask is computed from ``dynamic``
        dynamic: tensor of size (batch_size, 2, seq_len) holding (load, demand)
        chosen_idx: tensor of size (batch_size,) with the node just visited

        Returns
        -------
        Float tensor of size (batch_size, seq_len); 1 marks a selectable node.
        """

        loads = dynamic.data[:, 0]  # (batch_size, seq_len)
        demands = dynamic.data[:, 1]  # (batch_size, seq_len)

        # If every demand entry is zero, we can end the tour. While a vehicle is
        # partially loaded the depot entry holds a non-positive marker value
        # (see update_dynamic), so this only triggers once all are back home.
        if demands.eq(0).all():
            return demands * 0.

        # Otherwise, we can choose to go anywhere where demand is > 0 and that
        # the current load can serve.
        new_mask = (demands.ne(0) * demands.lt(loads)).float()

        # We should avoid traveling to the depot back-to-back: the depot is
        # selectable exactly when the previous stop was a customer.
        # (The original `1 - bool_tensor` is rejected by current PyTorch.)
        new_mask[:, 0] = chosen_idx.ne(0).float()

        # ... unless we're waiting for all other samples in a minibatch to finish
        has_no_load = loads[:, 0].eq(0).float()
        has_no_demand = demands[:, 1:].sum(1).eq(0).float()

        combined = (has_no_load + has_no_demand).gt(0)
        if combined.any():
            # squeeze(1) keeps the index 1-d even when only one row matches
            done_rows = combined.nonzero().squeeze(1)
            new_mask[done_rows, 0] = 1.
            new_mask[done_rows, 1:] = 0.

        return new_mask

    def update_dynamic(self, dynamic, chosen_idx):
        """Updates the (load, demand) dataset values.

        Returns a new, detached tensor of the same size as ``dynamic``; the
        input is left untouched.
        """

        # Update the dynamic elements differently for if we visit depot vs. a city
        visit = chosen_idx.ne(0)
        depot = chosen_idx.eq(0)

        # Clone the dynamic variable so we don't mess up graph
        all_loads = dynamic[:, 0].clone()
        all_demands = dynamic[:, 1].clone()

        # Load and demand at the chosen node, each of size (batch_size, 1)
        load = torch.gather(all_loads, 1, chosen_idx.unsqueeze(1))
        demand = torch.gather(all_demands, 1, chosen_idx.unsqueeze(1))

        # Across the minibatch - if we've chosen to visit a city, try to satisfy
        # as much demand as possible
        if visit.any():

            new_load = torch.clamp(load - demand, min=0)
            new_demand = torch.clamp(demand - load, min=0)

            # Broadcast the load to all nodes, but update demand separately.
            # squeeze(1) keeps the index 1-d even when only one row visits a city.
            visit_idx = visit.nonzero().squeeze(1)

            all_loads[visit_idx] = new_load[visit_idx]
            all_demands[visit_idx, chosen_idx[visit_idx]] = new_demand[visit_idx].view(-1)
            # The depot entry stores (remaining load - 1), i.e. minus the refill amount
            all_demands[visit_idx, 0] = -1. + new_load[visit_idx].view(-1)

        # Return to depot to fill vehicle load
        if depot.any():
            depot_idx = depot.nonzero().squeeze(1)
            all_loads[depot_idx] = 1.
            all_demands[depot_idx, 0] = 0.

        tensor = torch.cat((all_loads.unsqueeze(1), all_demands.unsqueeze(1)), 1)
        # Detached copy, so the result carries no autograd history
        return tensor.detach().clone()


def reward(static, tour_indices):
    """
    Total Euclidean length of the route depot -> tour_indices -> depot,
    returned as a tensor of size (batch_size,)
    """

    # Reorder the nodes by visiting order -> (batch_size, num_stops, feats)
    n_feats = static.size(1)
    gather_idx = tour_indices.unsqueeze(1).expand(-1, n_feats, -1)
    visited = torch.gather(static.data, 2, gather_idx).permute(0, 2, 1)

    # Bracket the route with the depot (node 0). A route that already starts
    # or ends there just gains a zero-length leg.
    depot = static.data[:, :, 0].unsqueeze(1)
    route = torch.cat((depot, visited, depot), dim=1)

    # Per-leg Euclidean distance, summed over the route
    hops = route[:, :-1] - route[:, 1:]
    leg_len = hops.pow(2).sum(dim=2).sqrt()

    return leg_len.sum(1)


def render(static, tour_indices, save_path):
    """Plot the found routes and save the figure to ``save_path``.

    A 3x3 grid is drawn when at least nine solutions are given, otherwise only
    the first one is shown. Each depot-to-depot sub-tour gets its own line.
    """

    plt.close('all')

    grid = 3 if int(np.sqrt(len(tour_indices))) >= 3 else 1

    _, axes = plt.subplots(nrows=grid, ncols=grid,
                           sharex='col', sharey='row')

    # Normalise to a flat list of axes regardless of the grid size
    if grid == 1:
        axes = [[axes]]
    panels = [panel for row in axes for panel in row]

    for sample, panel in enumerate(panels):

        # Visiting order of this sample, one copy per static feature
        order = tour_indices[sample]
        if len(order.size()) == 1:
            order = order.unsqueeze(0)
        order = order.expand(static.size(1), -1)

        coords = torch.gather(static[sample].data, 1, order).cpu().numpy()

        # Bracket the route with the depot (node 0)
        home = static[sample, :, 0].cpu().data.numpy()
        x = np.hstack((home[0], coords[0], home[0]))
        y = np.hstack((home[1], coords[1], home[1]))

        # Positions along the route at which the vehicle is at the depot
        stops = np.hstack((0, tour_indices[sample].cpu().numpy().flatten(), 0))
        depot_pos = np.where(stops == 0)[0]

        # Draw every sub-tour between two depot visits as a separately labelled line
        for label, (low, high) in enumerate(zip(depot_pos[:-1], depot_pos[1:])):

            # Back-to-back depot visits contain no customers
            if low + 1 == high:
                continue

            panel.plot(x[low: high + 1], y[low: high + 1], zorder=1, label=label)

        panel.legend(loc="upper right", fontsize=3, framealpha=0.5)
        panel.scatter(x, y, s=4, c='r', zorder=2)
        panel.scatter(x[0], y[0], s=20, c='k', marker='*', zorder=3)

        panel.set_xlim(0, 1)
        panel.set_ylim(0, 1)

    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=200)


'''
def render(static, tour_indices, save_path):
    """Plots the found solution."""

    path = 'C:/Users/Matt/Documents/ffmpeg-3.4.2-win64-static/bin/ffmpeg.exe'
    plt.rcParams['animation.ffmpeg_path'] = path

    plt.close('all')

    num_plots = min(int(np.sqrt(len(tour_indices))), 3)
    fig, axes = plt.subplots(nrows=num_plots, ncols=num_plots,
                             sharex='col', sharey='row')
    axes = [a for ax in axes for a in ax]

    all_lines = []
    all_tours = []
    for i, ax in enumerate(axes):

        # Convert the indices back into a tour
        idx = tour_indices[i]
        if len(idx.size()) == 1:
            idx = idx.unsqueeze(0)

        idx = idx.expand(static.size(1), -1)
        data = torch.gather(static[i].data, 1, idx).cpu().numpy()

        start = static[i, :, 0].cpu().data.numpy()
        x = np.hstack((start[0], data[0], start[0]))
        y = np.hstack((start[1], data[1], start[1]))

        cur_tour = np.vstack((x, y))

        all_tours.append(cur_tour)
        all_lines.append(ax.plot([], [])[0])

        ax.scatter(x, y, s=4, c='r', zorder=2)
        ax.scatter(x[0], y[0], s=20, c='k', marker='*', zorder=3)

    from matplotlib.animation import FuncAnimation

    tours = all_tours

    def update(idx):

        for i, line in enumerate(all_lines):

            if idx >= tours[i].shape[1]:
                continue

            data = tours[i][:, idx]

            xy_data = line.get_xydata()
            xy_data = np.vstack((xy_data, np.atleast_2d(data)))

            line.set_data(xy_data[:, 0], xy_data[:, 1])
            line.set_linewidth(0.75)

        return all_lines

    anim = FuncAnimation(fig, update, init_func=None,
                         frames=100, interval=200, blit=False,
                         repeat=False)

    anim.save('line.mp4', dpi=160)
    plt.show()

    import sys
    sys.exit(1)
'''


================================================
FILE: trainer_motsp_no_transfer.py
================================================
"""Defines the main trainer model for combinatorial problems

Each task must define the following functions:
* mask_fn: can be None
* update_fn: can be None
* reward_fn: specifies the quality of found solutions
* render_fn: Specifies how to plot found solutions. Can be None
"""

import os
import time
import argparse
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from model import DRL4TSP, Encoder

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')


class StateCritic(nn.Module):
    """Critic that maps a problem instance to one scalar estimate.

    The static and dynamic inputs are embedded by two separate encoders, the
    embeddings are concatenated along the channel axis, passed through three
    1x1 convolutions and summed over the last axis.
    """

    def __init__(self, static_size, dynamic_size, hidden_size):
        super(StateCritic, self).__init__()

        # One encoder per input stream
        self.static_encoder = Encoder(static_size, hidden_size)
        self.dynamic_encoder = Encoder(dynamic_size, hidden_size)

        # Point-wise (kernel size 1) convolution head
        self.fc1 = nn.Conv1d(2 * hidden_size, 20, kernel_size=1)
        self.fc2 = nn.Conv1d(20, 20, kernel_size=1)
        self.fc3 = nn.Conv1d(20, 1, kernel_size=1)

        # Xavier-initialise every weight with more than one dimension
        # (1-D parameters such as biases keep their default initialisation)
        for weight in self.parameters():
            if weight.dim() > 1:
                nn.init.xavier_uniform_(weight)

    def forward(self, static, dynamic):
        """Return the value estimate for ``static`` / ``dynamic`` inputs."""

        encoded = torch.cat(
            (self.static_encoder(static), self.dynamic_encoder(dynamic)), 1)

        features = F.relu(self.fc1(encoded))
        features = F.relu(self.fc2(features))

        # Collapse the last axis so a single value remains per sample
        return self.fc3(features).sum(dim=2)


class Critic(nn.Module):
    """Critic that maps a 2-D ``(batch, length)`` tensor to one value per row.

    The input is given a singleton channel axis, passed through three 1x1
    convolutions and summed over the length axis, producing a tensor of
    shape ``(batch, 1)``.
    """

    def __init__(self, hidden_size):
        super(Critic, self).__init__()

        # Point-wise (kernel size 1) convolutions: 1 -> hidden -> 20 -> 1
        self.fc1 = nn.Conv1d(1, hidden_size, kernel_size=1)
        self.fc2 = nn.Conv1d(hidden_size, 20, kernel_size=1)
        self.fc3 = nn.Conv1d(20, 1, kernel_size=1)

        # Xavier-initialise every weight with more than one dimension
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, input):
        """Return a ``(batch, 1)`` estimate for a ``(batch, length)`` input."""

        # (batch, length) -> (batch, 1, length) so Conv1d sees one channel
        output = F.relu(self.fc1(input.unsqueeze(1)))

        # The length axis is kept on purpose: squeezing dim 2 here was a no-op
        # for length > 1 and dropped the axis for length == 1, which broke
        # the convolution and the sum below.
        output = F.relu(self.fc2(output))
        output = self.fc3(output).sum(dim=2)
        return output


def validate(data_loader, actor, reward_fn, w1, w2, render_fn=None, save_dir='.',
             num_plot=5):
    """Evaluate ``actor`` on ``data_loader`` and optionally plot solutions.

    The actor is switched to eval mode for the pass and back to train mode
    afterwards. ``save_dir`` is created if it does not exist. For the first
    ``num_plot`` batches, ``render_fn`` (when given) is called with the
    batch, the tours and a file name containing the batch mean reward.

    Returns:
        Tuple of the averages, over batches, of the per-batch mean reward,
        mean first objective and mean second objective.
    """

    actor.eval()

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    reward_means, obj1_means, obj2_means = [], [], []

    for batch_idx, (static, dynamic, x0) in enumerate(data_loader):

        static = static.to(device)
        dynamic = dynamic.to(device)
        # An empty x0 means "no initial decoder input"
        x0 = x0.to(device) if len(x0) > 0 else None

        with torch.no_grad():
            tour_indices, _ = actor.forward(static, dynamic, x0)

        reward, obj1, obj2 = reward_fn(static, tour_indices, w1, w2)

        batch_reward = torch.mean(reward.detach()).item()
        reward_means.append(batch_reward)
        obj1_means.append(torch.mean(obj1.detach()).item())
        obj2_means.append(torch.mean(obj2.detach()).item())

        should_plot = render_fn is not None and batch_idx < num_plot
        if should_plot:
            file_name = 'batch%d_%2.4f.png' % (batch_idx, batch_reward)
            render_fn(static, tour_indices, os.path.join(save_dir, file_name))

    actor.train()
    return np.mean(reward_means), np.mean(obj1_means), np.mean(obj2_means)


def train(actor, critic, w1, w2, task, num_nodes, train_data, valid_data, reward_fn,
          render_fn, batch_size, actor_lr, critic_lr, max_grad_norm,
          **kwargs):
    """Train the actor and critic for one (w1, w2) weight pair.

    Runs an actor-critic loop: the actor produces tours, ``reward_fn`` scores
    them and the critic's estimate serves as the baseline. After every epoch
    the weights are checkpointed, the actor is validated, and the weights
    with the lowest validation reward so far are also copied to the weight
    pair's main directory.

    Args:
        actor: policy network, called as ``actor(static, dynamic, x0)`` and
            expected to return ``(tour_indices, tour_logp)``.
        critic: baseline network, called as ``critic(static, dynamic)``.
        w1, w2: objective weights forwarded to ``reward_fn``.
        task: task name; prefix of the output directory.
        num_nodes: problem size; part of the output directory name.
        train_data, valid_data: datasets yielding ``(static, dynamic, x0)``.
        reward_fn: ``reward_fn(static, tour_indices, w1, w2)`` returning
            ``(reward, obj1, obj2)``; a lower reward is treated as better.
        render_fn: plotting callback handed to ``validate`` (may be None).
        batch_size: batch size of both data loaders.
        actor_lr, critic_lr: Adam learning rates.
        max_grad_norm: gradient-norm clipping threshold.
        **kwargs: extra options. ``num_epochs`` (default 5) sets the number
            of epochs; all other entries are ignored.
    """

    now = '%s' % datetime.datetime.now().time()
    now = now.replace(':', '_')
    bname = "_4static"

    # Directory shared by every run of this weight pair; the best weights are
    # stored here so they can be transferred to the next weight pair.
    main_dir = os.path.join(task+bname, '%d' % num_nodes, 'w_%2.2f_%2.2f' % (w1, w2))
    save_dir = os.path.join(main_dir, now)

    checkpoint_dir = os.path.join(save_dir, 'checkpoints')
    os.makedirs(checkpoint_dir, exist_ok=True)

    actor_optim = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optim = optim.Adam(critic.parameters(), lr=critic_lr)

    train_loader = DataLoader(train_data, batch_size, True, num_workers=0)
    valid_loader = DataLoader(valid_data, batch_size, False, num_workers=0)

    best_reward = np.inf

    num_epochs = kwargs.get('num_epochs', 5)
    log_every = 200   # batches between progress reports
    log_window = 100  # most recent batches averaged in a progress report

    for epoch in range(num_epochs):
        print("epoch %d start:"% epoch)
        actor.train()
        critic.train()

        times, losses, rewards = [], [], []
        obj1s, obj2s = [], []

        epoch_start = time.time()
        start = epoch_start

        for batch_idx, batch in enumerate(train_loader):

            static, dynamic, x0 = batch

            static = static.to(device)
            dynamic = dynamic.to(device)
            x0 = x0.to(device) if len(x0) > 0 else None

            # Full forward pass through the batch
            tour_indices, tour_logp = actor(static, dynamic, x0)

            # Score the produced tours
            reward, obj1, obj2 = reward_fn(static, tour_indices, w1, w2)

            # Query the critic for an estimate of the reward
            critic_est = critic(static, dynamic).view(-1)

            # The advantage is detached for the actor so that only the critic
            # loss back-propagates into the critic.
            advantage = (reward - critic_est)
            actor_loss = torch.mean(advantage.detach() * tour_logp.sum(dim=1))
            critic_loss = torch.mean(advantage ** 2)

            actor_optim.zero_grad()
            actor_loss.backward()
            torch.nn.utils.clip_grad_norm_(actor.parameters(), max_grad_norm)
            actor_optim.step()

            critic_optim.zero_grad()
            critic_loss.backward()
            torch.nn.utils.clip_grad_norm_(critic.parameters(), max_grad_norm)
            critic_optim.step()

            rewards.append(torch.mean(reward.detach()).item())
            losses.append(torch.mean(actor_loss.detach()).item())
            obj1s.append(torch.mean(obj1.detach()).item())
            obj2s.append(torch.mean(obj2.detach()).item())

            if (batch_idx + 1) % log_every == 0:
                print("\n")
                end = time.time()
                times.append(end - start)
                start = end

                mean_loss = np.mean(losses[-log_window:])
                mean_reward = np.mean(rewards[-log_window:])
                mean_obj1 = np.mean(obj1s[-log_window:])
                mean_obj2 = np.mean(obj2s[-log_window:])
                print('  Batch %d/%d, reward: %2.3f, obj1: %2.3f, obj2: %2.3f, loss: %2.4f, took: %2.4fs' %
                      (batch_idx, len(train_loader), mean_reward, mean_obj1, mean_obj2, mean_loss,
                       times[-1]))

        mean_loss = np.mean(losses)
        mean_reward = np.mean(rewards)

        # Save the weights of this epoch
        epoch_dir = os.path.join(checkpoint_dir, '%s' % epoch)
        os.makedirs(epoch_dir, exist_ok=True)

        save_path = os.path.join(epoch_dir, 'actor.pt')
        torch.save(actor.state_dict(), save_path)

        save_path = os.path.join(epoch_dir, 'critic.pt')
        torch.save(critic.state_dict(), save_path)

        # Save rendering of validation set tours
        valid_dir = os.path.join(save_dir, '%s' % epoch)

        print("begin valid")
        s = time.time()
        mean_valid, mean_obj1_valid, mean_obj2_valid = validate(valid_loader, actor, reward_fn, w1, w2, render_fn,
                              valid_dir, num_plot=5)
        print("valid end time: %2.4f" % (time.time()-s) )

        # Save best model parameters
        if mean_valid < best_reward:

            best_reward = mean_valid

            # Keep an extra copy in the main w_* folder; it is the checkpoint
            # used to transfer parameters to the next weight pair.
            save_path = os.path.join(main_dir, 'actor.pt')
            torch.save(actor.state_dict(), save_path)
            save_path = os.path.join(main_dir, 'critic.pt')
            torch.save(critic.state_dict(), save_path)

        # No interval was timed when the epoch had fewer than log_every
        # batches; report NaN explicitly instead of averaging an empty list.
        mean_interval = np.mean(times) if times else float('nan')

        print('Mean epoch loss/reward: %2.4f, %2.4f, %2.4f, obj1_valid: %2.3f, obj2_valid: %2.3f. took: %2.4fs '\
              '(%2.4fs / %d batches)\n' % \
              (mean_loss, mean_reward, mean_valid, mean_obj1_valid, mean_obj2_valid, time.time() - epoch_start,
              mean_interval, log_every))


def train_tsp(args, w1=1, w2=0, checkpoint = None):
    """Train (unless ``args.test``) and then test a model for one weight pair.

    Args:
        args: parsed command-line namespace; ``num_nodes``, ``train_size``,
            ``valid_size``, ``seed``, ``hidden_size``, ``num_layers``,
            ``dropout`` and ``test`` are read here, and every entry is
            forwarded to ``train`` as a keyword argument.
        w1, w2: weights of the two objectives, passed to the reward function.
        checkpoint: optional directory holding ``actor.pt`` and ``critic.pt``
            to initialise both networks from.
    """

    # Goals from paper:
    # TSP20, 3.97
    # TSP50, 6.08
    # TSP100, 8.44

    from tasks import motsp
    from tasks.motsp import TSPDataset

    # Four static features per city. NOTE(review): presumably two (x, y)
    # coordinate sets, one per objective - confirm against tasks.motsp.
    STATIC_SIZE = 4
    DYNAMIC_SIZE = 1 # dummy for compatibility

    train_data = TSPDataset(args.num_nodes, args.train_size, args.seed)
    valid_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 1)

    update_fn = None

    actor = DRL4TSP(STATIC_SIZE,
                    DYNAMIC_SIZE,
                    args.hidden_size,
                    update_fn,
                    motsp.update_mask,
                    args.num_layers,
                    args.dropout).to(device)

    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    # Copy the namespace dict: vars(args) is args.__dict__ itself, so writing
    # into it would attach the datasets and callbacks to the shared args
    # object that the caller reuses for every weight pair.
    kwargs = dict(vars(args))
    kwargs['train_data'] = train_data
    kwargs['valid_data'] = valid_data
    kwargs['reward_fn'] = motsp.reward
    kwargs['render_fn'] = motsp.render

    if checkpoint:
        path = os.path.join(checkpoint, 'actor.pt')
        actor.load_state_dict(torch.load(path, device))

        path = os.path.join(checkpoint, 'critic.pt')
        critic.load_state_dict(torch.load(path, device))

    if not args.test:
        train(actor, critic, w1, w2, **kwargs)

    # Held-out set drawn with a seed different from train and validation
    test_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 2)

    test_dir = 'test'
    # Batch size equals the set size, so the test set is one single batch
    test_loader = DataLoader(test_data, args.valid_size, False, num_workers=0)
    out = validate(test_loader, actor, motsp.reward, w1, w2, motsp.render, test_dir, num_plot=5)

    print('w1=%2.2f,w2=%2.2f. Average tour length: ' % (w1, w2), out)


def train_vrp(args):
    """Train (unless ``args.test``) and then test a model on the VRP task.

    ``train`` and ``validate`` in this module are written for the
    two-objective TSP: they require weights ``w1``/``w2`` and a reward
    function returning ``(reward, obj1, obj2)``. The VRP reward is adapted to
    that contract here, with fixed weights ``w1=1, w2=0``.

    Args:
        args: parsed command-line namespace. ``args.num_nodes`` must be a key
            of ``LOAD_DICT`` (a ``KeyError`` is raised otherwise). An optional
            ``checkpoint`` attribute names a directory with ``actor.pt`` and
            ``critic.pt`` to load.
    """

    # Goals from paper:
    # VRP10, Capacity 20:  4.84  (Greedy)
    # VRP20, Capacity 30:  6.59  (Greedy)
    # VRP50, Capacity 40:  11.39 (Greedy)
    # VRP100, Capacity 50: 17.23  (Greedy)

    from tasks import vrp
    from tasks.vrp import VehicleRoutingDataset

    # Determines the maximum amount of load for a vehicle based on num nodes
    LOAD_DICT = {10: 20, 20: 30, 50: 40, 100: 50}
    MAX_DEMAND = 9
    STATIC_SIZE = 2 # (x, y)
    DYNAMIC_SIZE = 2 # (load, demand)

    # Single-objective task expressed through the two-weight interface
    W1, W2 = 1, 0

    def reward_fn(static, tour_indices, w1, w2):
        # NOTE(review): assumes vrp.reward(static, tour_indices) returns one
        # tour-length tensor - confirm against tasks.vrp.
        tour_len = vrp.reward(static, tour_indices)
        return tour_len, tour_len, torch.zeros_like(tour_len)

    max_load = LOAD_DICT[args.num_nodes]

    train_data = VehicleRoutingDataset(args.train_size,
                                       args.num_nodes,
                                       max_load,
                                       MAX_DEMAND,
                                       args.seed)

    valid_data = VehicleRoutingDataset(args.valid_size,
                                       args.num_nodes,
                                       max_load,
                                       MAX_DEMAND,
                                       args.seed + 1)

    actor = DRL4TSP(STATIC_SIZE,
                    DYNAMIC_SIZE,
                    args.hidden_size,
                    train_data.update_dynamic,
                    train_data.update_mask,
                    args.num_layers,
                    args.dropout).to(device)

    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    # Copy so the shared args namespace is not modified
    kwargs = dict(vars(args))
    kwargs['train_data'] = train_data
    kwargs['valid_data'] = valid_data
    kwargs['reward_fn'] = reward_fn
    kwargs['render_fn'] = vrp.render

    # The --checkpoint option is not registered by this script's parser, so
    # the attribute may be absent.
    checkpoint = getattr(args, 'checkpoint', None)
    if checkpoint:
        path = os.path.join(checkpoint, 'actor.pt')
        actor.load_state_dict(torch.load(path, device))

        path = os.path.join(checkpoint, 'critic.pt')
        critic.load_state_dict(torch.load(path, device))

    if not args.test:
        train(actor, critic, W1, W2, **kwargs)

    test_data = VehicleRoutingDataset(args.valid_size,
                                      args.num_nodes,
                                      max_load,
                                      MAX_DEMAND,
                                      args.seed + 2)

    test_dir = 'test'
    test_loader = DataLoader(test_data, args.batch_size, False, num_workers=0)
    out = validate(test_loader, actor, reward_fn, W1, W2, vrp.render, test_dir, num_plot=5)

    print('Average tour length: ', out)


if __name__ == '__main__':

    # Command-line options as (flag, add_argument keyword arguments).
    # No --checkpoint option is registered; train_tsp receives its
    # checkpoint explicitly.
    cli_options = [
        ('--seed', dict(default=12345, type=int)),
        ('--test', dict(action='store_true', default=False)),
        ('--task', dict(default='tsp')),
        ('--nodes', dict(dest='num_nodes', default=40, type=int)),
        ('--actor_lr', dict(default=5e-4, type=float)),
        ('--critic_lr', dict(default=5e-4, type=float)),
        ('--max_grad_norm', dict(default=2., type=float)),
        ('--batch_size', dict(default=200, type=int)),
        ('--hidden', dict(dest='hidden_size', default=128, type=int)),
        ('--dropout', dict(default=0.1, type=float)),
        ('--layers', dict(dest='num_layers', default=1, type=int)),
        ('--train-size', dict(default=500000, type=int)),
        ('--valid-size', dict(default=1000, type=int)),
    ]

    parser = argparse.ArgumentParser(description='Combinatorial Optimization')
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)

    args = parser.parse_args()

    if args.task not in ('tsp', 'vrp'):
        raise ValueError('Task <%s> not understood'%args.task)

    if args.task == 'vrp':
        train_vrp(args)
    else:
        # Trained without transfer: every weight pair starts from scratch.
        # w2 sweeps 0.00, 0.01, ..., 1.00 and w1 is its complement.
        for w2 in np.arange(101)/100:
            w1 = 1-w2
            print("Current w:%2.2f/%2.2f"% (w1, w2))
            train_tsp(args, w1, w2, None)


================================================
FILE: trainer_motsp_transfer.py
================================================
"""Defines the main trainer model for combinatorial problems

Each task must define the following functions:
* mask_fn: can be None
* update_fn: can be None
* reward_fn: specifies the quality of found solutions
* render_fn: Specifies how to plot found solutions. Can be None
"""

import os
import time
import argparse
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from model import DRL4TSP, Encoder

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')


class StateCritic(nn.Module):
    """Critic that maps a problem instance to one scalar estimate.

    The static and dynamic inputs are embedded by two separate encoders, the
    embeddings are concatenated along the channel axis, passed through three
    1x1 convolutions and summed over the last axis.
    """

    def __init__(self, static_size, dynamic_size, hidden_size):
        super(StateCritic, self).__init__()

        # One encoder per input stream
        self.static_encoder = Encoder(static_size, hidden_size)
        self.dynamic_encoder = Encoder(dynamic_size, hidden_size)

        # Point-wise (kernel size 1) convolution head
        self.fc1 = nn.Conv1d(2 * hidden_size, 20, kernel_size=1)
        self.fc2 = nn.Conv1d(20, 20, kernel_size=1)
        self.fc3 = nn.Conv1d(20, 1, kernel_size=1)

        # Xavier-initialise every weight with more than one dimension
        # (1-D parameters such as biases keep their default initialisation)
        for weight in self.parameters():
            if weight.dim() > 1:
                nn.init.xavier_uniform_(weight)

    def forward(self, static, dynamic):
        """Return the value estimate for ``static`` / ``dynamic`` inputs."""

        encoded = torch.cat(
            (self.static_encoder(static), self.dynamic_encoder(dynamic)), 1)

        features = F.relu(self.fc1(encoded))
        features = F.relu(self.fc2(features))

        # Collapse the last axis so a single value remains per sample
        return self.fc3(features).sum(dim=2)


class Critic(nn.Module):
    """Critic that maps a 2-D ``(batch, length)`` tensor to one value per row.

    The input is given a singleton channel axis, passed through three 1x1
    convolutions and summed over the length axis, producing a tensor of
    shape ``(batch, 1)``.
    """

    def __init__(self, hidden_size):
        super(Critic, self).__init__()

        # Point-wise (kernel size 1) convolutions: 1 -> hidden -> 20 -> 1
        self.fc1 = nn.Conv1d(1, hidden_size, kernel_size=1)
        self.fc2 = nn.Conv1d(hidden_size, 20, kernel_size=1)
        self.fc3 = nn.Conv1d(20, 1, kernel_size=1)

        # Xavier-initialise every weight with more than one dimension
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, input):
        """Return a ``(batch, 1)`` estimate for a ``(batch, length)`` input."""

        # (batch, length) -> (batch, 1, length) so Conv1d sees one channel
        output = F.relu(self.fc1(input.unsqueeze(1)))

        # The length axis is kept on purpose: squeezing dim 2 here was a no-op
        # for length > 1 and dropped the axis for length == 1, which broke
        # the convolution and the sum below.
        output = F.relu(self.fc2(output))
        output = self.fc3(output).sum(dim=2)
        return output


def validate(data_loader, actor, reward_fn, w1, w2, render_fn=None, save_dir='.',
             num_plot=5):
    """Evaluate ``actor`` on ``data_loader`` without plotting.

    The actor is switched to eval mode for the pass and back to train mode
    afterwards. ``render_fn``, ``save_dir`` and ``num_plot`` are accepted for
    signature compatibility but are not used: this trainer neither creates
    ``save_dir`` nor renders any tour.

    Returns:
        Tuple of the averages, over batches, of the per-batch mean reward,
        mean first objective and mean second objective.
    """

    actor.eval()

    reward_means, obj1_means, obj2_means = [], [], []

    for static, dynamic, x0 in data_loader:

        static = static.to(device)
        dynamic = dynamic.to(device)
        # An empty x0 means "no initial decoder input"
        x0 = x0.to(device) if len(x0) > 0 else None

        with torch.no_grad():
            tour_indices, _ = actor.forward(static, dynamic, x0)

        reward, obj1, obj2 = reward_fn(static, tour_indices, w1, w2)

        for history, values in ((reward_means, reward),
                                (obj1_means, obj1),
                                (obj2_means, obj2)):
            history.append(torch.mean(values.detach()).item())

    actor.train()
    return np.mean(reward_means), np.mean(obj1_means), np.mean(obj2_means)


def train(actor, critic, w1, w2, task, num_nodes, train_data, valid_data, reward_fn,
          render_fn, batch_size, actor_lr, critic_lr, max_grad_norm,
          **kwargs):
    """Train the actor and critic for one (w1, w2) weight pair.

    Runs an actor-critic loop: the actor produces tours, ``reward_fn`` scores
    them and the critic's estimate serves as the baseline. After every epoch
    the actor is validated, and the weights with the lowest validation reward
    so far are written to the weight pair's main directory, from where the
    next weight pair can load them.

    Args:
        actor: policy network, called as ``actor(static, dynamic, x0)`` and
            expected to return ``(tour_indices, tour_logp)``.
        critic: baseline network, called as ``critic(static, dynamic)``.
        w1, w2: objective weights forwarded to ``reward_fn``.
        task: task name; prefix of the output directory.
        num_nodes: problem size; part of the output directory name.
        train_data, valid_data: datasets yielding ``(static, dynamic, x0)``.
        reward_fn: ``reward_fn(static, tour_indices, w1, w2)`` returning
            ``(reward, obj1, obj2)``; a lower reward is treated as better.
        render_fn: plotting callback handed to ``validate`` (may be None).
        batch_size: batch size of both data loaders.
        actor_lr, critic_lr: Adam learning rates.
        max_grad_norm: gradient-norm clipping threshold.
        **kwargs: extra options. ``num_epochs`` (default 3) sets the number
            of epochs; all other entries are ignored.
    """

    now = '%s' % datetime.datetime.now().time()
    now = now.replace(':', '_')
    bname = "_transfer"

    # Directory shared by every run of this weight pair; the best weights are
    # stored here so they can be transferred to the next weight pair.
    main_dir = os.path.join(task+bname, '%d' % num_nodes, 'w_%2.2f_%2.2f' % (w1, w2))
    save_dir = os.path.join(main_dir, now)

    # No per-epoch checkpoint is written by this trainer, but creating this
    # nested directory also guarantees that main_dir exists for the saves
    # below.
    checkpoint_dir = os.path.join(save_dir, 'checkpoints')
    os.makedirs(checkpoint_dir, exist_ok=True)

    actor_optim = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optim = optim.Adam(critic.parameters(), lr=critic_lr)

    train_loader = DataLoader(train_data, batch_size, True, num_workers=0)
    valid_loader = DataLoader(valid_data, batch_size, False, num_workers=0)

    best_reward = np.inf

    num_epochs = kwargs.get('num_epochs', 3)
    log_every = 200   # batches between progress reports
    log_window = 100  # most recent batches averaged in a progress report

    start_total = time.time()
    for epoch in range(num_epochs):
        print("epoch %d start:"% epoch)
        actor.train()
        critic.train()

        times, losses, rewards = [], [], []
        obj1s, obj2s = [], []

        epoch_start = time.time()
        start = epoch_start

        for batch_idx, batch in enumerate(train_loader):

            static, dynamic, x0 = batch

            static = static.to(device)
            dynamic = dynamic.to(device)
            x0 = x0.to(device) if len(x0) > 0 else None

            # Full forward pass through the batch
            tour_indices, tour_logp = actor(static, dynamic, x0)

            # Score the produced tours
            reward, obj1, obj2 = reward_fn(static, tour_indices, w1, w2)

            # Query the critic for an estimate of the reward
            critic_est = critic(static, dynamic).view(-1)

            # The advantage is detached for the actor so that only the critic
            # loss back-propagates into the critic.
            advantage = (reward - critic_est)
            actor_loss = torch.mean(advantage.detach() * tour_logp.sum(dim=1))
            critic_loss = torch.mean(advantage ** 2)

            actor_optim.zero_grad()
            actor_loss.backward()
            torch.nn.utils.clip_grad_norm_(actor.parameters(), max_grad_norm)
            actor_optim.step()

            critic_optim.zero_grad()
            critic_loss.backward()
            torch.nn.utils.clip_grad_norm_(critic.parameters(), max_grad_norm)
            critic_optim.step()

            rewards.append(torch.mean(reward.detach()).item())
            losses.append(torch.mean(actor_loss.detach()).item())
            obj1s.append(torch.mean(obj1.detach()).item())
            obj2s.append(torch.mean(obj2.detach()).item())

            if (batch_idx + 1) % log_every == 0:
                print("\n")
                end = time.time()
                times.append(end - start)
                start = end

                mean_loss = np.mean(losses[-log_window:])
                mean_reward = np.mean(rewards[-log_window:])
                mean_obj1 = np.mean(obj1s[-log_window:])
                mean_obj2 = np.mean(obj2s[-log_window:])
                print('  Batch %d/%d, reward: %2.3f, obj1: %2.3f, obj2: %2.3f, loss: %2.4f, took: %2.4fs' %
                      (batch_idx, len(train_loader), mean_reward, mean_obj1, mean_obj2, mean_loss,
                       times[-1]))

        mean_loss = np.mean(losses)
        mean_reward = np.mean(rewards)

        # Validation only computes metrics in this trainer; the directory
        # argument is a placeholder.
        mean_valid, mean_obj1_valid, mean_obj2_valid = validate(valid_loader, actor, reward_fn, w1, w2, render_fn,
                              '.', num_plot=5)

        # Save best model parameters
        if mean_valid < best_reward:

            best_reward = mean_valid

            # Stored in the main w_* folder; it is the checkpoint used to
            # transfer parameters to the next weight pair.
            save_path = os.path.join(main_dir, 'actor.pt')
            torch.save(actor.state_dict(), save_path)
            save_path = os.path.join(main_dir, 'critic.pt')
            torch.save(critic.state_dict(), save_path)

        # No interval was timed when the epoch had fewer than log_every
        # batches; report NaN explicitly instead of averaging an empty list.
        mean_interval = np.mean(times) if times else float('nan')

        print('Mean epoch loss/reward: %2.4f, %2.4f, %2.4f, obj1_valid: %2.3f, obj2_valid: %2.3f. took: %2.4fs '\
              '(%2.4fs / %d batches)\n' % \
              (mean_loss, mean_reward, mean_valid, mean_obj1_valid, mean_obj2_valid, time.time() - epoch_start,
              mean_interval, log_every))
    print("Total run time of epoches: %2.4f" % (time.time() - start_total))


def train_tsp(args, w1=1, w2=0, checkpoint = None):
    """Train (unless ``args.test``) and then test a model for one weight pair.

    Args:
        args: parsed command-line namespace; ``num_nodes``, ``train_size``,
            ``valid_size``, ``seed``, ``hidden_size``, ``num_layers``,
            ``dropout`` and ``test`` are read here, and every entry is
            forwarded to ``train`` as a keyword argument.
        w1, w2: weights of the two objectives, passed to the reward function.
        checkpoint: optional directory holding ``actor.pt`` and ``critic.pt``
            to initialise both networks from (used for parameter transfer).
    """

    # Goals from paper:
    # TSP20, 3.97
    # TSP50, 6.08
    # TSP100, 8.44

    from tasks import motsp
    from tasks.motsp import TSPDataset

    # Four static features per city. NOTE(review): presumably two (x, y)
    # coordinate sets, one per objective - confirm against tasks.motsp.
    STATIC_SIZE = 4
    DYNAMIC_SIZE = 1 # dummy for compatibility

    train_data = TSPDataset(args.num_nodes, args.train_size, args.seed)
    valid_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 1)

    update_fn = None

    actor = DRL4TSP(STATIC_SIZE,
                    DYNAMIC_SIZE,
                    args.hidden_size,
                    update_fn,
                    motsp.update_mask,
                    args.num_layers,
                    args.dropout).to(device)

    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    # Copy the namespace dict: vars(args) is args.__dict__ itself, so writing
    # into it would attach the datasets and callbacks to the shared args
    # object that the caller reuses for every weight pair.
    kwargs = dict(vars(args))
    kwargs['train_data'] = train_data
    kwargs['valid_data'] = valid_data
    kwargs['reward_fn'] = motsp.reward
    kwargs['render_fn'] = motsp.render

    if checkpoint:
        path = os.path.join(checkpoint, 'actor.pt')
        actor.load_state_dict(torch.load(path, device))

        path = os.path.join(checkpoint, 'critic.pt')
        critic.load_state_dict(torch.load(path, device))

    if not args.test:
        train(actor, critic, w1, w2, **kwargs)

    # Held-out set drawn with a seed different from train and validation
    test_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 2)

    test_dir = 'test'
    # Batch size equals the set size, so the test set is one single batch
    test_loader = DataLoader(test_data, args.valid_size, False, num_workers=0)
    out = validate(test_loader, actor, motsp.reward, w1, w2, motsp.render, test_dir, num_plot=5)

    print('w1=%2.2f,w2=%2.2f. Average tour length: ' % (w1, w2), out)


def train_vrp(args):
    """Train (unless ``args.test``) and then test a model on the VRP task.

    ``train`` and ``validate`` in this module are written for the
    two-objective TSP: they require weights ``w1``/``w2`` and a reward
    function returning ``(reward, obj1, obj2)``. The VRP reward is adapted to
    that contract here, with fixed weights ``w1=1, w2=0``.

    Args:
        args: parsed command-line namespace. ``args.num_nodes`` must be a key
            of ``LOAD_DICT`` (a ``KeyError`` is raised otherwise). An optional
            ``checkpoint`` attribute names a directory with ``actor.pt`` and
            ``critic.pt`` to load.
    """

    # Goals from paper:
    # VRP10, Capacity 20:  4.84  (Greedy)
    # VRP20, Capacity 30:  6.59  (Greedy)
    # VRP50, Capacity 40:  11.39 (Greedy)
    # VRP100, Capacity 50: 17.23  (Greedy)

    from tasks import vrp
    from tasks.vrp import VehicleRoutingDataset

    # Determines the maximum amount of load for a vehicle based on num nodes
    LOAD_DICT = {10: 20, 20: 30, 50: 40, 100: 50}
    MAX_DEMAND = 9
    STATIC_SIZE = 2 # (x, y)
    DYNAMIC_SIZE = 2 # (load, demand)

    # Single-objective task expressed through the two-weight interface
    W1, W2 = 1, 0

    def reward_fn(static, tour_indices, w1, w2):
        # NOTE(review): assumes vrp.reward(static, tour_indices) returns one
        # tour-length tensor - confirm against tasks.vrp.
        tour_len = vrp.reward(static, tour_indices)
        return tour_len, tour_len, torch.zeros_like(tour_len)

    max_load = LOAD_DICT[args.num_nodes]

    train_data = VehicleRoutingDataset(args.train_size,
                                       args.num_nodes,
                                       max_load,
                                       MAX_DEMAND,
                                       args.seed)

    valid_data = VehicleRoutingDataset(args.valid_size,
                                       args.num_nodes,
                                       max_load,
                                       MAX_DEMAND,
                                       args.seed + 1)

    actor = DRL4TSP(STATIC_SIZE,
                    DYNAMIC_SIZE,
                    args.hidden_size,
                    train_data.update_dynamic,
                    train_data.update_mask,
                    args.num_layers,
                    args.dropout).to(device)

    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    # Copy so the shared args namespace is not modified
    kwargs = dict(vars(args))
    kwargs['train_data'] = train_data
    kwargs['valid_data'] = valid_data
    kwargs['reward_fn'] = reward_fn
    kwargs['render_fn'] = vrp.render

    # The --checkpoint option is not registered by this script's parser, so
    # the attribute may be absent.
    checkpoint = getattr(args, 'checkpoint', None)
    if checkpoint:
        path = os.path.join(checkpoint, 'actor.pt')
        actor.load_state_dict(torch.load(path, device))

        path = os.path.join(checkpoint, 'critic.pt')
        critic.load_state_dict(torch.load(path, device))

    if not args.test:
        train(actor, critic, W1, W2, **kwargs)

    test_data = VehicleRoutingDataset(args.valid_size,
                                      args.num_nodes,
                                      max_load,
                                      MAX_DEMAND,
                                      args.seed + 2)

    test_dir = 'test'
    test_loader = DataLoader(test_data, args.batch_size, False, num_workers=0)
    out = validate(test_loader, actor, reward_fn, W1, W2, vrp.render, test_dir, num_plot=5)

    print('Average tour length: ', out)


if __name__ == '__main__':
    num_nodes = 100
    parser = argparse.ArgumentParser(description='Combinatorial Optimization')
    parser.add_argument('--seed', default=12345, type=int)
    # parser.add_argument('--checkpoint', default="tsp/20/w_1_0/20_06_30.888074")
    parser.add_argument('--test', action='store_true', default=False)
    parser.add_argument('--task', default='tsp')
    parser.add_argument('--nodes', dest='num_nodes', default=num_nodes, type=int)
    parser.add_argument('--actor_lr', default=5e-4, type=float)
    parser.add_argument('--critic_lr', default=5e-4, type=float)
    parser.add_argument('--max_grad_norm', default=2., type=float)
    parser.add_argument('--batch_size', default=200, type=int)
    parser.add_argument('--hidden', dest='hidden_size', default=128, type=int)
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--layers', dest='num_layers', default=1, type=int)
    parser.add_argument('--train-size',default=120000, type=int)
    parser.add_argument('--valid-size', default=1000, type=int)

    args = parser.parse_args()


    T = 100
    if args.task == 'tsp':
        w2_list = np.arange(T+1)/T
        for i in range(0,T+1):
            print("Current w:%2.2f/%2.2f"% (1-w2_list[i], w2_list[i]))
            if i==0:
                # The first subproblem can be trained from scratch. It also can be trained based on a
                # single-TSP trained model, where the model can be obtained from everywhere in github
                checkpoint = 'tsp_transfer_100run_500000_5epoch_40city/40/w_1.00_0.00'
                train_tsp(args, 1, 0, checkpoint)
            else:
                # Parameter transfer. train based on the parameters of the previous subproblem
                checkpoint = 'tsp_transfer/%d/w_%2.2f_%2.2f'%(num_nodes, 1-w2_list[i-1], w2_list[i-1])
                train_tsp(args, 1-w2_list[i], w2_list[i], checkpoint)