Repository: YoZhibo/MSGNet Branch: main Commit: 953b8330a2ca Files: 29 Total size: 141.7 KB Directory structure: gitextract_1z7x72rx/ ├── README.md ├── data_provider/ │ ├── data_factory.py │ └── data_loader.py ├── exp/ │ ├── exp_basic.py │ ├── exp_main.py │ └── exp_stat.py ├── layers/ │ ├── AutoCorrelation.py │ ├── Autoformer_EncDec.py │ ├── Embed.py │ ├── MSGBlock.py │ ├── SelfAttention_Family.py │ └── Transformer_EncDec.py ├── models/ │ ├── Autoformer.py │ ├── DLinear.py │ ├── Informer.py │ └── MSGNet.py ├── run_longExp.py ├── scripts/ │ ├── ETTh1.sh │ ├── ETTh2.sh │ ├── ETTm1.sh │ ├── ETTm2.sh │ ├── Flight.sh │ ├── electricity.sh │ ├── exchange.sh │ └── weather.sh └── utils/ ├── masking.py ├── metrics.py ├── timefeatures.py └── tools.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # MSGNet (AAAI2024) Paper Link:[MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting](https://arxiv.org/abs/2401.00423) ## Usage - Train and evaluate MSGNet - You can use the following command:`sh ./scripts/ETTh1.sh`. - Train your model - Add model file in the folder `./models/your_model.py`. - Add model in the ***class*** Exp_Main. - Flight dataset - You can obtain the dataset from [Google Drive](https://drive.google.com/drive/folders/1JSZByfM0Ghat3g_D3a-puTZ2JsfebNWL?usp=sharing). Then please place it in the folder `./dataset`. ## Model MSGNet employs several ScaleGraph blocks, each encompassing three pivotal modules: an FFT module for multi-scale data identification, an adaptive graph convolution module for inter-series correlation learning within a time scale, and a multi-head attention module for intra-series correlation learning.
## Main Results Forecast results with 96 review window and prediction length {96, 192, 336, 720}. The best result is represented in bold, followed by underline.
## Citation ``` @article{cai2023msgnet, title={MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting}, author={Cai, Wanlin and Liang, Yuxuan and Liu, Xianggen and Feng, Jianshuai and Wu, Yuankai}, journal={arXiv preprint arXiv:2401.00423}, year={2023} } ``` ## Acknowledgement We appreciate the valuable contributions of the following GitHub. - LTSF-Linear (https://github.com/cure-lab/LTSF-Linear) - TimesNet (https://github.com/thuml/TimesNet) - Time-Series-Library (https://github.com/thuml/Time-Series-Library) - MTGnn (https://github.com/nnzhan/MTGNN) - Autoformer (https://github.com/thuml/Autoformer) - Informer (https://github.com/zhouhaoyi/Informer2020) ================================================ FILE: data_provider/data_factory.py ================================================ from .data_loader import Dataset_ETT_hour, Dataset_ETT_minute, \ Dataset_Custom, Dataset_Pred,Dataset_Flight from torch.utils.data import DataLoader data_dict = { 'ETTh1': Dataset_ETT_hour, 'ETTh2': Dataset_ETT_hour, 'ETTm1': Dataset_ETT_minute, 'ETTm2': Dataset_ETT_minute, 'custom': Dataset_Custom, 'Flight':Dataset_Flight, } # flag = 'train' or 'val' or 'test' def data_provider(args, flag): Data = data_dict[args.data] #time features encoding, options: [timeF, fixed, learned] timeenc = 0 if args.embed != 'timeF' else 1 if flag == 'test': shuffle_flag = False drop_last = True batch_size = args.batch_size freq = args.freq elif flag == 'pred': shuffle_flag = False drop_last = False batch_size = 1 freq = args.freq Data = Dataset_Pred else: shuffle_flag = True drop_last = True batch_size = args.batch_size freq = args.freq data_set = Data( root_path=args.root_path, data_path=args.data_path, flag=flag, size=[args.seq_len, args.label_len, args.pred_len], features=args.features, target=args.target, timeenc=timeenc, freq=freq, seasonal_patterns = args.seasonal_patterns ) print(flag, len(data_set)) data_loader = DataLoader( data_set, batch_size=batch_size, shuffle=shuffle_flag, num_workers=args.num_workers, drop_last=drop_last) return data_set, data_loader ================================================ FILE: data_provider/data_loader.py ================================================ import os import numpy as np import pandas as pd import os import torch from torch.utils.data import Dataset, DataLoader from sklearn.preprocessing import StandardScaler from utils.timefeatures import time_features import warnings warnings.filterwarnings('ignore') #for Flight 4:4:2 split class Dataset_Flight(Dataset): def __init__(self, root_path, flag='train', size=None, features='S', data_path='Flight.csv', target='OT', scale=True, timeenc=0, freq='h',seasonal_patterns=None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['train', 'test', 'val'] type_map = {'train': 0, 'val': 1, 'test': 2} self.set_type = type_map[flag] self.features = features self.target = target self.scale = scale self.timeenc = timeenc self.freq = freq self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) ''' df_raw.columns: ['date', ...(other features), target feature] ''' cols = list(df_raw.columns) cols.remove(self.target) cols.remove('date') df_raw = df_raw[['date'] + cols + [self.target]] # print(cols) num_train = int(len(df_raw) * 0.4) num_test = int(len(df_raw) * 0.2) num_vali = len(df_raw) - num_train - num_test border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] border2s = [num_train, num_train + num_vali, len(df_raw)] border1 = border1s[self.set_type] border2 = border2s[self.set_type] if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] if self.scale: train_data = df_data[border1s[0]:border2s[0]] self.scaler.fit(train_data.values) data = self.scaler.transform(df_data.values) else: data = df_data.values df_stamp = df_raw[['date']][border1:border2] df_stamp['date'] = pd.to_datetime(df_stamp.date) if self.timeenc == 0: df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] self.data_y = data[border1:border2] self.data_stamp = data_stamp def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len seq_x = self.data_x[s_begin:s_end] seq_y = self.data_y[r_begin:r_end] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] return seq_x, seq_y, seq_x_mark, seq_y_mark def __len__(self): return len(self.data_x) - self.seq_len - self.pred_len + 1 def inverse_transform(self, data): return self.scaler.inverse_transform(data) class Dataset_Custom(Dataset): def __init__(self, root_path, flag='train', size=None, features='S', data_path='ETTh1.csv', target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['train', 'test', 'val'] type_map = {'train': 0, 'val': 1, 'test': 2} self.set_type = type_map[flag] self.features = features self.target = target self.scale = scale self.timeenc = timeenc self.freq = freq self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) ''' df_raw.columns: ['date', ...(other features), target feature] ''' cols = list(df_raw.columns) cols.remove(self.target) cols.remove('date') df_raw = df_raw[['date'] + cols + [self.target]] # print(cols) num_train = int(len(df_raw) * 0.7) num_test = int(len(df_raw) * 0.2) num_vali = len(df_raw) - num_train - num_test border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] border2s = [num_train, num_train + num_vali, len(df_raw)] border1 = border1s[self.set_type] border2 = border2s[self.set_type] if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] if self.scale: train_data = df_data[border1s[0]:border2s[0]] self.scaler.fit(train_data.values) # print(self.scaler.mean_) # exit() data = self.scaler.transform(df_data.values) else: data = df_data.values df_stamp = df_raw[['date']][border1:border2] df_stamp['date'] = pd.to_datetime(df_stamp.date) if self.timeenc == 0: df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] self.data_y = data[border1:border2] self.data_stamp = data_stamp def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len seq_x = self.data_x[s_begin:s_end] seq_y = self.data_y[r_begin:r_end] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] return seq_x, seq_y, seq_x_mark, seq_y_mark def __len__(self): return len(self.data_x) - self.seq_len - self.pred_len + 1 def inverse_transform(self, data): return self.scaler.inverse_transform(data) class Dataset_Pred(Dataset): def __init__(self, root_path, flag='pred', size=None, features='S', data_path='ETTh1.csv', target='OT', scale=True, inverse=False, timeenc=0, freq='15min', seasonal_patterns=None,cols=None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['pred'] self.features = features self.target = target self.scale = scale self.inverse = inverse self.timeenc = timeenc self.freq = freq self.cols = cols self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) ''' df_raw.columns: ['date', ...(other features), target feature] ''' if self.cols: cols = self.cols.copy() cols.remove(self.target) else: cols = list(df_raw.columns) cols.remove(self.target) cols.remove('date') df_raw = df_raw[['date'] + cols + [self.target]] border1 = len(df_raw) - self.seq_len border2 = len(df_raw) if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] if self.scale: self.scaler.fit(df_data.values) data = self.scaler.transform(df_data.values) else: data = df_data.values tmp_stamp = df_raw[['date']][border1:border2] tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date) pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq) df_stamp = pd.DataFrame(columns=['date']) df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:]) if self.timeenc == 0: df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] if self.inverse: self.data_y = df_data.values[border1:border2] else: self.data_y = data[border1:border2] self.data_stamp = data_stamp def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len seq_x = self.data_x[s_begin:s_end] if self.inverse: seq_y = self.data_x[r_begin:r_begin + self.label_len] else: seq_y = self.data_y[r_begin:r_begin + self.label_len] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] return seq_x, seq_y, seq_x_mark, seq_y_mark def __len__(self): return len(self.data_x) - self.seq_len + 1 def inverse_transform(self, data): return self.scaler.inverse_transform(data) class Dataset_ETT_hour(Dataset): def __init__(self, root_path, flag='train', size=None, features='S', data_path='ETTh1.csv', target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['train', 'test', 'val'] type_map = {'train': 0, 'val': 1, 'test': 2} self.set_type = type_map[flag] # M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate' self.features = features self.target = target self.scale = scale self.timeenc = timeenc self.freq = freq self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] border1 = border1s[self.set_type] border2 = border2s[self.set_type] if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] if self.scale: train_data = df_data[border1s[0]:border2s[0]] self.scaler.fit(train_data.values) data = self.scaler.transform(df_data.values) else: data = df_data.values df_stamp = df_raw[['date']][border1:border2] df_stamp['date'] = pd.to_datetime(df_stamp.date) if self.timeenc == 0: df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] self.data_y = data[border1:border2] self.data_stamp = data_stamp def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len seq_x = self.data_x[s_begin:s_end] seq_y = self.data_y[r_begin:r_end] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] return seq_x, seq_y, seq_x_mark, seq_y_mark def __len__(self): return len(self.data_x) - self.seq_len - self.pred_len + 1 def inverse_transform(self, data): return self.scaler.inverse_transform(data) class Dataset_ETT_minute(Dataset): def __init__(self, root_path, flag='train', size=None, features='S', data_path='ETTm1.csv', target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['train', 'test', 'val'] type_map = {'train': 0, 'val': 1, 'test': 2} self.set_type = type_map[flag] self.features = features self.target = target self.scale = scale self.timeenc = timeenc self.freq = freq self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] border1 = border1s[self.set_type] border2 = border2s[self.set_type] if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] if self.scale: train_data = df_data[border1s[0]:border2s[0]] self.scaler.fit(train_data.values) data = self.scaler.transform(df_data.values) else: data = df_data.values df_stamp = df_raw[['date']][border1:border2] df_stamp['date'] = pd.to_datetime(df_stamp.date) if self.timeenc == 0: df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] self.data_y = data[border1:border2] self.data_stamp = data_stamp def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len seq_x = self.data_x[s_begin:s_end] seq_y = self.data_y[r_begin:r_end] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] return seq_x, seq_y, seq_x_mark, seq_y_mark def __len__(self): return len(self.data_x) - self.seq_len - self.pred_len + 1 def inverse_transform(self, data): return self.scaler.inverse_transform(data) ================================================ FILE: exp/exp_basic.py ================================================ import os import torch import numpy as np class Exp_Basic(object): def __init__(self, args): self.args = args self.device = self._acquire_device() self.model = self._build_model().to(self.device) def _build_model(self): raise NotImplementedError return None def _acquire_device(self): if self.args.use_gpu: os.environ["CUDA_VISIBLE_DEVICES"] = str( self.args.gpu) if not self.args.use_multi_gpu else self.args.devices device = torch.device('cuda:{}'.format(self.args.gpu)) print('Use GPU: cuda:{}'.format(self.args.gpu)) else: device = torch.device('cpu') print('Use CPU') return device def _get_data(self): pass def vali(self): pass def train(self): pass def test(self): pass ================================================ FILE: exp/exp_main.py ================================================ from data_provider.data_factory import data_provider from .exp_basic import Exp_Basic from models import Informer, Autoformer, DLinear, MSGNet from utils.tools import EarlyStopping, adjust_learning_rate, visual, test_params_flop from utils.metrics import metric import torch import torch.nn as nn from torch import optim, autograd import os import time import warnings import matplotlib.pyplot as plt import numpy as np warnings.filterwarnings('ignore') class Exp_Main(Exp_Basic): def __init__(self, args): super(Exp_Main, self).__init__(args) def _build_model(self): model_dict = { 'Informer': Informer, 'Autoformer': Autoformer, 'DLinear': DLinear, 'MSGNet': MSGNet } model = model_dict[self.args.model].Model(self.args).float() if self.args.use_multi_gpu and self.args.use_gpu: model = nn.DataParallel(model, device_ids=self.args.device_ids) return model #flag = 'train' or 'val' or 'test' def _get_data(self, flag): data_set, data_loader = data_provider(self.args, flag) return data_set, data_loader def _select_optimizer(self): model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) return model_optim def _select_criterion(self): criterion = nn.MSELoss() return criterion def vali(self, vali_data, vali_loader, criterion): total_loss = [] self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) else: if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) pred = outputs.detach().cpu() true = batch_y.detach().cpu() loss = criterion(pred, true) total_loss.append(loss) total_loss = np.average(total_loss) self.model.train() return total_loss def train(self, setting): train_data, train_loader = self._get_data(flag='train') vali_data, vali_loader = self._get_data(flag='val') test_data, test_loader = self._get_data(flag='test') path = os.path.join(self.args.checkpoints, setting) if not os.path.exists(path): os.makedirs(path) time_now = time.time() train_steps = len(train_loader) early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) model_optim = self._select_optimizer() criterion = self._select_criterion() #use automatic mixed precision training if self.args.use_amp: scaler = torch.cuda.amp.GradScaler() for epoch in range(self.args.train_epochs): iter_count = 0 train_loss = [] self.model.train() epoch_time = time.time() for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) else: if 'Linear' in self.args.model: # print("Linear") outputs = self.model(batch_x) else: if self.args.output_attention: #whether to output attention in ecoder outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) # print(outputs.shape,batch_y.shape) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) if (i + 1) % 100 == 0: print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) speed = (time.time() - time_now) / iter_count left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) iter_count = 0 time_now = time.time() if self.args.use_amp: scaler.scale(loss).backward() scaler.step(model_optim) scaler.update() else: with autograd.detect_anomaly(): loss.backward() model_optim.step() print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.vali(test_data, test_loader, criterion) print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( epoch + 1, train_steps, train_loss, vali_loss, test_loss)) early_stopping(vali_loss, self.model, path) if early_stopping.early_stop: print("Early stopping") break adjust_learning_rate(model_optim, epoch + 1, self.args) best_model_path = path + '/' + 'checkpoint.pth' self.model.load_state_dict(torch.load(best_model_path)) return self.model def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') if test: print('loading model') self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) preds = [] trues = [] inputx = [] folder_path = './test_results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) else: if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 # print(outputs.shape,batch_y.shape) outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) outputs = outputs.detach().cpu().numpy() batch_y = batch_y.detach().cpu().numpy() pred = outputs # outputs.detach().cpu().numpy() # .squeeze() true = batch_y # batch_y.detach().cpu().numpy() # .squeeze() preds.append(pred) trues.append(true) inputx.append(batch_x.detach().cpu().numpy()) if i % 10 == 0: input = batch_x.detach().cpu().numpy() gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) #See utils / tools for usage if self.args.test_flop: test_params_flop((batch_x.shape[1],batch_x.shape[2])) exit() # print('preds_shape:', len(preds),len(preds[0]),len(preds[1])) preds = np.array(preds) trues = np.array(trues) inputx = np.array(inputx) print('preds_shape:', preds.shape) print('trues_shape:', trues.shape) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) mae, mse, rmse, mape, mspe, rse, corr, nd, nrmse = metric(preds, trues) print('nd:{}, nrmse:{}, mse:{}, mae:{}, rse:{}, mape:{}'.format(nd, nrmse,mse, mae, rse, mape)) f = open("result.txt", 'a') f.write(setting + " \n") f.write('nd:{}, nrmse:{}, mse:{}, mae:{}, rse:{}, mape:{}'.format(nd, nrmse,mse, mae, rse, mape)) f.write('\n') f.write('\n') f.close() # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr])) # np.save(folder_path + 'pred.npy', preds) # np.save(folder_path + 'true.npy', trues) # np.save(folder_path + 'x.npy', inputx) return def predict(self, setting, load=False): pred_data, pred_loader = self._get_data(flag='pred') if load: path = os.path.join(self.args.checkpoints, setting) best_model_path = path + '/' + 'checkpoint.pth' self.model.load_state_dict(torch.load(best_model_path)) preds = [] self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[2]]).float().to(batch_y.device) dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) else: if 'Linear' in self.args.model: outputs = self.model(batch_x) else: if self.args.output_attention: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) pred = outputs.detach().cpu().numpy() # .squeeze() preds.append(pred) preds = np.array(preds) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) np.save(folder_path + 'real_prediction.npy', preds) return ================================================ FILE: exp/exp_stat.py ================================================ from data_provider.data_factory import data_provider from exp.exp_basic import Exp_Basic from utils.tools import EarlyStopping, adjust_learning_rate, visual from utils.metrics import metric import numpy as np import torch import torch.nn as nn from torch import optim import os import time import warnings import matplotlib.pyplot as plt from models.Stat_models import * warnings.filterwarnings('ignore') class Exp_Main(Exp_Basic): def __init__(self, args): super(Exp_Main, self).__init__(args) def _build_model(self): model_dict = { 'Naive': Naive_repeat, 'ARIMA': Arima, 'SARIMA': SArima, 'GBRT': GBRT, } model = model_dict[self.args.model](self.args).float() return model def _get_data(self, flag): data_set, data_loader = data_provider(self.args, flag) return data_set, data_loader def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') # Sample 10% samples = max(int(self.args.sample * self.args.batch_size),1) preds = [] trues = [] inputx = [] folder_path = './test_results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): batch_x = batch_x.float().to(self.device).cpu().numpy() batch_y = batch_y.float().to(self.device).cpu().numpy() batch_x = batch_x[:samples] outputs = self.model(batch_x) f_dim = -1 if self.args.features == 'MS' else 0 # print(outputs.shape,batch_y.shape) outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:samples, -self.args.pred_len:, f_dim:] pred = outputs # outputs.detach().cpu().numpy() # .squeeze() true = batch_y # batch_y.detach().cpu().numpy() # .squeeze() preds.append(pred) trues.append(true) inputx.append(batch_x) if i % 20 == 0: input = batch_x gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) preds = np.array(preds) trues = np.array(trues) inputx = np.array(inputx) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1]) folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues) corr = [] print('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr)) f = open("result.txt", 'a') f.write(setting + " \n") f.write('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr)) f.write('\n') f.write('\n') f.close() np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr])) np.save(folder_path + 'pred.npy', preds) np.save(folder_path + 'true.npy', trues) # np.save(folder_path + 'x.npy', inputx) return ================================================ FILE: layers/AutoCorrelation.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import matplotlib.pyplot as plt import numpy as np import math from math import sqrt import os class AutoCorrelation(nn.Module): """ AutoCorrelation Mechanism with the following two phases: (1) period-based dependencies discovery (2) time delay aggregation This block can replace the self-attention family mechanism seamlessly. """ def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False): super(AutoCorrelation, self).__init__() self.factor = factor self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def time_delay_agg_training(self, values, corr): """ SpeedUp version of Autocorrelation (a batch-normalization style design) This is for the training phase. """ head = values.shape[1] channel = values.shape[2] length = values.shape[3] # find top k top_k = int(self.factor * math.log(length)) mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1] weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) # update corr tmp_corr = torch.softmax(weights, dim=-1) # aggregation tmp_values = values delays_agg = torch.zeros_like(values).float() for i in range(top_k): pattern = torch.roll(tmp_values, -int(index[i]), -1) delays_agg = delays_agg + pattern * \ (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) return delays_agg def time_delay_agg_inference(self, values, corr): """ SpeedUp version of Autocorrelation (a batch-normalization style design) This is for the inference phase. """ batch = values.shape[0] head = values.shape[1] channel = values.shape[2] length = values.shape[3] # index init device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).to(device) # find top k top_k = int(self.factor * math.log(length)) mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) weights = torch.topk(mean_value, top_k, dim=-1)[0] delay = torch.topk(mean_value, top_k, dim=-1)[1] # update corr tmp_corr = torch.softmax(weights, dim=-1) # aggregation tmp_values = values.repeat(1, 1, 1, 2) delays_agg = torch.zeros_like(values).float() for i in range(top_k): tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) delays_agg = delays_agg + pattern * \ (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) return delays_agg def time_delay_agg_full(self, values, corr): """ Standard version of Autocorrelation """ batch = values.shape[0] head = values.shape[1] channel = values.shape[2] length = values.shape[3] # index init init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() # find top k top_k = int(self.factor * math.log(length)) weights = torch.topk(corr, top_k, dim=-1)[0] delay = torch.topk(corr, top_k, dim=-1)[1] # update corr tmp_corr = torch.softmax(weights, dim=-1) # aggregation tmp_values = values.repeat(1, 1, 1, 2) delays_agg = torch.zeros_like(values).float() for i in range(top_k): tmp_delay = init_index + delay[..., i].unsqueeze(-1) pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) return delays_agg def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape if L > S: zeros = torch.zeros_like(queries[:, :(L - S), :]).float() values = torch.cat([values, zeros], dim=1) keys = torch.cat([keys, zeros], dim=1) else: values = values[:, :L, :, :] keys = keys[:, :L, :, :] # period-based dependencies (b, len//period , period , d_model) ->(b, period ,d_model, len//period) #(b , T, h , n) ->(b, h, n, T) q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) #(b, period ,d_model, period), res = q_fft * torch.conj(k_fft) corr = torch.fft.irfft(res, dim=-1) # time delay agg if self.training: V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) else: V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) if self.output_attention: return (V.contiguous(), corr.permute(0, 3, 1, 2)) else: return (V.contiguous(), None) class AutoCorrelationLayer(nn.Module): def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None): super(AutoCorrelationLayer, self).__init__() d_keys = d_keys or (d_model // n_heads) d_values = d_values or (d_model // n_heads) self.inner_correlation = correlation self.query_projection = nn.Linear(d_model, d_keys * n_heads) self.key_projection = nn.Linear(d_model, d_keys * n_heads) self.value_projection = nn.Linear(d_model, d_values * n_heads) self.out_projection = nn.Linear(d_values * n_heads, d_model) self.n_heads = n_heads def forward(self, queries, keys, values, attn_mask): B, L, _ = queries.shape _, S, _ = keys.shape H = self.n_heads queries = self.query_projection(queries).view(B, L, H, -1) keys = self.key_projection(keys).view(B, S, H, -1) values = self.value_projection(values).view(B, S, H, -1) out, attn = self.inner_correlation( queries, keys, values, attn_mask ) out = out.view(B, L, -1) return self.out_projection(out), attn ================================================ FILE: layers/Autoformer_EncDec.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F class my_Layernorm(nn.Module): """ Special designed layernorm for the seasonal part """ def __init__(self, channels): super(my_Layernorm, self).__init__() self.layernorm = nn.LayerNorm(channels) def forward(self, x): x_hat = self.layernorm(x) bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) return x_hat - bias class moving_avg(nn.Module): """ Moving average block to highlight the trend of time series """ def __init__(self, kernel_size, stride): super(moving_avg, self).__init__() self.kernel_size = kernel_size self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) def forward(self, x): # padding on the both ends of time series front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) x = torch.cat([front, x, end], dim=1) x = self.avg(x.permute(0, 2, 1)) x = x.permute(0, 2, 1) return x class series_decomp(nn.Module): """ Series decomposition block """ def __init__(self, kernel_size): super(series_decomp, self).__init__() self.moving_avg = moving_avg(kernel_size, stride=1) def forward(self, x): moving_mean = self.moving_avg(x) res = x - moving_mean return res, moving_mean class EncoderLayer(nn.Module): """ Autoformer encoder layer with the progressive decomposition architecture """ def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): super(EncoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.attention = attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) self.decomp1 = series_decomp(moving_avg) self.decomp2 = series_decomp(moving_avg) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, attn_mask=None): new_x, attn = self.attention( x, x, x, attn_mask=attn_mask ) x = x + self.dropout(new_x) x, _ = self.decomp1(x) y = x y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) res, _ = self.decomp2(x + y) return res, attn class Encoder(nn.Module): """ Autoformer encoder """ def __init__(self, attn_layers, conv_layers=None, norm_layer=None): super(Encoder, self).__init__() self.attn_layers = nn.ModuleList(attn_layers) self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None self.norm = norm_layer def forward(self, x, attn_mask=None): attns = [] if self.conv_layers is not None: for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): x, attn = attn_layer(x, attn_mask=attn_mask) x = conv_layer(x) attns.append(attn) x, attn = self.attn_layers[-1](x) attns.append(attn) else: for attn_layer in self.attn_layers: x, attn = attn_layer(x, attn_mask=attn_mask) attns.append(attn) if self.norm is not None: x = self.norm(x) return x, attns class DecoderLayer(nn.Module): """ Autoformer decoder layer with the progressive decomposition architecture """ def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): super(DecoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention self.cross_attention = cross_attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) self.decomp1 = series_decomp(moving_avg) self.decomp2 = series_decomp(moving_avg) self.decomp3 = series_decomp(moving_avg) self.dropout = nn.Dropout(dropout) self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, padding_mode='circular', bias=False) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, cross, x_mask=None, cross_mask=None): x = x + self.dropout(self.self_attention( x, x, x, attn_mask=x_mask )[0]) x, trend1 = self.decomp1(x) x = x + self.dropout(self.cross_attention( x, cross, cross, attn_mask=cross_mask )[0]) x, trend2 = self.decomp2(x) y = x y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) x, trend3 = self.decomp3(x + y) residual_trend = trend1 + trend2 + trend3 residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) return x, residual_trend class Decoder(nn.Module): """ Autoformer encoder """ def __init__(self, layers, norm_layer=None, projection=None): super(Decoder, self).__init__() self.layers = nn.ModuleList(layers) self.norm = norm_layer self.projection = projection def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): for layer in self.layers: x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) trend = trend + residual_trend if self.norm is not None: x = self.norm(x) if self.projection is not None: x = self.projection(x) return x, trend ================================================ FILE: layers/Embed.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from torch.nn.utils import weight_norm import math class PositionalEmbedding(nn.Module): def __init__(self, d_model, max_len=5000): super(PositionalEmbedding, self).__init__() # Compute the positional encodings once in log space. pe = torch.zeros(max_len, d_model).float() pe.require_grad = False position = torch.arange(0, max_len).float().unsqueeze(1) div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0) self.register_buffer('pe', pe) def forward(self, x): return self.pe[:, :x.size(1)] class TokenEmbedding(nn.Module): def __init__(self, c_in, d_model): super(TokenEmbedding, self).__init__() padding = 1 if torch.__version__ >= '1.5.0' else 2 self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, kernel_size=3, padding=padding, padding_mode='circular', bias=False) for m in self.modules(): if isinstance(m, nn.Conv1d): nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu') def forward(self, x): x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) return x class FixedEmbedding(nn.Module): def __init__(self, c_in, d_model): super(FixedEmbedding, self).__init__() w = torch.zeros(c_in, d_model).float() w.require_grad = False position = torch.arange(0, c_in).float().unsqueeze(1) div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() w[:, 0::2] = torch.sin(position * div_term) w[:, 1::2] = torch.cos(position * div_term) self.emb = nn.Embedding(c_in, d_model) self.emb.weight = nn.Parameter(w, requires_grad=False) def forward(self, x): return self.emb(x).detach() class TemporalEmbedding(nn.Module): def __init__(self, d_model, embed_type='fixed', freq='h'): super(TemporalEmbedding, self).__init__() minute_size = 4 hour_size = 24 weekday_size = 7 day_size = 32 month_size = 13 Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding if freq == 't': self.minute_embed = Embed(minute_size, d_model) self.hour_embed = Embed(hour_size, d_model) self.weekday_embed = Embed(weekday_size, d_model) self.day_embed = Embed(day_size, d_model) self.month_embed = Embed(month_size, d_model) def forward(self, x): x = x.long() minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0. hour_x = self.hour_embed(x[:, :, 3]) weekday_x = self.weekday_embed(x[:, :, 2]) day_x = self.day_embed(x[:, :, 1]) month_x = self.month_embed(x[:, :, 0]) return hour_x + weekday_x + day_x + month_x + minute_x class TimeFeatureEmbedding(nn.Module): def __init__(self, d_model, embed_type='timeF', freq='h'): super(TimeFeatureEmbedding, self).__init__() freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} d_inp = freq_map[freq] self.embed = nn.Linear(d_inp, d_model, bias=False) def forward(self, x): return self.embed(x) class DataEmbedding(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding, self).__init__() self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) #(batch_size, len batch_x[1], d_model ) self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( d_model=d_model, embed_type=embed_type, freq=freq) #(1, len batch_x[1], d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): if x_mark is None: x = self.value_embedding(x) + self.position_embedding(x) else: x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x) return self.dropout(x) class DataEmbedding_wo_pos(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding_wo_pos, self).__init__() self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( d_model=d_model, embed_type=embed_type, freq=freq) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): x = self.value_embedding(x) + self.temporal_embedding(x_mark) return self.dropout(x) class DataEmbedding_wo_pos_temp(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding_wo_pos_temp, self).__init__() self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( d_model=d_model, embed_type=embed_type, freq=freq) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): x = self.value_embedding(x) return self.dropout(x) class DataEmbedding_wo_temp(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding_wo_temp, self).__init__() self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( d_model=d_model, embed_type=embed_type, freq=freq) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): x = self.value_embedding(x) + self.position_embedding(x) return self.dropout(x) ================================================ FILE: layers/MSGBlock.py ================================================ from math import sqrt import numpy as np import torch.nn as nn import torch.nn.functional as F import torch from torch import nn, Tensor from einops import rearrange from einops.layers.torch import Rearrange from utils.masking import TriangularCausalMask class Predict(nn.Module): def __init__(self, individual, c_out, seq_len, pred_len, dropout): super(Predict, self).__init__() self.individual = individual self.c_out = c_out if self.individual: self.seq2pred = nn.ModuleList() self.dropout = nn.ModuleList() for i in range(self.c_out): self.seq2pred.append(nn.Linear(seq_len , pred_len)) self.dropout.append(nn.Dropout(dropout)) else: self.seq2pred = nn.Linear(seq_len , pred_len) self.dropout = nn.Dropout(dropout) #(B, c_out , seq) def forward(self, x): if self.individual: out = [] for i in range(self.c_out): per_out = self.seq2pred[i](x[:,i,:]) per_out = self.dropout[i](per_out) out.append(per_out) out = torch.stack(out,dim=1) else: out = self.seq2pred(x) out = self.dropout(out) return out class Attention_Block(nn.Module): def __init__(self, d_model, d_ff=None, n_heads=8, dropout=0.1, activation="relu"): super(Attention_Block, self).__init__() d_ff = d_ff or 4 * d_model self.attention = self_attention(FullAttention, d_model, n_heads=n_heads) self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, attn_mask=None): new_x, attn = self.attention( x, x, x, attn_mask=attn_mask ) x = x + self.dropout(new_x) y = x = self.norm1(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm2(x + y) class self_attention(nn.Module): def __init__(self, attention, d_model ,n_heads): super(self_attention, self).__init__() d_keys = d_model // n_heads d_values = d_model // n_heads self.inner_attention = attention( attention_dropout = 0.1) self.query_projection = nn.Linear(d_model, d_keys * n_heads) self.key_projection = nn.Linear(d_model, d_keys * n_heads) self.value_projection = nn.Linear(d_model, d_values * n_heads) self.out_projection = nn.Linear(d_values * n_heads, d_model) self.n_heads = n_heads def forward(self, queries ,keys ,values, attn_mask= None): B, L, _ = queries.shape _, S, _ = keys.shape H = self.n_heads queries = self.query_projection(queries).view(B, L, H, -1) keys = self.key_projection(keys).view(B, S, H, -1) values = self.value_projection(values).view(B, S, H, -1) out, attn = self.inner_attention( queries, keys, values, attn_mask ) out = out.view(B, L, -1) out = self.out_projection(out) return out , attn class FullAttention(nn.Module): def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): super(FullAttention, self).__init__() self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape scale = self.scale or 1. / sqrt(E) scores = torch.einsum("blhe,bshe->bhls", queries, keys) if self.mask_flag: if attn_mask is None: attn_mask = TriangularCausalMask(B, L, device=queries.device) scores.masked_fill_(attn_mask.mask, -np.inf) A = self.dropout(torch.softmax(scale * scores, dim=-1)) V = torch.einsum("bhls,bshd->blhd", A, values) # return V.contiguous() if self.output_attention: return (V.contiguous(), A) else: return (V.contiguous(), None) class GraphBlock(nn.Module): def __init__(self, c_out , d_model , conv_channel, skip_channel, gcn_depth , dropout, propalpha ,seq_len , node_dim): super(GraphBlock, self).__init__() self.nodevec1 = nn.Parameter(torch.randn(c_out, node_dim), requires_grad=True) self.nodevec2 = nn.Parameter(torch.randn(node_dim, c_out), requires_grad=True) self.start_conv = nn.Conv2d(1 , conv_channel, (d_model - c_out + 1, 1)) self.gconv1 = mixprop(conv_channel, skip_channel, gcn_depth, dropout, propalpha) self.gelu = nn.GELU() self.end_conv = nn.Conv2d(skip_channel, seq_len , (1, seq_len )) self.linear = nn.Linear(c_out, d_model) self.norm = nn.LayerNorm(d_model) # x in (B, T, d_model) # Here we use a mlp to fit a complex mapping f (x) def forward(self, x): adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1) out = x.unsqueeze(1).transpose(2, 3) out = self.start_conv(out) out = self.gelu(self.gconv1(out , adp)) out = self.end_conv(out).squeeze() out = self.linear(out) return self.norm(x + out) class nconv(nn.Module): def __init__(self): super(nconv,self).__init__() def forward(self,x, A): x = torch.einsum('ncwl,vw->ncvl',(x,A)) # x = torch.einsum('ncwl,wv->nclv',(x,A) return x.contiguous() class linear(nn.Module): def __init__(self,c_in,c_out,bias=True): super(linear,self).__init__() self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=bias) def forward(self,x): return self.mlp(x) class mixprop(nn.Module): def __init__(self,c_in,c_out,gdep,dropout,alpha): super(mixprop, self).__init__() self.nconv = nconv() self.mlp = linear((gdep+1)*c_in,c_out) self.gdep = gdep self.dropout = dropout self.alpha = alpha def forward(self, x, adj): adj = adj + torch.eye(adj.size(0)).to(x.device) d = adj.sum(1) h = x out = [h] a = adj / d.view(-1, 1) for i in range(self.gdep): h = self.alpha*x + (1-self.alpha)*self.nconv(h,a) out.append(h) ho = torch.cat(out,dim=1) ho = self.mlp(ho) return ho class simpleVIT(nn.Module): def __init__(self, in_channels, emb_size, patch_size=2, depth=1, num_heads=4, dropout=0.1,init_weight =True): super(simpleVIT, self).__init__() self.emb_size = emb_size self.depth = depth self.to_patch = nn.Sequential( nn.Conv2d(in_channels, emb_size, 2 * patch_size + 1, padding= patch_size), Rearrange('b e (h) (w) -> b (h w) e'), ) self.layers = nn.ModuleList([]) for _ in range(self.depth): self.layers.append(nn.ModuleList([ nn.LayerNorm(emb_size), MultiHeadAttention(emb_size, num_heads, dropout), FeedForward(emb_size, emb_size) ])) if init_weight: self._initialize_weights() def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) def forward(self,x): B , N ,_ ,P = x.shape x = self.to_patch(x) # x = x.permute(0, 2, 3, 1).reshape(B,-1, N) for norm ,attn, ff in self.layers: x = attn(norm(x)) + x x = ff(x) + x x = x.transpose(1,2).reshape(B, self.emb_size ,-1, P) return x class MultiHeadAttention(nn.Module): def __init__(self, emb_size, num_heads, dropout): super().__init__() self.emb_size = emb_size self.num_heads = num_heads self.keys = nn.Linear(emb_size, emb_size) self.queries = nn.Linear(emb_size, emb_size) self.values = nn.Linear(emb_size, emb_size) self.att_drop = nn.Dropout(dropout) self.projection = nn.Linear(emb_size, emb_size) def forward(self, x: Tensor, mask: Tensor = None) -> Tensor: queries = rearrange(self.queries(x), "b n (h d) -> b h n d", h=self.num_heads) keys = rearrange(self.keys(x), "b n (h d) -> b h n d", h=self.num_heads) values = rearrange(self.values(x), "b n (h d) -> b h n d", h=self.num_heads) energy = torch.einsum('bhqd, bhkd -> bhqk', queries, keys) if mask is not None: fill_value = torch.finfo(torch.float32).min energy.mask_fill(~mask, fill_value) scaling = self.emb_size ** (1 / 2) att = F.softmax(energy, dim=-1) / scaling att = self.att_drop(att) # sum up over the third axis out = torch.einsum('bhal, bhlv -> bhav ', att, values) out = rearrange(out, "b h n d -> b n (h d)") out = self.projection(out) return out class FeedForward(nn.Module): def __init__(self, dim, hidden_dim): super().__init__() self.net = nn.Sequential( nn.LayerNorm(dim), nn.Linear(dim, hidden_dim), nn.GELU(), nn.Linear(hidden_dim, dim), ) def forward(self, x): return self.net(x) ================================================ FILE: layers/SelfAttention_Family.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import matplotlib.pyplot as plt import numpy as np import math from math import sqrt from utils.masking import TriangularCausalMask, ProbMask import os class FullAttention(nn.Module): def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): super(FullAttention, self).__init__() self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape scale = self.scale or 1. / sqrt(E) scores = torch.einsum("blhe,bshe->bhls", queries, keys) if self.mask_flag: if attn_mask is None: attn_mask = TriangularCausalMask(B, L, device=queries.device) scores.masked_fill_(attn_mask.mask, -np.inf) A = self.dropout(torch.softmax(scale * scores, dim=-1)) V = torch.einsum("bhls,bshd->blhd", A, values) if self.output_attention: return (V.contiguous(), A) else: return (V.contiguous(), None) class ProbAttention(nn.Module): def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): super(ProbAttention, self).__init__() self.factor = factor self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) # Q [B, H, L, D] B, H, L_K, E = K.shape _, _, L_Q, _ = Q.shape # calculate the sampled Q_K K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() # find the Top_k query with sparisty measurement M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) M_top = M.topk(n_top, sorted=False)[1] # use the reduced Q to calculate Q_K Q_reduce = Q[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], M_top, :] # factor*ln(L_q) Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k return Q_K, M_top def _get_initial_context(self, V, L_Q): B, H, L_V, D = V.shape if not self.mask_flag: # V_sum = V.sum(dim=-2) V_sum = V.mean(dim=-2) contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() else: # use mask assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only contex = V.cumsum(dim=-2) return contex def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): B, H, L_V, D = V.shape if self.mask_flag: attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) scores.masked_fill_(attn_mask.mask, -np.inf) attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) context_in[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = torch.matmul(attn, V).type_as(context_in) if self.output_attention: attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device) attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn return (context_in, attns) else: return (context_in, None) def forward(self, queries, keys, values, attn_mask): B, L_Q, H, D = queries.shape _, L_K, _, _ = keys.shape queries = queries.transpose(2, 1) keys = keys.transpose(2, 1) values = values.transpose(2, 1) U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) U_part = U_part if U_part < L_K else L_K u = u if u < L_Q else L_Q scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) # add scale factor scale = self.scale or 1. / sqrt(D) if scale is not None: scores_top = scores_top * scale # get the context context = self._get_initial_context(values, L_Q) # update the context with selected top_k queries context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask) return context.contiguous(), attn class AttentionLayer(nn.Module): def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None): super(AttentionLayer, self).__init__() d_keys = d_keys or (d_model // n_heads) d_values = d_values or (d_model // n_heads) self.inner_attention = attention self.query_projection = nn.Linear(d_model, d_keys * n_heads) self.key_projection = nn.Linear(d_model, d_keys * n_heads) self.value_projection = nn.Linear(d_model, d_values * n_heads) self.out_projection = nn.Linear(d_values * n_heads, d_model) self.n_heads = n_heads def forward(self, queries, keys, values, attn_mask): B, L, _ = queries.shape _, S, _ = keys.shape H = self.n_heads queries = self.query_projection(queries).view(B, L, H, -1) keys = self.key_projection(keys).view(B, S, H, -1) values = self.value_projection(values).view(B, S, H, -1) out, attn = self.inner_attention( queries, keys, values, attn_mask ) out = out.view(B, L, -1) return self.out_projection(out), attn ================================================ FILE: layers/Transformer_EncDec.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F class ConvLayer(nn.Module): def __init__(self, c_in): super(ConvLayer, self).__init__() self.downConv = nn.Conv1d(in_channels=c_in, out_channels=c_in, kernel_size=3, padding=2, padding_mode='circular') self.norm = nn.BatchNorm1d(c_in) self.activation = nn.ELU() self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) def forward(self, x): x = self.downConv(x.permute(0, 2, 1)) x = self.norm(x) x = self.activation(x) x = self.maxPool(x) x = x.transpose(1, 2) return x class EncoderLayer(nn.Module): def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): super(EncoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.attention = attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, attn_mask=None): new_x, attn = self.attention( x, x, x, attn_mask=attn_mask ) x = x + self.dropout(new_x) y = x = self.norm1(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm2(x + y), attn class Encoder(nn.Module): def __init__(self, attn_layers, conv_layers=None, norm_layer=None): super(Encoder, self).__init__() self.attn_layers = nn.ModuleList(attn_layers) self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None self.norm = norm_layer def forward(self, x, attn_mask=None): attns = [] if self.conv_layers is not None: for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): x, attn = attn_layer(x, attn_mask=attn_mask) x = conv_layer(x) attns.append(attn) x, attn = self.attn_layers[-1](x) attns.append(attn) else: for attn_layer in self.attn_layers: x, attn = attn_layer(x, attn_mask=attn_mask) attns.append(attn) if self.norm is not None: x = self.norm(x) return x, attns class DecoderLayer(nn.Module): def __init__(self, self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"): super(DecoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention self.cross_attention = cross_attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.norm3 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu # dec_out(in), enc_out def forward(self, x, cross, x_mask=None, cross_mask=None): x = x + self.dropout(self.self_attention( x, x, x, attn_mask=x_mask )[0]) x = self.norm1(x) x = x + self.dropout(self.cross_attention( x, cross, cross, #q,k,v attn_mask=cross_mask )[0]) y = x = self.norm2(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm3(x + y) class Decoder(nn.Module): def __init__(self, layers, norm_layer=None, projection=None): super(Decoder, self).__init__() self.layers = nn.ModuleList(layers) self.norm = norm_layer self.projection = projection #self.decoder(dec_out(in), enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) def forward(self, x, cross, x_mask=None, cross_mask=None ,external=None): for layer in self.layers: x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) if self.norm is not None: x = self.norm(x) if self.projection is not None: x = self.projection(x) return x ================================================ FILE: models/Autoformer.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from layers.Embed import DataEmbedding, DataEmbedding_wo_pos,DataEmbedding_wo_pos_temp,DataEmbedding_wo_temp from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp import math import numpy as np class Model(nn.Module): """ Autoformer is the first method to achieve the series-wise connection, with inherent O(LlogL) complexity """ def __init__(self, configs): super(Model, self).__init__() self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len self.output_attention = configs.output_attention # Decomp kernel_size = configs.moving_avg self.decomp = series_decomp(kernel_size) # Embedding # The series-wise connection inherently contains the sequential information. # Thus, we can discard the position embedding of transformers. if configs.embed_type == 0: self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 1: self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 2: self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 3: self.enc_embedding = DataEmbedding_wo_temp(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_temp(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 4: self.enc_embedding = DataEmbedding_wo_pos_temp(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_pos_temp(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) # Encoder self.encoder = Encoder( [ EncoderLayer( AutoCorrelationLayer( AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, output_attention=configs.output_attention), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, activation=configs.activation ) for l in range(configs.e_layers) ], norm_layer=my_Layernorm(configs.d_model) ) # Decoder self.decoder = Decoder( [ DecoderLayer( AutoCorrelationLayer( AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), AutoCorrelationLayer( AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.c_out, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, activation=configs.activation, ) for l in range(configs.d_layers) ], norm_layer=my_Layernorm(configs.d_model), projection=nn.Linear(configs.d_model, configs.c_out, bias=True) ) def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): # decomp init mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1) zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device) seasonal_init, trend_init = self.decomp(x_enc) # decoder input trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1) seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1) # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) # dec dec_out = self.dec_embedding(seasonal_init, x_mark_dec) seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask, trend=trend_init) # final dec_out = trend_part + seasonal_part if self.output_attention: return dec_out[:, -self.pred_len:, :], attns else: return dec_out[:, -self.pred_len:, :] # [B, L, D] ================================================ FILE: models/DLinear.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import numpy as np class moving_avg(nn.Module): """ Moving average block to highlight the trend of time series """ def __init__(self, kernel_size, stride): super(moving_avg, self).__init__() self.kernel_size = kernel_size self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) def forward(self, x): # padding on the both ends of time series front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) x = torch.cat([front, x, end], dim=1) # print(x.permute(0, 2, 1)) x = self.avg(x.permute(0, 2, 1)) x = x.permute(0, 2, 1) return x class series_decomp(nn.Module): """ Series decomposition block """ def __init__(self, kernel_size): super(series_decomp, self).__init__() self.moving_avg = moving_avg(kernel_size, stride=1) def forward(self, x): moving_mean = self.moving_avg(x) res = x - moving_mean return res, moving_mean class Model(nn.Module): """ Decomposition-Linear """ def __init__(self, configs): super(Model, self).__init__() self.seq_len = configs.seq_len self.pred_len = configs.pred_len # Decompsition Kernel Size kernel_size = 25 self.decompsition = series_decomp(kernel_size) #return res, moving_mean self.individual = configs.individual self.channels = configs.enc_in if self.individual: self.Linear_Seasonal = nn.ModuleList() self.Linear_Trend = nn.ModuleList() for i in range(self.channels): self.Linear_Seasonal.append(nn.Linear(self.seq_len,self.pred_len)) self.Linear_Trend.append(nn.Linear(self.seq_len,self.pred_len)) # Use this two lines if you want to visualize the weights # self.Linear_Seasonal[i].weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len])) # self.Linear_Trend[i].weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len])) else: self.Linear_Seasonal = nn.Linear(self.seq_len,self.pred_len) self.Linear_Trend = nn.Linear(self.seq_len,self.pred_len) # Use this two lines if you want to visualize the weights # self.Linear_Seasonal.weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len])) # self.Linear_Trend.weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len])) def forward(self, x): # x: [Batch, Input length, Channel] seasonal_init, trend_init = self.decompsition(x) #return res, moving_mean seasonal_init, trend_init = seasonal_init.permute(0,2,1), trend_init.permute(0,2,1) if self.individual: seasonal_output = torch.zeros([seasonal_init.size(0),seasonal_init.size(1),self.pred_len],dtype=seasonal_init.dtype).to(seasonal_init.device) trend_output = torch.zeros([trend_init.size(0),trend_init.size(1),self.pred_len],dtype=trend_init.dtype).to(trend_init.device) for i in range(self.channels): seasonal_output[:,i,:] = self.Linear_Seasonal[i](seasonal_init[:,i,:]) trend_output[:,i,:] = self.Linear_Trend[i](trend_init[:,i,:]) else: seasonal_output = self.Linear_Seasonal(seasonal_init) trend_output = self.Linear_Trend(trend_init) x = seasonal_output + trend_output return x.permute(0,2,1) # to [Batch, Output length, Channel] ================================================ FILE: models/Informer.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from utils.masking import TriangularCausalMask, ProbMask from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer from layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer from layers.Embed import DataEmbedding,DataEmbedding_wo_pos,DataEmbedding_wo_temp,DataEmbedding_wo_pos_temp import numpy as np class Model(nn.Module): """ Informer with Propspare attention in O(LlogL) complexity """ def __init__(self, configs): super(Model, self).__init__() self.pred_len = configs.pred_len self.output_attention = configs.output_attention # Embedding if configs.embed_type == 0: self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 1: self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 2: self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 3: self.enc_embedding = DataEmbedding_wo_temp(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_temp(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) elif configs.embed_type == 4: self.enc_embedding = DataEmbedding_wo_pos_temp(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding_wo_pos_temp(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) # Encoder self.encoder = Encoder( [ EncoderLayer( AttentionLayer( ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=configs.output_attention), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, dropout=configs.dropout, activation=configs.activation ) for l in range(configs.e_layers) ], [ ConvLayer( configs.d_model ) for l in range(configs.e_layers - 1) ] if configs.distil else None, norm_layer=torch.nn.LayerNorm(configs.d_model) ) # Decoder self.decoder = Decoder( [ DecoderLayer( AttentionLayer( ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), AttentionLayer( ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, dropout=configs.dropout, activation=configs.activation, ) for l in range(configs.d_layers) ], norm_layer=torch.nn.LayerNorm(configs.d_model), projection=nn.Linear(configs.d_model, configs.c_out, bias=True) ) def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) dec_out = self.dec_embedding(x_dec, x_mark_dec) dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) if self.output_attention: return dec_out[:, -self.pred_len:, :], attns else: return dec_out[:, -self.pred_len:, :] # [B, L, D] ================================================ FILE: models/MSGNet.py ================================================ import numpy as np # import pywt import torch import torch.nn as nn import torch.nn.functional as F import torch.fft from layers.Embed import DataEmbedding from layers.MSGBlock import GraphBlock, simpleVIT, Attention_Block, Predict def FFT_for_Period(x, k=2): # [B, T, C] xf = torch.fft.rfft(x, dim=1) frequency_list = abs(xf).mean(0).mean(-1) frequency_list[0] = 0 _, top_list = torch.topk(frequency_list, k) top_list = top_list.detach().cpu().numpy() period = x.shape[1] // top_list return period, abs(xf).mean(-1)[:, top_list] class ScaleGraphBlock(nn.Module): def __init__(self, configs): super(ScaleGraphBlock, self).__init__() self.seq_len = configs.seq_len self.pred_len = configs.pred_len self.k = configs.top_k self.att0 = Attention_Block(configs.d_model, configs.d_ff, n_heads=configs.n_heads, dropout=configs.dropout, activation="gelu") self.norm = nn.LayerNorm(configs.d_model) self.gelu = nn.GELU() self.gconv = nn.ModuleList() for i in range(self.k): self.gconv.append( GraphBlock(configs.c_out , configs.d_model , configs.conv_channel, configs.skip_channel, configs.gcn_depth , configs.dropout, configs.propalpha ,configs.seq_len, configs.node_dim)) def forward(self, x): B, T, N = x.size() scale_list, scale_weight = FFT_for_Period(x, self.k) res = [] for i in range(self.k): scale = scale_list[i] #Gconv x = self.gconv[i](x) # paddng if (self.seq_len) % scale != 0: length = (((self.seq_len) // scale) + 1) * scale padding = torch.zeros([x.shape[0], (length - (self.seq_len)), x.shape[2]]).to(x.device) out = torch.cat([x, padding], dim=1) else: length = self.seq_len out = x out = out.reshape(B, length // scale, scale, N) #for Mul-attetion out = out.reshape(-1 , scale , N) out = self.norm(self.att0(out)) out = self.gelu(out) out = out.reshape(B, -1 , scale , N).reshape(B ,-1 ,N) # #for simpleVIT # out = self.att(out.permute(0, 3, 1, 2).contiguous()) #return # out = out.permute(0, 2, 3, 1).reshape(B, -1 ,N) out = out[:, :self.seq_len, :] res.append(out) res = torch.stack(res, dim=-1) # adaptive aggregation scale_weight = F.softmax(scale_weight, dim=1) scale_weight = scale_weight.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1) res = torch.sum(res * scale_weight, -1) # residual connection res = res + x return res class Model(nn.Module): def __init__(self, configs): super(Model, self).__init__() self.configs = configs self.task_name = configs.task_name self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len self.device = "cuda" if torch.cuda.is_available() else "cpu" # for graph # self.num_nodes = configs.c_out # self.subgraph_size = configs.subgraph_size # self.node_dim = configs.node_dim # to return adj (node , node) # self.graph = constructor_graph() self.model = nn.ModuleList([ScaleGraphBlock(configs) for _ in range(configs.e_layers)]) self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.layer = configs.e_layers self.layer_norm = nn.LayerNorm(configs.d_model) self.predict_linear = nn.Linear( self.seq_len, self.pred_len + self.seq_len) self.projection = nn.Linear( configs.d_model, configs.c_out, bias=True) self.seq2pred = Predict(configs.individual ,configs.c_out, configs.seq_len, configs.pred_len, configs.dropout) def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): # Normalization from Non-stationary Transformer means = x_enc.mean(1, keepdim=True).detach() x_enc = x_enc - means stdev = torch.sqrt( torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) x_enc /= stdev # embedding enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] # adp = self.graph(torch.arange(self.num_nodes).to(self.device)) for i in range(self.layer): enc_out = self.layer_norm(self.model[i](enc_out)) # porject back dec_out = self.projection(enc_out) dec_out = self.seq2pred(dec_out.transpose(1, 2)).transpose(1, 2) # De-Normalization from Non-stationary Transformer dec_out = dec_out * \ (stdev[:, 0, :].unsqueeze(1).repeat( 1, self.pred_len, 1)) dec_out = dec_out + \ (means[:, 0, :].unsqueeze(1).repeat( 1, self.pred_len, 1)) return dec_out[:, -self.pred_len:, :] ================================================ FILE: run_longExp.py ================================================ import argparse import os import time from multiprocessing import freeze_support import torch from exp.exp_main import Exp_Main import random import numpy as np fix_seed = 2021 random.seed(fix_seed) torch.manual_seed(fix_seed) np.random.seed(fix_seed) parser = argparse.ArgumentParser(description='MSGNet for Time Series Forecasting') # basic config parser.add_argument('--task_name', type=str, required=False, default='long_term_forecast', help='task name, options:[long_term_forecast, mask, short_term_forecast, imputation, classification, anomaly_detection]') parser.add_argument('--is_training', type=int, required=True, default=1, help='status') parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') parser.add_argument('--model', type=str, required=True, default='Autoformer', help='model name, options: [Autoformer, Informer, Transformer]') # data loader parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file') parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') parser.add_argument('--features', type=str, default='M', help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate,' ' S:univariate predict univariate, MS:multivariate predict univariate') parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') parser.add_argument('--freq', type=str, default='h', help='freq for time features encoding, ' 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], ' 'you can also use more detailed freq like 15min or 3h') parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') # forecasting task parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') parser.add_argument('--label_len', type=int, default=48, help='start token length') parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock/ScaleGraphBlock') parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') parser.add_argument('--num_nodes', type=int, default=7, help='to create Graph') parser.add_argument('--subgraph_size', type=int, default=3, help='neighbors number') parser.add_argument('--tanhalpha', type=float, default=3, help='') #GCN parser.add_argument('--node_dim', type=int, default=10, help='each node embbed to dim dimentions') parser.add_argument('--gcn_depth', type=int, default=2, help='') parser.add_argument('--gcn_dropout', type=float, default=0.3, help='') parser.add_argument('--propalpha', type=float, default=0.3, help='') parser.add_argument('--conv_channel', type=int, default=32, help='') parser.add_argument('--skip_channel', type=int, default=32, help='') # DLinear parser.add_argument('--individual', action='store_true', default=False, help='DLinear: a linear layer for each variate(channel) individually') # Formers parser.add_argument('--embed_type', type=int, default=0, help='0: default ' '1: value embedding + temporal embedding + positional embedding ' '2: value embedding + temporal embedding ' '3: value embedding + positional embedding ' '4: value embedding') parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') parser.add_argument('--c_out', type=int, default=7, help='output size') parser.add_argument('--d_model', type=int, default=512, help='dimension of model') parser.add_argument('--n_heads', type=int, default=8, help='num of heads') parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn') parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') parser.add_argument('--factor', type=int, default=1, help='attn factor') parser.add_argument('--distil', action='store_false', help='whether to use distilling in encoder, using this argument means not using distilling', default=True) parser.add_argument('--dropout', type=float, default=0.05, help='dropout') parser.add_argument('--embed', type=str, default='timeF', help='time features encoding, options:[timeF, fixed, learned]') parser.add_argument('--activation', type=str, default='gelu', help='activation') parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder') parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data') # optimization parser.add_argument('--num_workers', type=int, default=8, help='data loader num workers') parser.add_argument('--itr', type=int, default=2, help='experiments times') parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') parser.add_argument('--patience', type=int, default=3, help='early stopping patience') parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') parser.add_argument('--des', type=str, default='test', help='exp description') parser.add_argument('--loss', type=str, default='MSE', help='loss function') parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) # GPU parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') parser.add_argument('--gpu', type=int, default=0, help='gpu') parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus') parser.add_argument('--test_flop', action='store_true', default=False, help='See utils/tools for usage') args = parser.parse_args() args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False if args.use_gpu and args.use_multi_gpu: args.dvices = args.devices.replace(' ', '') device_ids = args.devices.split(',') args.device_ids = [int(id_) for id_ in device_ids] args.gpu = args.device_ids[0] print('Args in experiment:') print(args) Exp = Exp_Main if args.is_training: start = time.time() for ii in range(args.itr): # setting record of experiments setting = '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( args.model_id, args.model, args.data, args.features, args.seq_len, args.label_len, args.pred_len, args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff, args.factor, args.embed, args.distil, args.des, ii) exp = Exp(args) # set experiments print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) exp.train(setting) print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) exp.test(setting) # if args.do_predict: # print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) # exp.predict(setting, True) torch.cuda.empty_cache() end = time.time() used_time = end -start print("time:",used_time) f = open("result.txt", 'a') f.write('time:{}'.format(used_time)) f.write('\n') f.write('\n') f.close() else: ii = 0 setting = '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(args.model_id, args.model, args.data, args.features, args.seq_len, args.label_len, args.pred_len, args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff, args.factor, args.embed, args.distil, args.des, ii) exp = Exp(args) # set experiments print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) exp.test(setting, test=1) torch.cuda.empty_cache() ================================================ FILE: scripts/ETTh1.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/ETTh1" ]; then mkdir ./logs/ETTh1 fi export CUDA_VISIBLE_DEVICES=0 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh1.csv \ --model_id ETTh1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh1 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.1 \ --batch_size 32 \ --itr 1 #>logs/ETTh1/$model_name'_'ETTh1_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh1.csv \ --model_id ETTh1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh1 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.1 \ --batch_size 32 \ --itr 1 #>logs/ETTh1/$model_name'_'ETTh1_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh1.csv \ --model_id ETTh1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh1 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.1 \ --batch_size 32 \ --itr 1 #>logs/ETTh1/$model_name'_'ETTh1_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh1.csv \ --model_id ETTh1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh1 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.1 \ --batch_size 32 \ --itr 1 #>logs/ETTh1/$model_name'_'ETTh1_$seq_len'_'$pred_len.log ================================================ FILE: scripts/ETTh2.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/ETTh2" ]; then mkdir ./logs/ETTh2 fi export CUDA_VISIBLE_DEVICES=1 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh2.csv \ --model_id ETTh2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh2 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --conv_channel 32 \ --skip_channel 32 \ --top_k 5 \ --batch_size 32 \ --itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh2.csv \ --model_id ETTh2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh2 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --conv_channel 32 \ --skip_channel 32 \ --top_k 5 \ --batch_size 32 \ --itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh2.csv \ --model_id ETTh2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh2 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --conv_channel 32 \ --skip_channel 32 \ --top_k 5 \ --batch_size 32 \ --itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTh2.csv \ --model_id ETTh2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTh2 \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --conv_channel 32 \ --skip_channel 32 \ --top_k 5 \ --batch_size 32 \ --itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log ================================================ FILE: scripts/ETTm1.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/ETTm1" ]; then mkdir ./logs/ETTm1 fi export CUDA_VISIBLE_DEVICES=2 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm1.csv \ --model_id ETTm1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm1 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/ETTm1/$model_name'_'ETTm1_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm1.csv \ --model_id ETTm1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm1 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/ETTm1/$model_name'_'ETTm1_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm1.csv \ --model_id ETTm1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm1 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/ETTm1/$model_name'_'ETTm1_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm1.csv \ --model_id ETTm1'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm1 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/ETTm1/$model_name'_'ETTm1_$seq_len'_'$pred_len.log ================================================ FILE: scripts/ETTm2.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/ETTm2" ]; then mkdir ./logs/ETTm2 fi export CUDA_VISIBLE_DEVICES=3 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm2.csv \ --model_id ETTm2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm2 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.3 \ --batch_size 32 \ --itr 1 #>logs/ETTm2/$model_name'_'ETTm2_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm2.csv \ --model_id ETTm2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm2 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.3 \ --batch_size 32 \ --itr 1 #>logs/ETTm2/$model_name'_'ETTm2_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm2.csv \ --model_id ETTm2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm2 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.3 \ --batch_size 32 \ --itr 1 #>logs/ETTm2/$model_name'_'ETTm2_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path ETTm2.csv \ --model_id ETTm2'_'$seq_len'_'$pred_len \ --model $model_name \ --data ETTm2 \ --features M \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --conv_channel 32 \ --skip_channel 32 \ --dropout 0.3 \ --batch_size 32 \ --itr 1 #>logs/ETTm2/$model_name'_'ETTm2_$seq_len'_'$pred_len.log ================================================ FILE: scripts/Flight.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/Flight" ]; then mkdir ./logs/Flight fi export CUDA_VISIBLE_DEVICES=2 seq_len=96 label_len=48 model_name=MSGNet for pred_len in 96 192 336 720 do python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path Flight.csv \ --model_id Flight'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'UUEE' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 \ --d_model 16 \ --d_ff 32 \ --top_k 5 \ --conv_channel 32 \ --skip_channel 32 \ --node_dim 100 \ --batch_size 32 #>logs/Flight/$model_name'_'Flight_$seq_len'_'$pred_len.log done ================================================ FILE: scripts/electricity.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/electricity" ]; then mkdir ./logs/electricity fi export CUDA_VISIBLE_DEVICES=3 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path electricity.csv \ --model_id electricity'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --d_model 1024 \ --d_ff 512 \ --top_k 5 \ --conv_channel 16 \ --skip_channel 32 \ --node_dim 100 \ --batch_size 32 \ --itr 1 #>logs/electricity/$model_name'_'electricity_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path electricity.csv \ --model_id electricity'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --d_model 1024 \ --d_ff 512 \ --top_k 5 \ --conv_channel 16 \ --skip_channel 32 \ --node_dim 100 \ --batch_size 32 \ --itr 1 #>logs/electricity/$model_name'_'electricity_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path electricity.csv \ --model_id electricity'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 3 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --d_model 1024 \ --d_ff 512 \ --top_k 5 \ --conv_channel 16 \ --skip_channel 32 \ --node_dim 100 \ --batch_size 32 \ --itr 1 #>logs/electricity/$model_name'_'electricity_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path electricity.csv \ --model_id electricity'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 3 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --d_model 1024 \ --d_ff 512 \ --top_k 5 \ --conv_channel 16 \ --skip_channel 32 \ --node_dim 100 \ --batch_size 32 \ --itr 1 #>logs/electricity/$model_name'_'electricity_$seq_len'_'$pred_len.log ================================================ FILE: scripts/exchange.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/exchange" ]; then mkdir ./logs/exchange fi export CUDA_VISIBLE_DEVICES=2 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path exchange_rate.csv \ --model_id exchange'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 3 \ --dropout 0.2 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/exchange/$model_name'_'exchange_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path exchange_rate.csv \ --model_id exchange'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --node_dim 30 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/exchange/$model_name'_'exchange_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path exchange_rate.csv \ --model_id exchange'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --node_dim 30 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/exchange/$model_name'_'exchange_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path exchange_rate.csv \ --model_id exchange'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --conv_channel 16 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/exchange/$model_name'_'exchange_$seq_len'_'$pred_len.log ================================================ FILE: scripts/weather.sh ================================================ if [ ! -d "./logs" ]; then mkdir ./logs fi if [ ! -d "./logs/weather" ]; then mkdir ./logs/weather fi export CUDA_VISIBLE_DEVICES=2 seq_len=96 label_len=48 model_name=MSGNet pred_len=96 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path weather.csv \ --model_id weather'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --conv_channel 32 \ --skip_channel 32 \ --batch_size 32 \ --train_epochs 3 \ --itr 1 #>logs/weather/$model_name'_'weather_$seq_len'_'$pred_len.log pred_len=192 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path weather.csv \ --model_id weather'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --conv_channel 32 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/weather/$model_name'_'weather_$seq_len'_'$pred_len.log pred_len=336 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path weather.csv \ --model_id weather'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 1 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --conv_channel 32 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/weather/$model_name'_'weather_$seq_len'_'$pred_len.log pred_len=720 python -u run_longExp.py \ --is_training 1 \ --root_path ./dataset/ \ --data_path weather.csv \ --model_id weather'_'$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --freq h \ --target 'OT' \ --seq_len $seq_len \ --label_len $label_len \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --d_model 64 \ --d_ff 128 \ --top_k 5 \ --conv_channel 32 \ --skip_channel 32 \ --batch_size 32 \ --itr 1 #>logs/weather/$model_name'_'weather_$seq_len'_'$pred_len.log ================================================ FILE: utils/masking.py ================================================ import torch class TriangularCausalMask(): def __init__(self, B, L, device="cpu"): mask_shape = [B, 1, L, L] with torch.no_grad(): self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) @property def mask(self): return self._mask class ProbMask(): def __init__(self, B, H, L, index, scores, device="cpu"): _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) indicator = _mask_ex[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :].to(device) self._mask = indicator.view(scores.shape).to(device) @property def mask(self): return self._mask ================================================ FILE: utils/metrics.py ================================================ import numpy as np def MAE(pred, true): return np.mean(np.abs(pred - true)) def MAPE(pred, true): return np.mean(np.abs((pred - true) / true)) def ND(pred, true): return np.mean(np.abs(true - pred)) / np.mean(np.abs(true)) def MSE(pred, true): return np.mean((pred - true) ** 2) def RMSE(pred, true): return np.sqrt(MSE(pred, true)) def NRMSE(pred, true): return np.sqrt(np.mean(np.power((pred - true), 2))) / (np.mean(np.abs(true))) def RSE(pred, true): return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) def CORR(pred, true): u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) d += 1e-12 return 0.01*(u / d).mean(-1) def MSPE(pred, true): return np.mean(np.square((pred - true) / true)) def metric(pred, true): mae = MAE(pred, true) mse = MSE(pred, true) rmse = RMSE(pred, true) mape = MAPE(pred, true) mspe = MSPE(pred, true) rse = RSE(pred, true) corr = CORR(pred, true) nd = ND(pred,true) nrmse = NRMSE(pred,true) return mae, mse, rmse, mape, mspe, rse , corr, nd, nrmse def metric2(pred, true): mae = MAE(pred, true) mse = MSE(pred, true) rmse = RMSE(pred, true) mape = MAPE(pred, true) mspe = MSPE(pred, true) rse = RSE(pred, true) nd = ND(pred,true) nrmse = NRMSE(pred,true) return mae, mse, rmse, mape, mspe, rse , nd, nrmse ================================================ FILE: utils/timefeatures.py ================================================ from typing import List import numpy as np import pandas as pd from pandas.tseries import offsets from pandas.tseries.frequencies import to_offset class TimeFeature: def __init__(self): pass def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: pass def __repr__(self): return self.__class__.__name__ + "()" class SecondOfMinute(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.second / 59.0 - 0.5 class MinuteOfHour(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.minute / 59.0 - 0.5 class HourOfDay(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.hour / 23.0 - 0.5 class DayOfWeek(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.dayofweek / 6.0 - 0.5 class DayOfMonth(TimeFeature): """Day of month encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.day - 1) / 30.0 - 0.5 class DayOfYear(TimeFeature): """Day of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.dayofyear - 1) / 365.0 - 0.5 class MonthOfYear(TimeFeature): """Month of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.month - 1) / 11.0 - 0.5 class WeekOfYear(TimeFeature): """Week of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.isocalendar().week - 1) / 52.0 - 0.5 def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: """ Returns a list of time features that will be appropriate for the given frequency string. Parameters ---------- freq_str Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. """ features_by_offsets = { offsets.YearEnd: [], offsets.QuarterEnd: [MonthOfYear], offsets.MonthEnd: [MonthOfYear], offsets.Week: [DayOfMonth, WeekOfYear], offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], offsets.Minute: [ MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear, ], offsets.Second: [ SecondOfMinute, MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear, ], } offset = to_offset(freq_str) for offset_type, feature_classes in features_by_offsets.items(): if isinstance(offset, offset_type): return [cls() for cls in feature_classes] supported_freq_msg = f""" Unsupported frequency {freq_str} The following frequencies are supported: Y - yearly alias: A M - monthly W - weekly D - daily B - business days H - hourly T - minutely alias: min S - secondly """ raise RuntimeError(supported_freq_msg) def time_features(dates, freq='h'): return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) ================================================ FILE: utils/tools.py ================================================ import numpy as np import torch import matplotlib.pyplot as plt import time plt.switch_backend('agg') def adjust_learning_rate(optimizer, epoch, args): # lr = args.learning_rate * (0.2 ** (epoch // 2)) if args.lradj == 'type1': lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} elif args.lradj == 'type2': lr_adjust = { 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 10: 5e-7, 15: 1e-7, 20: 5e-8 } elif args.lradj == '3': lr_adjust = {epoch: args.learning_rate if epoch < 10 else args.learning_rate*0.1} elif args.lradj == '4': lr_adjust = {epoch: args.learning_rate if epoch < 15 else args.learning_rate*0.1} elif args.lradj == '5': lr_adjust = {epoch: args.learning_rate if epoch < 25 else args.learning_rate*0.1} elif args.lradj == '6': lr_adjust = {epoch: args.learning_rate if epoch < 5 else args.learning_rate*0.1} if epoch in lr_adjust.keys(): lr = lr_adjust[epoch] for param_group in optimizer.param_groups: param_group['lr'] = lr print('Updating learning rate to {}'.format(lr)) class EarlyStopping: def __init__(self, patience=7, verbose=False, delta=0): self.patience = patience self.verbose = verbose self.counter = 0 self.best_score = None self.early_stop = False self.val_loss_min = np.Inf self.delta = delta def __call__(self, val_loss, model, path): score = -val_loss if self.best_score is None: self.best_score = score self.save_checkpoint(val_loss, model, path) elif score < self.best_score + self.delta: self.counter += 1 print(f'EarlyStopping counter: {self.counter} out of {self.patience}') if self.counter >= self.patience: self.early_stop = True else: self.best_score = score self.save_checkpoint(val_loss, model, path) self.counter = 0 def save_checkpoint(self, val_loss, model, path): if self.verbose: print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') self.val_loss_min = val_loss class dotdict(dict): """dot.notation access to dictionary attributes""" __getattr__ = dict.get __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ class StandardScaler(): def __init__(self, mean, std): self.mean = mean self.std = std def transform(self, data): return (data - self.mean) / self.std def inverse_transform(self, data): return (data * self.std) + self.mean def visual(true, preds=None, name='./pic/test.pdf'): """ Results visualization """ plt.figure() plt.plot(true, label='GroundTruth', linewidth=2) if preds is not None: plt.plot(preds, label='Prediction', linewidth=2) plt.legend() plt.show() plt.savefig(name, bbox_inches='tight') def test_params_flop(model,x_shape): """ If you want to thest former's flop, you need to give default value to inputs in model.forward(), the following code can only pass one argument to forward() """ model_params = 0 for parameter in model.parameters(): model_params += parameter.numel() print('INFO: Trainable parameter count: {:.2f}M'.format(model_params / 1000000.0)) from ptflops import get_model_complexity_info with torch.cuda.device(0): macs, params = get_model_complexity_info(model.cuda(), x_shape, as_strings=True, print_per_layer_stat=True) # print('Flops:' + flops) # print('Params:' + params) print('{:<30} {:<8}'.format('Computational complexity: ', macs)) print('{:<30} {:<8}'.format('Number of parameters: ', params))