Repository: YoZhibo/MSGNet
Branch: main
Commit: 953b8330a2ca
Files: 29
Total size: 141.7 KB
Directory structure:
gitextract_1z7x72rx/
├── README.md
├── data_provider/
│ ├── data_factory.py
│ └── data_loader.py
├── exp/
│ ├── exp_basic.py
│ ├── exp_main.py
│ └── exp_stat.py
├── layers/
│ ├── AutoCorrelation.py
│ ├── Autoformer_EncDec.py
│ ├── Embed.py
│ ├── MSGBlock.py
│ ├── SelfAttention_Family.py
│ └── Transformer_EncDec.py
├── models/
│ ├── Autoformer.py
│ ├── DLinear.py
│ ├── Informer.py
│ └── MSGNet.py
├── run_longExp.py
├── scripts/
│ ├── ETTh1.sh
│ ├── ETTh2.sh
│ ├── ETTm1.sh
│ ├── ETTm2.sh
│ ├── Flight.sh
│ ├── electricity.sh
│ ├── exchange.sh
│ └── weather.sh
└── utils/
├── masking.py
├── metrics.py
├── timefeatures.py
└── tools.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# MSGNet (AAAI2024)
Paper link: [MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting](https://arxiv.org/abs/2401.00423)
## Usage
- Train and evaluate MSGNet
- You can use the following command:`sh ./scripts/ETTh1.sh`.
- Train your model
- Add model file in the folder `./models/your_model.py`.
- Import the model and register it in the `model_dict` of the `Exp_Main` class (`./exp/exp_main.py`).
- Flight dataset
- You can obtain the dataset from [Google Drive](https://drive.google.com/drive/folders/1JSZByfM0Ghat3g_D3a-puTZ2JsfebNWL?usp=sharing). Then please place it in the folder `./dataset`.
## Model
MSGNet employs several ScaleGraph blocks, each encompassing three pivotal modules: an FFT module for multi-scale data identification, an adaptive graph convolution module for inter-series correlation learning within a time scale, and a multi-head attention module for intra-series correlation learning.
<div align=center>
<img src="https://github.com/YoZhibo/MSGNet/blob/main/pic/model1.jpg" width='45%'> <img src="https://github.com/YoZhibo/MSGNet/blob/main/pic/model2.jpg" width='47%'>
</div>
## Main Results
Forecasting results with a look-back window of 96 and prediction lengths {96, 192, 336, 720}. The best result is shown in bold and the second best is underlined.
<div align=center>
<img src="https://github.com/YoZhibo/MSGNet/blob/main/pic/main_result.jpg" width='75%'>
</div>
## Citation
```
@article{cai2023msgnet,
title={MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting},
author={Cai, Wanlin and Liang, Yuxuan and Liu, Xianggen and Feng, Jianshuai and Wu, Yuankai},
journal={arXiv preprint arXiv:2401.00423},
year={2023}
}
```
## Acknowledgement
We appreciate the valuable contributions of the following GitHub repositories.
- LTSF-Linear (https://github.com/cure-lab/LTSF-Linear)
- TimesNet (https://github.com/thuml/TimesNet)
- Time-Series-Library (https://github.com/thuml/Time-Series-Library)
- MTGNN (https://github.com/nnzhan/MTGNN)
- Autoformer (https://github.com/thuml/Autoformer)
- Informer (https://github.com/zhouhaoyi/Informer2020)
================================================
FILE: data_provider/data_factory.py
================================================
from .data_loader import Dataset_ETT_hour, Dataset_ETT_minute, \
Dataset_Custom, Dataset_Pred,Dataset_Flight
from torch.utils.data import DataLoader
# Registry mapping the value of ``args.data`` to the dataset class that loads it.
data_dict = dict(
    ETTh1=Dataset_ETT_hour,
    ETTh2=Dataset_ETT_hour,
    ETTm1=Dataset_ETT_minute,
    ETTm2=Dataset_ETT_minute,
    custom=Dataset_Custom,
    Flight=Dataset_Flight,
)
# flag = 'train' or 'val' or 'test'
def data_provider(args, flag):
    """Build the dataset and its ``DataLoader`` for one split.

    Args:
        args: Namespace read for ``data``, ``embed``, ``batch_size``, ``freq``,
            ``root_path``, ``data_path``, ``seq_len``, ``label_len``,
            ``pred_len``, ``features``, ``target``, ``seasonal_patterns`` and
            ``num_workers``.
        flag: ``'test'`` and ``'pred'`` get dedicated loader settings; any
            other value is treated like a training/validation split.

    Returns:
        ``(data_set, data_loader)``.
    """
    dataset_cls = data_dict[args.data]
    # time features encoding, options: [timeF, fixed, learned]
    timeenc = 1 if args.embed == 'timeF' else 0

    predicting = flag == 'pred'
    if predicting:
        # Prediction always runs sample by sample on the dedicated dataset.
        dataset_cls = Dataset_Pred

    # Only test and pred keep the original order; only pred keeps a partial batch.
    shuffle_flag = flag not in ('test', 'pred')
    drop_last = not predicting
    batch_size = 1 if predicting else args.batch_size
    freq = args.freq

    window = [args.seq_len, args.label_len, args.pred_len]
    data_set = dataset_cls(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=window,
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
        seasonal_patterns=args.seasonal_patterns,
    )
    print(flag, len(data_set))

    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last,
    )
    return data_set, data_loader
================================================
FILE: data_provider/data_loader.py
================================================
import os
import numpy as np
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')
#for Flight 4:4:2 split
class Dataset_Flight(Dataset):
    """Sliding-window forecasting dataset over a dated CSV, split 4:4:2.

    The first 40% of the rows form the training split, the last 20% the test
    split and the remainder the validation split. The validation and test
    splits start ``seq_len`` rows early so that their first window has a full
    input history.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='Flight.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            root_path: Directory containing ``data_path``.
            flag: One of ``'train'``, ``'val'`` or ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` keep every non-date column, ``'S'``
                keeps only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: Name of the target column.
            scale: Standardize values with a scaler fitted on the training rows.
            timeenc: ``0`` for integer calendar features, ``1`` for the
                ``time_features`` encoding.
            freq: Frequency string forwarded to ``time_features``.
            seasonal_patterns: Accepted for interface compatibility; unused.
        """
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 4)`` int64 array of [month, day, weekday, hour]."""
        stamps = pd.DatetimeIndex(pd.to_datetime(dates))
        return np.stack(
            [stamps.month, stamps.day, stamps.weekday, stamps.hour], axis=1
        ).astype(np.int64)

    def __read_data__(self):
        """Read the CSV, scale it and keep the rows/time marks of this split."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder columns to ['date', ...(other features), target feature].
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        num_train = int(len(df_raw) * 0.4)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window *index*."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len input steps.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside the split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Custom(Dataset):
    """Sliding-window forecasting dataset over a dated CSV, split 7:1:2.

    The first 70% of the rows form the training split, the last 20% the test
    split and the remainder the validation split. The validation and test
    splits start ``seq_len`` rows early so that their first window has a full
    input history.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            root_path: Directory containing ``data_path``.
            flag: One of ``'train'``, ``'val'`` or ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` keep every non-date column, ``'S'``
                keeps only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: Name of the target column.
            scale: Standardize values with a scaler fitted on the training rows.
            timeenc: ``0`` for integer calendar features, ``1`` for the
                ``time_features`` encoding.
            freq: Frequency string forwarded to ``time_features``.
            seasonal_patterns: Accepted for interface compatibility; unused.
        """
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 4)`` int64 array of [month, day, weekday, hour]."""
        stamps = pd.DatetimeIndex(pd.to_datetime(dates))
        return np.stack(
            [stamps.month, stamps.day, stamps.weekday, stamps.hour], axis=1
        ).astype(np.int64)

    def __read_data__(self):
        """Read the CSV, scale it and keep the rows/time marks of this split."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder columns to ['date', ...(other features), target feature].
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window *index*."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len input steps.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside the split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Pred(Dataset):
    """Dataset exposing the last ``seq_len`` rows of a CSV for forecasting.

    The time marks cover those rows plus ``pred_len`` future timestamps
    generated with ``freq``.
    """

    def __init__(self, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='15min',
                 seasonal_patterns=None, cols=None):
        """Store the configuration and load the prediction window.

        Args:
            root_path: Directory containing ``data_path``.
            flag: Must be ``'pred'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` keep every non-date column, ``'S'``
                keeps only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: Name of the target column.
            scale: Standardize values with a scaler fitted on all rows.
            inverse: Keep unscaled values in ``data_y``.
            timeenc: ``0`` for integer calendar features, ``1`` for the
                ``time_features`` encoding.
            freq: Frequency used to extend the dates and for ``time_features``.
            seasonal_patterns: Accepted for interface compatibility; unused.
            cols: Optional explicit list of feature columns (including
                ``target``) to use instead of all CSV columns.
        """
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 5)`` int64 array of
        [month, day, weekday, hour, minute // 15]."""
        stamps = pd.DatetimeIndex(pd.to_datetime(dates))
        return np.stack(
            [stamps.month, stamps.day, stamps.weekday, stamps.hour, stamps.minute // 15],
            axis=1,
        ).astype(np.int64)

    def __read_data__(self):
        """Read the CSV, scale it and build the window plus future time marks."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder columns to ['date', ...(other features), target feature].
        if self.cols:
            cols = self.cols.copy()
            cols.remove(self.target)
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # Known timestamps of the input window followed by pred_len future ones;
        # date_range repeats the last known timestamp first, so drop element 0.
        known_dates = pd.DatetimeIndex(
            pd.to_datetime(df_raw['date'].iloc[border1:border2]))
        pred_dates = pd.date_range(known_dates[-1], periods=self.pred_len + 1, freq=self.freq)
        all_dates = known_dates.append(pred_dates[1:])

        if self.timeenc == 0:
            data_stamp = self._calendar_features(all_dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(all_dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        if self.inverse:
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window *index*.

        ``seq_y`` holds only the ``label_len`` known steps; ``seq_y_mark`` also
        spans the ``pred_len`` future steps.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            seq_y = self.data_x[r_begin:r_begin + self.label_len]
        else:
            seq_y = self.data_y[r_begin:r_begin + self.label_len]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of input windows available."""
        return len(self.data_x) - self.seq_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_ETT_hour(Dataset):
    """Sliding-window forecasting dataset for the hourly ETT CSV files.

    Splits are fixed row ranges: the first ``12 * 30 * 24`` rows for training,
    the next ``4 * 30 * 24`` for validation and the following ``4 * 30 * 24``
    for testing. Validation and test start ``seq_len`` rows early so that
    their first window has a full input history.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            root_path: Directory containing ``data_path``.
            flag: One of ``'train'``, ``'val'`` or ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: M: multivariate predict multivariate, S: univariate
                predict univariate, MS: multivariate predict univariate.
            data_path: CSV file name inside ``root_path``.
            target: Name of the target column.
            scale: Standardize values with a scaler fitted on the training rows.
            timeenc: ``0`` for integer calendar features, ``1`` for the
                ``time_features`` encoding.
            freq: Frequency string forwarded to ``time_features``.
            seasonal_patterns: Accepted for interface compatibility; unused.
        """
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 4)`` int64 array of [month, day, weekday, hour]."""
        stamps = pd.DatetimeIndex(pd.to_datetime(dates))
        return np.stack(
            [stamps.month, stamps.day, stamps.weekday, stamps.hour], axis=1
        ).astype(np.int64)

    def __read_data__(self):
        """Read the CSV, scale it and keep the rows/time marks of this split."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        train_rows = 12 * 30 * 24
        eval_rows = 4 * 30 * 24
        border1s = [0, train_rows - self.seq_len, train_rows + eval_rows - self.seq_len]
        border2s = [train_rows, train_rows + eval_rows, train_rows + 2 * eval_rows]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window *index*."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len input steps.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside the split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_ETT_minute(Dataset):
    """Sliding-window forecasting dataset for the 15-minute ETT CSV files.

    Splits are fixed row ranges: the first ``12 * 30 * 24 * 4`` rows for
    training, the next ``4 * 30 * 24 * 4`` for validation and the following
    ``4 * 30 * 24 * 4`` for testing. Validation and test start ``seq_len``
    rows early so that their first window has a full input history.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            root_path: Directory containing ``data_path``.
            flag: One of ``'train'``, ``'val'`` or ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` keep every non-date column, ``'S'``
                keeps only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: Name of the target column.
            scale: Standardize values with a scaler fitted on the training rows.
            timeenc: ``0`` for integer calendar features, ``1`` for the
                ``time_features`` encoding.
            freq: Frequency string forwarded to ``time_features``.
            seasonal_patterns: Accepted for interface compatibility; unused.
        """
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 5)`` int64 array of
        [month, day, weekday, hour, minute // 15]."""
        stamps = pd.DatetimeIndex(pd.to_datetime(dates))
        return np.stack(
            [stamps.month, stamps.day, stamps.weekday, stamps.hour, stamps.minute // 15],
            axis=1,
        ).astype(np.int64)

    def __read_data__(self):
        """Read the CSV, scale it and keep the rows/time marks of this split."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        train_rows = 12 * 30 * 24 * 4
        eval_rows = 4 * 30 * 24 * 4
        border1s = [0, train_rows - self.seq_len, train_rows + eval_rows - self.seq_len]
        border2s = [train_rows, train_rows + eval_rows, train_rows + 2 * eval_rows]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window *index*."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len input steps.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside the split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
================================================
FILE: exp/exp_basic.py
================================================
import os
import torch
import numpy as np
class Exp_Basic(object):
    """Base class for experiments: keeps ``args``, selects a device and builds
    the model on it. Subclasses override the hooks below."""

    def __init__(self, args):
        """Store *args*, pick the device, then build the model and move it there."""
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        """Return the model to train; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def _acquire_device(self):
        """Return the torch device selected by ``args``.

        With ``args.use_gpu`` set, ``CUDA_VISIBLE_DEVICES`` is written as a side
        effect: to ``args.devices`` when ``args.use_multi_gpu`` is set,
        otherwise to ``args.gpu``.
        """
        if self.args.use_gpu:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _get_data(self):
        """Hook for subclasses; does nothing here."""
        pass

    def vali(self):
        """Hook for subclasses; does nothing here."""
        pass

    def train(self):
        """Hook for subclasses; does nothing here."""
        pass

    def test(self):
        """Hook for subclasses; does nothing here."""
        pass
================================================
FILE: exp/exp_main.py
================================================
from data_provider.data_factory import data_provider
from .exp_basic import Exp_Basic
from models import Informer, Autoformer, DLinear, MSGNet
from utils.tools import EarlyStopping, adjust_learning_rate, visual, test_params_flop
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim, autograd
import os
import time
import warnings
import matplotlib.pyplot as plt
import numpy as np
warnings.filterwarnings('ignore')
class Exp_Main(Exp_Basic):
    """Train, validate, test and predict with one of the registered models."""

    def __init__(self, args):
        super(Exp_Main, self).__init__(args)

    def _build_model(self):
        """Instantiate ``args.model`` (wrapped in ``DataParallel`` for multi-GPU)."""
        model_dict = {
            'Informer': Informer,
            'Autoformer': Autoformer,
            'DLinear': DLinear,
            'MSGNet': MSGNet
        }
        model = model_dict[self.args.model].Model(self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    # flag = 'train' or 'val' or 'test'
    def _get_data(self, flag):
        """Return ``(data_set, data_loader)`` for the split named by *flag*."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Mean-squared-error loss."""
        criterion = nn.MSELoss()
        return criterion

    def _make_decoder_input(self, batch_y):
        """Build the decoder input on ``self.device``: the first ``label_len``
        steps of *batch_y* followed by ``pred_len`` zero steps."""
        zeros = torch.zeros(
            batch_y.shape[0], self.args.pred_len, batch_y.shape[2],
            dtype=batch_y.dtype, device=batch_y.device)
        return torch.cat(
            [batch_y[:, :self.args.label_len, :], zeros], dim=1).float().to(self.device)

    def _model_outputs(self, batch_x, batch_x_mark, dec_inp, batch_y_mark):
        """Run the model once and return its prediction tensor.

        Models whose name contains ``'Linear'`` only take ``batch_x``; for the
        others the attention maps are dropped when ``args.output_attention``
        is set.
        """
        if 'Linear' in self.args.model:
            return self.model(batch_x)
        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        if self.args.output_attention:
            outputs = outputs[0]
        return outputs

    def _forward(self, batch_x, batch_x_mark, dec_inp, batch_y_mark):
        """``_model_outputs`` under CUDA autocast when ``args.use_amp`` is set."""
        if self.args.use_amp:
            with torch.cuda.amp.autocast():
                return self._model_outputs(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        return self._model_outputs(batch_x, batch_x_mark, dec_inp, batch_y_mark)

    def _crop_to_horizon(self, outputs, batch_y):
        """Keep the last ``pred_len`` steps (and only the last channel for
        ``features == 'MS'``) of both tensors; the target is moved to the device."""
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        return outputs, batch_y

    def _batch_loss(self, criterion, batch_x, batch_y, batch_x_mark, dec_inp, batch_y_mark):
        """Forward one training batch and return its loss tensor."""
        outputs = self._model_outputs(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        outputs, batch_y = self._crop_to_horizon(outputs, batch_y)
        return criterion(outputs, batch_y)

    def vali(self, vali_data, vali_loader, criterion):
        """Return the average *criterion* value over *vali_loader*.

        The model is switched to eval mode for the pass and back to train mode
        afterwards.
        """
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                dec_inp = self._make_decoder_input(batch_y)
                outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_to_horizon(outputs, batch_y)

                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()
                # Store plain floats, as train() does, rather than tensors.
                total_loss.append(criterion(pred, true).item())
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """Train with early stopping on the validation loss.

        Checkpoints are written under ``args.checkpoints/setting``; the best
        one is reloaded before the model is returned.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()
        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        # use automatic mixed precision training
        if self.args.use_amp:
            scaler = torch.cuda.amp.GradScaler()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                dec_inp = self._make_decoder_input(batch_y)

                # The loss is computed inside the autocast region too.
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        loss = self._batch_loss(
                            criterion, batch_x, batch_y, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    loss = self._batch_loss(
                        criterion, batch_x, batch_y, batch_x_mark, dec_inp, batch_y_mark)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    # NOTE(review): anomaly detection makes backward() fail on
                    # NaN gradients but slows training; confirm it is wanted
                    # outside debugging.
                    with autograd.detect_anomaly():
                        loss.backward()
                    model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """Evaluate on the test split, plot every 10th batch and append the
        metrics to ``result.txt``.

        Args:
            setting: Experiment name used for checkpoint and output folders.
            test: When truthy, load the checkpoint saved for *setting* first.
        """
        test_data, test_loader = self._get_data(flag='test')

        if test:
            print('loading model')
            # Same location train() saves to and predict() loads from.
            self.model.load_state_dict(torch.load(
                os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')))

        preds = []
        trues = []
        inputx = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                dec_inp = self._make_decoder_input(batch_y)
                outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_to_horizon(outputs, batch_y)

                pred = outputs.detach().cpu().numpy()
                true = batch_y.detach().cpu().numpy()
                history = batch_x.detach().cpu().numpy()

                preds.append(pred)
                trues.append(true)
                inputx.append(history)
                if i % 10 == 0:
                    # Plot the last channel of the first sample: history + horizon.
                    gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                    forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        # See utils / tools for usage
        if self.args.test_flop:
            test_params_flop((batch_x.shape[1], batch_x.shape[2]))
            exit()

        preds = np.array(preds)
        trues = np.array(trues)
        inputx = np.array(inputx)

        print('preds_shape:', preds.shape)
        print('trues_shape:', trues.shape)

        # Merge the batch axis: (batches, batch, steps, channels) -> (samples, steps, channels).
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1])

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe, rse, corr, nd, nrmse = metric(preds, trues)
        summary = 'nd:{}, nrmse:{}, mse:{}, mae:{}, rse:{}, mape:{}'.format(nd, nrmse, mse, mae, rse, mape)
        print(summary)
        with open("result.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write(summary)
            f.write('\n')
            f.write('\n')

        # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr]))
        # np.save(folder_path + 'pred.npy', preds)
        # np.save(folder_path + 'true.npy', trues)
        # np.save(folder_path + 'x.npy', inputx)
        return

    def predict(self, setting, load=False):
        """Forecast from the ``'pred'`` split and save the result to
        ``./results/<setting>/real_prediction.npy``.

        Args:
            setting: Experiment name used for checkpoint and output folders.
            load: When true, load the checkpoint saved for *setting* first.
        """
        pred_data, pred_loader = self._get_data(flag='pred')

        if load:
            path = os.path.join(self.args.checkpoints, setting)
            best_model_path = path + '/' + 'checkpoint.pth'
            self.model.load_state_dict(torch.load(best_model_path))

        preds = []

        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                dec_inp = self._make_decoder_input(batch_y)
                outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                preds.append(outputs.detach().cpu().numpy())

        preds = np.array(preds)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        np.save(folder_path + 'real_prediction.npy', preds)

        return
================================================
FILE: exp/exp_stat.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import matplotlib.pyplot as plt
from models.Stat_models import *
warnings.filterwarnings('ignore')
class Exp_Main(Exp_Basic):
def __init__(self, args):
    """Forward *args* unchanged to the ``Exp_Basic`` initializer."""
    super().__init__(args)
def _build_model(self):
model_dict = {
'Naive': Naive_repeat,
'ARIMA': Arima,
'SARIMA': SArima,
'GBRT': GBRT,
}
model = model_dict[self.args.model](self.args).float()
return model
def _get_data(self, flag):
data_set, data_loader = data_provider(self.args, flag)
return data_set, data_loader
def test(self, setting, test=0):
test_data, test_loader = self._get_data(flag='test')
# Sample 10%
samples = max(int(self.args.sample * self.args.batch_size),1)
preds = []
trues = []
inputx = []
folder_path = './test_results/' + setting + '/'
if not os.path.exists(folder_path):
os.makedirs(folder_path)
with torch.no_grad():
for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
batch_x = batch_x.float().to(self.device).cpu().numpy()
batch_y = batch_y.float().to(self.device).cpu().numpy()
batch_x = batch_x[:samples]
outputs = self.model(batch_x)
f_dim = -1 if self.args.features == 'MS' else 0
# print(outputs.shape,batch_y.shape)
outputs = outputs[:, -self.args.pred_len:, f_dim:]
batch_y = batch_y[:samples, -self.args.pred_len:, f_dim:]
pred = outputs # outputs.detach().cpu().numpy() # .squeeze()
true = batch_y # batch_y.detach().cpu().numpy() # .squeeze()
preds.append(pred)
trues.append(true)
inputx.append(batch_x)
if i % 20 == 0:
input = batch_x
gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))
preds = np.array(preds)
trues = np.array(trues)
inputx = np.array(inputx)
preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1])
folder_path = './results/' + setting + '/'
if not os.path.exists(folder_path):
os.makedirs(folder_path)
mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
corr = []
print('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr))
f = open("result.txt", 'a')
f.write(setting + " \n")
f.write('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr))
f.write('\n')
f.write('\n')
f.close()
np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr]))
np.save(folder_path + 'pred.npy', preds)
np.save(folder_path + 'true.npy', trues)
# np.save(folder_path + 'x.npy', inputx)
return
================================================
FILE: layers/AutoCorrelation.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import math
from math import sqrt
import os
class AutoCorrelation(nn.Module):
    """
    AutoCorrelation Mechanism with the following two phases:
    (1) period-based dependencies discovery
    (2) time delay aggregation
    This block can replace the self-attention family mechanism seamlessly.
    """

    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def time_delay_agg_training(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the training phase.

        The top-k delays are chosen once from the batch-averaged correlation
        and shared by every sample. ``values`` and ``corr`` are indexed as
        4-D tensors whose last axis is the one being rolled.
        """
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the inference phase.

        Delays are selected per sample from the correlation averaged over
        axes 1 and 2.
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init -- built on the device of `values` so that gather works for
        # CPU inference on a CUDA-capable host as well as for multi-GPU setups.
        init_index = torch.arange(length, device=values.device) \
            .unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation: values are doubled along the last axis so that
        # `index + delay` never runs past the end (circular shift via gather).
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_full(self, values, corr):
        """
        Standard version of Autocorrelation

        Delays are selected independently for every (batch, head, channel)
        entry of ``corr``.
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init -- follows the device of `values` instead of forcing CUDA.
        init_index = torch.arange(length, device=values.device) \
            .unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
        # find top k
        top_k = int(self.factor * math.log(length))
        weights, delay = torch.topk(corr, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask):
        """Aggregate ``values`` along the delays with the highest q/k correlation.

        Args:
            queries: tensor unpacked as (B, L, H, E).
            keys: tensor shaped like ``values``.
            values: tensor unpacked as (B, S, H, D).
            attn_mask: accepted for interface compatibility; not used.

        Returns:
            ``(V, corr)`` when ``output_attention`` is set, else ``(V, None)``.
        """
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        if L > S:
            # Zero-pad keys/values along axis 1 up to the query length.
            zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, zeros], dim=1)
            keys = torch.cat([keys, zeros], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # period-based dependencies: correlation computed in the frequency
        # domain as irfft(rfft(q) * conj(rfft(k))) along the length axis.
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        corr = torch.fft.irfft(res, dim=-1)

        # time delay agg
        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)
class AutoCorrelationLayer(nn.Module):
    """Multi-head wrapper that projects q/k/v, runs the inner correlation
    module on the per-head tensors and projects the result back to ``d_model``.
    """

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        # Per-head widths default to an even split of d_model.
        key_width = d_keys or (d_model // n_heads)
        value_width = d_values or (d_model // n_heads)

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, key_width * n_heads)
        self.key_projection = nn.Linear(d_model, key_width * n_heads)
        self.value_projection = nn.Linear(d_model, value_width * n_heads)
        self.out_projection = nn.Linear(value_width * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(projected_output, attn)`` where ``attn`` is whatever the
        inner correlation module returns as its second element."""
        batch, tgt_len, _ = queries.shape
        _, src_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected features into heads: (B, len, H, width).
        q_heads = self.query_projection(queries).view(batch, tgt_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, src_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, src_len, heads, -1)

        merged, attn = self.inner_correlation(q_heads, k_heads, v_heads, attn_mask)
        merged = merged.view(batch, tgt_len, -1)
        return self.out_projection(merged), attn
================================================
FILE: layers/Autoformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class my_Layernorm(nn.Module):
    """
    Layer normalisation for the seasonal part: applies ``nn.LayerNorm`` and
    then removes the mean taken along axis 1 of the normalised tensor.
    """

    def __init__(self, channels):
        super(my_Layernorm, self).__init__()
        self.layernorm = nn.LayerNorm(channels)

    def forward(self, x):
        normed = self.layernorm(x)
        # Mean over axis 1, broadcast back across that axis.
        axis_mean = normed.mean(dim=1, keepdim=True)
        return normed - axis_mean
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # Replicate the first and last step (kernel_size - 1) // 2 times on
        # each side before pooling along axis 1.
        pad = (self.kernel_size - 1) // 2
        head = x[:, :1, :].repeat(1, pad, 1)
        tail = x[:, -1:, :].repeat(1, pad, 1)
        padded = torch.cat((head, x, tail), dim=1)
        # AvgPool1d pools over the last axis, so swap axes 1 and 2 around it.
        pooled = self.avg(padded.transpose(1, 2))
        return pooled.transpose(1, 2)
class series_decomp(nn.Module):
    """
    Series decomposition block: splits the input into a moving-average
    component and the remainder.
    """

    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(x - moving_average(x), moving_average(x))``."""
        trend = self.moving_avg(x)
        remainder = x - trend
        return remainder, trend
class EncoderLayer(nn.Module):
    """
    Autoformer encoder layer with the progressive decomposition architecture
    """

    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        hidden_width = d_ff or 4 * d_model
        self.attention = attention
        # Position-wise feed-forward expressed as two bias-free 1x1 convolutions.
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden_width, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=hidden_width, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(residual, attn)``; the trend parts of both decompositions
        are discarded."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        seasonal, _ = self.decomp1(x + self.dropout(attended))

        # Conv1d wants channels on axis 1, hence the transposes around it.
        hidden = self.activation(self.conv1(seasonal.transpose(-1, 1)))
        hidden = self.dropout(hidden)
        ff_out = self.dropout(self.conv2(hidden).transpose(-1, 1))

        res, _ = self.decomp2(seasonal + ff_out)
        return res, attn
class Encoder(nn.Module):
    """
    Autoformer encoder: a stack of attention layers, optionally interleaved
    with conv layers and followed by a final norm.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        if conv_layers is not None:
            self.conv_layers = nn.ModuleList(conv_layers)
        else:
            self.conv_layers = None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Return ``(x, attns)`` with one attention entry per attention layer run."""
        collected = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, weights = layer(x, attn_mask=attn_mask)
                collected.append(weights)
        else:
            # Pair each conv layer with an attention layer; the last attention
            # layer runs on its own afterwards (and without the mask).
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, weights = layer(x, attn_mask=attn_mask)
                x = conv(x)
                collected.append(weights)
            x, weights = self.attn_layers[-1](x)
            collected.append(weights)

        if self.norm is not None:
            x = self.norm(x)

        return x, collected
class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer with the progressive decomposition architecture
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        hidden_width = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden_width, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=hidden_width, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        # Maps the accumulated trend from d_model to c_out channels.
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return ``(seasonal, residual_trend)`` where the trend is the
        projected sum of the three decomposition trends."""
        # 1) self-attention + decomposition
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x, trend1 = self.decomp1(x + self.dropout(self_out))

        # 2) cross-attention + decomposition
        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x, trend2 = self.decomp2(x + self.dropout(cross_out))

        # 3) feed-forward + decomposition
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        ff_out = self.dropout(self.conv2(hidden).transpose(-1, 1))
        x, trend3 = self.decomp3(x + ff_out)

        trend_sum = trend1 + trend2 + trend3
        residual_trend = self.projection(trend_sum.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend
class Decoder(nn.Module):
    """
    Autoformer decoder: runs the decoder layers in order and accumulates the
    residual trend each of them returns.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        """Return ``(x, trend)``.

        Args:
            x: decoder input passed through every layer.
            cross: encoder output handed to every layer.
            x_mask, cross_mask: forwarded unchanged to the layers.
            trend: initial trend to accumulate onto. When omitted, accumulation
                starts from the first layer's residual trend (previously the
                default ``None`` raised a TypeError on the first addition).
        """
        for layer in self.layers:
            x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = residual_trend if trend is None else trend + residual_trend

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x, trend
================================================
FILE: layers/Embed.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional table, stored as the buffer ``pe`` with
    shape (1, max_len, d_model)."""

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        # The table is constant (the original spelled this `require_grad`,
        # which only created an unused attribute).
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns;
        # slicing is a no-op when d_model is even.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the first ``x.size(1)`` rows of the table; only the size of
        ``x`` along axis 1 is used."""
        return self.pe[:, :x.size(1)]
class TokenEmbedding(nn.Module):
    """Embeds the input features with a circular, bias-free Conv1d of kernel 3
    applied along axis 1 of the input."""

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = self._circular_padding(torch.__version__)
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    @staticmethod
    def _circular_padding(version):
        """Return the circular padding for the given torch version string.

        Versions >= 1.5 use 1, older ones use 2 (same rule as before). The
        version is compared numerically: a plain string comparison ranks
        '1.10.0' below '1.5.0' and selected the wrong padding on torch
        1.10 - 1.13. Unparseable versions are treated as modern.
        """
        import re
        match = re.match(r'(\d+)\.(\d+)', str(version))
        if match is None:
            return 1
        major_minor = (int(match.group(1)), int(match.group(2)))
        return 1 if major_minor >= (1, 5) else 2

    def forward(self, x):
        # Conv1d expects channels on axis 1, so swap axes 1 and 2 around it.
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
        return x
class FixedEmbedding(nn.Module):
    """Embedding lookup whose weights are a frozen sinusoidal table."""

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        # Constant table (the original spelled this `require_grad`, which only
        # created an unused attribute); the Parameter below is frozen as well.
        w.requires_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns;
        # slicing is a no-op when d_model is even.
        w[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        """Look up integer indices ``x`` and return the detached embeddings."""
        return self.emb(x).detach()
class TemporalEmbedding(nn.Module):
    """Sum of per-field calendar embeddings looked up from integer time marks.

    Columns of the mark tensor are read as 0=month, 1=day, 2=weekday, 3=hour
    and, when ``freq == 't'``, 4=minute.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        # 'fixed' uses the frozen sinusoidal table, anything else a learnable one.
        if embed_type == 'fixed':
            embed_cls = FixedEmbedding
        else:
            embed_cls = nn.Embedding

        # Table sizes: minute 4, hour 24, weekday 7, day 32, month 13.
        if freq == 't':
            self.minute_embed = embed_cls(4, d_model)
        self.hour_embed = embed_cls(24, d_model)
        self.weekday_embed = embed_cls(7, d_model)
        self.day_embed = embed_cls(32, d_model)
        self.month_embed = embed_cls(13, d_model)

    def forward(self, x):
        marks = x.long()

        # The minute table only exists for freq == 't'.
        if hasattr(self, 'minute_embed'):
            minute_x = self.minute_embed(marks[:, :, 4])
        else:
            minute_x = 0.
        hour_x = self.hour_embed(marks[:, :, 3])
        weekday_x = self.weekday_embed(marks[:, :, 2])
        day_x = self.day_embed(marks[:, :, 1])
        month_x = self.month_embed(marks[:, :, 0])

        return hour_x + weekday_x + day_x + month_x + minute_x
class TimeFeatureEmbedding(nn.Module):
    """Bias-free linear projection of the time features to ``d_model``.

    The input width is determined by ``freq``; an unknown ``freq`` raises
    ``KeyError``.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        # Number of time features expected for each frequency code.
        features_per_freq = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        in_width = features_per_freq[freq]
        self.embed = nn.Linear(in_width, d_model, bias=False)

    def forward(self, x):
        projected = self.embed(x)
        return projected
class DataEmbedding(nn.Module):
    """Value + temporal + positional embedding followed by dropout."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        # 'timeF' marks are projected linearly; other types use lookup tables.
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Embed ``x``; the temporal term is skipped when ``x_mark`` is None."""
        embedded = self.value_embedding(x)
        if x_mark is not None:
            embedded = embedded + self.temporal_embedding(x_mark)
        embedded = embedded + self.position_embedding(x)
        return self.dropout(embedded)
class DataEmbedding_wo_pos(nn.Module):
    """Value + temporal embedding followed by dropout.

    A positional embedding is still constructed but is not used in ``forward``.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_pos, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        # 'timeF' marks are projected linearly; other types use lookup tables.
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        value_part = self.value_embedding(x)
        time_part = self.temporal_embedding(x_mark)
        return self.dropout(value_part + time_part)
class DataEmbedding_wo_pos_temp(nn.Module):
    """Value embedding followed by dropout.

    Positional and temporal embeddings are still constructed but are not used
    in ``forward``; ``x_mark`` is ignored.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_pos_temp, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        # 'timeF' marks are projected linearly; other types use lookup tables.
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        value_part = self.value_embedding(x)
        return self.dropout(value_part)
class DataEmbedding_wo_temp(nn.Module):
    """Value + positional embedding followed by dropout.

    A temporal embedding is still constructed but is not used in ``forward``;
    ``x_mark`` is ignored.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_temp, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        # 'timeF' marks are projected linearly; other types use lookup tables.
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        value_part = self.value_embedding(x)
        position_part = self.position_embedding(x)
        return self.dropout(value_part + position_part)
================================================
FILE: layers/MSGBlock.py
================================================
from math import sqrt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch import nn, Tensor
from einops import rearrange
from einops.layers.torch import Rearrange
from utils.masking import TriangularCausalMask
class Predict(nn.Module):
    """Linear map from ``seq_len`` to ``pred_len`` on the last axis, either
    shared by all channels or with one Linear + Dropout pair per channel."""

    def __init__(self, individual, c_out, seq_len, pred_len, dropout):
        super(Predict, self).__init__()
        self.individual = individual
        self.c_out = c_out

        if not self.individual:
            self.seq2pred = nn.Linear(seq_len, pred_len)
            self.dropout = nn.Dropout(dropout)
        else:
            # One independent head per output channel.
            self.seq2pred = nn.ModuleList(
                [nn.Linear(seq_len, pred_len) for _ in range(self.c_out)])
            self.dropout = nn.ModuleList(
                [nn.Dropout(dropout) for _ in range(self.c_out)])

    # x is indexed as (B, c_out, seq)
    def forward(self, x):
        if not self.individual:
            return self.dropout(self.seq2pred(x))

        per_channel = []
        for idx in range(self.c_out):
            projected = self.seq2pred[idx](x[:, idx, :])
            per_channel.append(self.dropout[idx](projected))
        # Re-assemble the channel axis at position 1.
        return torch.stack(per_channel, dim=1)
class Attention_Block(nn.Module):
    """Self-attention followed by a two-layer 1x1-conv feed-forward, each with
    a residual connection and LayerNorm. Only the features are returned; the
    attention weights are dropped."""

    def __init__(self, d_model, d_ff=None, n_heads=8, dropout=0.1, activation="relu"):
        super(Attention_Block, self).__init__()
        hidden_width = d_ff or 4 * d_model
        self.attention = self_attention(FullAttention, d_model, n_heads=n_heads)
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden_width, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden_width, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        attended, _ = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attended))

        # Conv1d wants channels on axis 1, hence the transposes around it.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        ff_out = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + ff_out)
class self_attention(nn.Module):
    """Multi-head projection wrapper around an attention class.

    ``attention`` is a class (not an instance); it is instantiated here with
    ``attention_dropout=0.1``.
    """

    def __init__(self, attention, d_model, n_heads):
        super(self_attention, self).__init__()
        # d_model is split evenly across heads for keys and values alike.
        per_head = d_model // n_heads

        self.inner_attention = attention(attention_dropout=0.1)
        self.query_projection = nn.Linear(d_model, per_head * n_heads)
        self.key_projection = nn.Linear(d_model, per_head * n_heads)
        self.value_projection = nn.Linear(d_model, per_head * n_heads)
        self.out_projection = nn.Linear(per_head * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask=None):
        """Return ``(output, attn)`` with ``attn`` as produced by the inner module."""
        batch, tgt_len, _ = queries.shape
        _, src_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected features into heads: (B, len, H, width).
        q_heads = self.query_projection(queries).view(batch, tgt_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, src_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, src_len, heads, -1)

        merged, attn = self.inner_attention(q_heads, k_heads, v_heads, attn_mask)
        merged = self.out_projection(merged.view(batch, tgt_len, -1))
        return merged, attn
class FullAttention(nn.Module):
    """Scaled dot-product attention over per-head tensors.

    With ``mask_flag`` set and no mask supplied, a ``TriangularCausalMask`` is
    built for the query length.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(V, A)`` if ``output_attention`` else ``(V, None)``.

        ``queries`` is unpacked as (B, L, H, E) and ``values`` as (B, S, H, D);
        ``V`` comes out in 'blhd' order.
        """
        batch, q_len, _, head_dim = queries.shape
        _, _, _, _ = values.shape  # values must be 4-D as well
        # Fall back to 1/sqrt(E) when no (truthy) scale was configured.
        softmax_scale = self.scale if self.scale else 1. / sqrt(head_dim)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(batch, q_len, device=queries.device)
            # Masked positions get -inf so softmax assigns them zero weight.
            scores.masked_fill_(attn_mask.mask, -np.inf)

        weights = self.dropout(torch.softmax(softmax_scale * scores, dim=-1))
        context = torch.einsum("bhls,bshd->blhd", weights, values).contiguous()

        if not self.output_attention:
            return (context, None)
        return (context, weights)
class GraphBlock(nn.Module):
    """Graph-convolution block over a learned adjacency.

    The adjacency is ``softmax(relu(nodevec1 @ nodevec2))`` of size
    (c_out, c_out). The block output is ``LayerNorm(x + f(x))`` where ``f``
    is start conv -> mixprop -> GELU -> end conv -> linear.
    """

    def __init__(self, c_out, d_model, conv_channel, skip_channel,
                 gcn_depth, dropout, propalpha, seq_len, node_dim):
        super(GraphBlock, self).__init__()

        self.nodevec1 = nn.Parameter(torch.randn(c_out, node_dim), requires_grad=True)
        self.nodevec2 = nn.Parameter(torch.randn(node_dim, c_out), requires_grad=True)
        # Kernel height d_model - c_out + 1 reduces the d_model axis to c_out rows.
        self.start_conv = nn.Conv2d(1, conv_channel, (d_model - c_out + 1, 1))
        self.gconv1 = mixprop(conv_channel, skip_channel, gcn_depth, dropout, propalpha)
        self.gelu = nn.GELU()
        # Kernel width seq_len collapses a last axis of length seq_len to 1.
        self.end_conv = nn.Conv2d(skip_channel, seq_len, (1, seq_len))
        self.linear = nn.Linear(c_out, d_model)
        self.norm = nn.LayerNorm(d_model)

    # x in (B, T, d_model)
    # Here we use a mlp to fit a complex mapping f (x)
    def forward(self, x):
        adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
        out = x.unsqueeze(1).transpose(2, 3)
        out = self.start_conv(out)
        out = self.gelu(self.gconv1(out, adp))
        # Drop only the trailing axis left by end_conv. A bare squeeze() would
        # also remove a batch, seq_len or c_out axis of size 1, which breaks
        # the following Linear or broadcasts the residual add incorrectly.
        out = self.end_conv(out).squeeze(-1)
        out = self.linear(out)

        return self.norm(x + out)
class nconv(nn.Module):
    """Contracts axis 2 of a 4-D tensor with the columns of matrix ``A``
    (einsum ``'ncwl,vw->ncvl'``)."""

    def __init__(self):
        super(nconv, self).__init__()

    def forward(self, x, A):
        propagated = torch.einsum('ncwl,vw->ncvl', x, A)
        return propagated.contiguous()
class linear(nn.Module):
    """Channel-mixing layer implemented as a 1x1 ``Conv2d``."""

    def __init__(self, c_in, c_out, bias=True):
        super(linear, self).__init__()
        # kernel 1, stride 1, no padding: mixes channels only.
        self.mlp = torch.nn.Conv2d(
            c_in,
            c_out,
            kernel_size=(1, 1),
            stride=(1, 1),
            padding=(0, 0),
            bias=bias,
        )

    def forward(self, x):
        mixed = self.mlp(x)
        return mixed
class mixprop(nn.Module):
    """Mix-hop propagation: repeatedly blends the input with its graph-
    propagated state and feeds the concatenation of all hops to a 1x1 conv."""

    def __init__(self, c_in, c_out, gdep, dropout, alpha):
        super(mixprop, self).__init__()
        self.nconv = nconv()
        # The conv sees the input plus one tensor per hop, stacked on axis 1.
        self.mlp = linear((gdep + 1) * c_in, c_out)
        self.gdep = gdep
        self.dropout = dropout
        self.alpha = alpha

    def forward(self, x, adj):
        # Add self-loops, then normalise every row by its sum.
        with_loops = adj + torch.eye(adj.size(0)).to(x.device)
        row_sum = with_loops.sum(1)
        norm_adj = with_loops / row_sum.view(-1, 1)

        state = x
        hops = [state]
        for _ in range(self.gdep):
            # alpha keeps a share of the original input at every hop.
            state = self.alpha * x + (1 - self.alpha) * self.nconv(state, norm_adj)
            hops.append(state)

        stacked = torch.cat(hops, dim=1)
        return self.mlp(stacked)
class simpleVIT(nn.Module):
    """Conv patch embedding followed by ``depth`` pre-norm attention +
    feed-forward layers, reshaped back to a 4-D tensor at the end."""

    def __init__(self, in_channels, emb_size, patch_size=2, depth=1, num_heads=4, dropout=0.1, init_weight=True):
        super(simpleVIT, self).__init__()
        self.emb_size = emb_size
        self.depth = depth

        # Kernel 2 * patch_size + 1 with padding patch_size keeps the spatial
        # size; the result is then flattened into a token sequence.
        kernel = 2 * patch_size + 1
        self.to_patch = nn.Sequential(
            nn.Conv2d(in_channels, emb_size, kernel, padding=patch_size),
            Rearrange('b e (h) (w) -> b (h w) e'),
        )

        blocks = []
        for _ in range(self.depth):
            blocks.append(nn.ModuleList([
                nn.LayerNorm(emb_size),
                MultiHeadAttention(emb_size, num_heads, dropout),
                FeedForward(emb_size, emb_size),
            ]))
        self.layers = nn.ModuleList(blocks)

        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-initialise every Conv2d weight and zero its bias."""
        conv_modules = [mod for mod in self.modules() if isinstance(mod, nn.Conv2d)]
        for conv in conv_modules:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')
            if conv.bias is not None:
                nn.init.constant_(conv.bias, 0)

    def forward(self, x):
        batch, _, _, last = x.shape
        tokens = self.to_patch(x)
        for norm, attn, ff in self.layers:
            tokens = attn(norm(tokens)) + tokens
            tokens = ff(tokens) + tokens

        # Back to (B, emb_size, -1, size of the input's last axis).
        return tokens.transpose(1, 2).reshape(batch, self.emb_size, -1, last)
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention over a (batch, tokens, emb_size) tensor."""

    def __init__(self, emb_size, num_heads, dropout):
        super().__init__()
        self.emb_size = emb_size
        self.num_heads = num_heads
        self.keys = nn.Linear(emb_size, emb_size)
        self.queries = nn.Linear(emb_size, emb_size)
        self.values = nn.Linear(emb_size, emb_size)
        self.att_drop = nn.Dropout(dropout)
        self.projection = nn.Linear(emb_size, emb_size)

    def forward(self, x: Tensor, mask: Tensor = None) -> Tensor:
        """Attend over the token axis of ``x``.

        Args:
            x: tensor of shape (batch, tokens, emb_size).
            mask: optional boolean tensor broadcastable to
                (batch, heads, tokens, tokens); ``True`` marks positions that
                may be attended to.

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch, tokens, _ = x.shape
        heads = self.num_heads

        def split_heads(t):
            # "b n (h d) -> b h n d" (pure-torch equivalent of the rearrange)
            return t.reshape(batch, tokens, heads, -1).transpose(1, 2)

        queries = split_heads(self.queries(x))
        keys = split_heads(self.keys(x))
        values = split_heads(self.values(x))
        energy = torch.einsum('bhqd,bhkd->bhqk', queries, keys)
        if mask is not None:
            # Previously called the non-existent `mask_fill` and dropped the
            # result, so any mask raised AttributeError. Use the dtype's own
            # minimum so reduced-precision inputs do not overflow.
            fill_value = torch.finfo(energy.dtype).min
            energy = energy.masked_fill(~mask, fill_value)

        # NOTE(review): the scale is applied after the softmax and uses
        # emb_size rather than the per-head width; kept as-is because
        # changing it would alter the outputs of already-trained models.
        scaling = self.emb_size ** (1 / 2)
        att = F.softmax(energy, dim=-1) / scaling
        att = self.att_drop(att)
        # weighted sum of the values over the key axis
        out = torch.einsum('bhal,bhlv->bhav', att, values)
        # "b h n d -> b n (h d)"
        out = out.transpose(1, 2).reshape(batch, tokens, -1)
        out = self.projection(out)
        return out
class FeedForward(nn.Module):
    """LayerNorm -> Linear -> GELU -> Linear, applied on the last axis."""

    def __init__(self, dim, hidden_dim):
        super().__init__()
        stages = [
            nn.LayerNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        transformed = self.net(x)
        return transformed
================================================
FILE: layers/SelfAttention_Family.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import math
from math import sqrt
from utils.masking import TriangularCausalMask, ProbMask
import os
class FullAttention(nn.Module):
    """Standard softmax attention on per-head q/k/v tensors.

    When ``mask_flag`` is on and the caller passes no mask, a
    ``TriangularCausalMask`` sized for the queries is created.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(V, A)`` when ``output_attention`` is set, else ``(V, None)``.

        ``queries`` is unpacked as (B, L, H, E), ``values`` as (B, S, H, D).
        """
        n_batch, n_query, n_head, width = queries.shape
        _, n_key, _, v_width = values.shape

        # Default temperature is 1/sqrt(E) unless a (truthy) scale is given.
        factor = self.scale or 1. / sqrt(width)

        logits = torch.einsum("blhe,bshe->bhls", queries, keys)

        needs_default_mask = self.mask_flag and attn_mask is None
        if needs_default_mask:
            attn_mask = TriangularCausalMask(n_batch, n_query, device=queries.device)
        if self.mask_flag:
            # In-place fill: masked logits become -inf before the softmax.
            logits.masked_fill_(attn_mask.mask, -np.inf)

        probs = torch.softmax(factor * logits, dim=-1)
        probs = self.dropout(probs)
        mixed = torch.einsum("bhls,bshd->blhd", probs, values)

        returned_attn = probs if self.output_attention else None
        return (mixed.contiguous(), returned_attn)
class ProbAttention(nn.Module):
    """Sparse attention that computes full attention only for the top-u
    queries ranked by a sampled sparsity measure; the remaining queries keep
    an initial context (mean or cumulative sum of the values)."""

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        """Score every query on ``sample_k`` random keys and return the full
        Q·K^T rows of the ``n_top`` queries with the largest sparsity measure.

        Returns:
            ``(Q_K, M_top)``: scores of shape (B, H, n_top, L_K) and the
            selected query indices of shape (B, H, n_top).
        """
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        # Remove only the singleton axis introduced by Q.unsqueeze(-2). A bare
        # squeeze() also collapsed B, H or sample_k when they equal 1, which
        # selected the wrong queries or failed in the indexing below.
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        """Return the context used for queries that are not selected: the mean
        of V when unmasked, its cumulative sum along the length axis otherwise."""
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            assert (L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        """Overwrite the rows of ``context_in`` at ``index`` with attention
        outputs and return ``(context_in, attns_or_None)``. The passed
        ``attn_mask`` is replaced by a ``ProbMask`` when ``mask_flag`` is set."""
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
        torch.arange(H)[None, :, None],
        index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            # Non-selected rows are reported as uniform attention.
            attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(context, attn)``.

        ``queries`` is unpacked as (B, L_Q, H, D) and ``keys`` as
        (B, L_K, H, ...); all three inputs are transposed to head-major
        (B, H, L, D) before scoring.
        """
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        # Never sample / select more entries than exist.
        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        # NOTE(review): context is returned head-major (B, H, L_Q, D), whereas
        # FullAttention in this file returns 'blhd' and AttentionLayer does
        # out.view(B, L, -1). A transpose(2, 1) looks to be missing here --
        # confirm with the authors before changing, since it alters outputs.
        return context.contiguous(), attn
class AttentionLayer(nn.Module):
    """Multi-head wrapper: projects q/k/v, splits them into heads, calls the
    inner attention module and projects the merged heads back to ``d_model``."""

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AttentionLayer, self).__init__()

        # Per-head widths default to an even split of d_model.
        head_key_dim = d_keys or (d_model // n_heads)
        head_value_dim = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, head_key_dim * n_heads)
        self.key_projection = nn.Linear(d_model, head_key_dim * n_heads)
        self.value_projection = nn.Linear(d_model, head_value_dim * n_heads)
        self.out_projection = nn.Linear(head_value_dim * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(output, attn)`` with ``attn`` as produced by the inner module."""
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        # (B, len, d_model) -> (B, len, H, width)
        projected_q = self.query_projection(queries)
        projected_k = self.key_projection(keys)
        projected_v = self.value_projection(values)

        context, attn = self.inner_attention(
            projected_q.view(B, L, H, -1),
            projected_k.view(B, S, H, -1),
            projected_v.view(B, S, H, -1),
            attn_mask,
        )

        flat = context.view(B, L, -1)
        return self.out_projection(flat), attn
================================================
FILE: layers/Transformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvLayer(nn.Module):
    """Conv1d -> BatchNorm1d -> ELU -> MaxPool1d applied along axis 1 of the input.

    The max-pool uses stride 2, so the output is shorter than the input
    along that axis. ``c_in`` is both the input and output channel count.
    """

    def __init__(self, c_in):
        super().__init__()
        self.downConv = nn.Conv1d(c_in, c_in, kernel_size=3, padding=2,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the four stages in channel-first layout, then swap back."""
        # Conv1d/BatchNorm1d/MaxPool1d expect channels on axis 1.
        feats = x.permute(0, 2, 1)
        for stage in (self.downConv, self.norm, self.activation, self.maxPool):
            feats = stage(feats)
        return feats.transpose(1, 2)
class EncoderLayer(nn.Module):
    """Attention sub-layer followed by a two-layer pointwise feed-forward block.

    Args:
        attention: Callable invoked as ``attention(x, x, x, attn_mask=...)``
            returning ``(output, attention_weights)``.
        d_model: Feature width of the layer.
        d_ff: Hidden width of the feed-forward block; ``4 * d_model`` when
            falsy.
        dropout: Dropout probability shared by all dropout applications.
        activation: ``"relu"`` selects ``F.relu``; anything else ``F.gelu``.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        hidden = d_ff or 4 * d_model
        self.attention = attention
        # kernel_size=1 convolutions act as per-position linear maps.
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(layer_output, attention_weights)``."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)

        # Residual + norm around the attention output.
        x = self.norm1(x + self.dropout(attended))

        # Feed-forward block in channel-first layout, then back.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        update = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + update), attn
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    Args:
        attn_layers: Layers called as ``layer(x, attn_mask=...)`` returning
            ``(x, attn)``.
        conv_layers: Optional layers called as ``layer(x)`` after each paired
            attention layer. Pairing uses ``zip``, and the last attention
            layer is then applied on its own.
        norm_layer: Optional module applied to the final output.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Return ``(x, attns)`` where ``attns`` collects each layer's weights."""
        attns = []

        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            # The final attention layer runs without a mask and without a conv.
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
class DecoderLayer(nn.Module):
    """Self-attention, cross-attention and a pointwise feed-forward block.

    Args:
        self_attention: Callable invoked as ``f(x, x, x, attn_mask=x_mask)``;
            only element ``[0]`` of its result is used.
        cross_attention: Callable invoked as
            ``f(x, cross, cross, attn_mask=cross_mask)``; only element ``[0]``
            of its result is used.
        d_model: Feature width of the layer.
        d_ff: Hidden width of the feed-forward block; ``4 * d_model`` when
            falsy.
        dropout: Dropout probability shared by all dropout applications.
        activation: ``"relu"`` selects ``F.relu``; anything else ``F.gelu``.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        hidden = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return the layer output for decoder input ``x`` and encoder output ``cross``."""
        # Self-attention with residual connection and norm.
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x = self.norm1(x + self.dropout(self_out))

        # Cross-attention: queries from the decoder, keys/values from ``cross``.
        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x = self.norm2(x + self.dropout(cross_out))

        # Pointwise feed-forward in channel-first layout, then back.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        update = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + update)
class Decoder(nn.Module):
    """Sequential stack of decoder layers with optional norm and projection.

    Args:
        layers: Layers called as
            ``layer(x, cross, x_mask=..., cross_mask=...)`` returning the new
            ``x``.
        norm_layer: Optional module applied after the last layer.
        projection: Optional module applied after the norm.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None ,external=None):
        """Run all layers on ``x``; ``external`` is accepted but not used."""
        for decoder_layer in self.layers:
            x = decoder_layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)

        # Apply the optional post-processing stages in order.
        for stage in (self.norm, self.projection):
            if stage is not None:
                x = stage(x)
        return x
================================================
FILE: models/Autoformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding, DataEmbedding_wo_pos,DataEmbedding_wo_pos_temp,DataEmbedding_wo_temp
from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
import math
import numpy as np
class Model(nn.Module):
    """
    Autoformer is the first method to achieve the series-wise connection,
    with inherent O(LlogL) complexity
    """

    def __init__(self, configs):
        """Build the decomposition, embeddings, encoder and decoder from ``configs``.

        Raises:
            ValueError: If ``configs.embed_type`` is not one of 0-4. Previously
                an unknown value left the embeddings undefined and only failed
                later inside ``forward``.
        """
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Decomp
        kernel_size = configs.moving_avg
        self.decomp = series_decomp(kernel_size)

        # Embedding
        # The series-wise connection inherently contains the sequential information.
        # Thus, we can discard the position embedding of transformers.
        embedding_by_type = {
            0: DataEmbedding_wo_pos,
            1: DataEmbedding,
            2: DataEmbedding_wo_pos,
            3: DataEmbedding_wo_temp,
            4: DataEmbedding_wo_pos_temp,
        }
        try:
            embedding_cls = embedding_by_type[configs.embed_type]
        except (KeyError, TypeError):
            raise ValueError(
                'Unsupported embed_type {!r}; expected one of {}'.format(
                    configs.embed_type, sorted(embedding_by_type))) from None
        self.enc_embedding = embedding_cls(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.dec_embedding = embedding_cls(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
                                        output_attention=configs.output_attention),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for _ in range(configs.e_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout,
                                        output_attention=False),
                        configs.d_model, configs.n_heads),
                    AutoCorrelationLayer(
                        AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
                                        output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.c_out,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for _ in range(configs.d_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Forecast the last ``pred_len`` steps.

        Returns:
            The last ``pred_len`` steps of the decoder output, plus the encoder
            attentions when ``self.output_attention`` is set.
        """
        # decomp init: the future part of the trend starts from the input mean,
        # the future part of the seasonal component starts from zeros.
        mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
        seasonal_init, trend_init = self.decomp(x_enc)
        # decoder input: last ``label_len`` known steps followed by the placeholders
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
        # dec
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
                                                 trend=trend_init)
        # final
        dec_out = trend_part + seasonal_part

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
================================================
FILE: models/DLinear.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """

    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Average along axis 1 after replicating the first and last steps."""
        # Replicate the boundary steps (kernel_size - 1) // 2 times on each side.
        pad = (self.kernel_size - 1) // 2
        head = x[:, 0:1, :].repeat(1, pad, 1)
        tail = x[:, -1:, :].repeat(1, pad, 1)
        padded = torch.cat([head, x, tail], dim=1)
        # AvgPool1d pools over the last axis, so swap axes around the call.
        pooled = self.avg(padded.permute(0, 2, 1))
        return pooled.permute(0, 2, 1)
class series_decomp(nn.Module):
    """
    Series decomposition block
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` with ``residual = x - moving_mean``."""
        trend = self.moving_avg(x)
        return x - trend, trend
class Model(nn.Module):
    """
    Decomposition-Linear
    """

    def __init__(self, configs):
        """Create one seasonal/trend linear pair, or one pair per channel.

        Reads ``seq_len``, ``pred_len``, ``individual`` and ``enc_in`` from
        ``configs``.
        """
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

        # Decomposition kernel size is fixed.
        kernel_size = 25
        self.decompsition = series_decomp(kernel_size)  # returns (res, moving_mean)
        self.individual = configs.individual
        self.channels = configs.enc_in

        if not self.individual:
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
        else:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            # Seasonal and trend layers are created alternately, per channel.
            for _ in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))

    def forward(self, x):
        """Map ``[Batch, Input length, Channel]`` to ``[Batch, Output length, Channel]``."""
        seasonal, trend = self.decompsition(x)
        # The linear layers act on the time axis, so move it last.
        seasonal = seasonal.permute(0, 2, 1)
        trend = trend.permute(0, 2, 1)

        if not self.individual:
            seasonal_output = self.Linear_Seasonal(seasonal)
            trend_output = self.Linear_Trend(trend)
        else:
            seasonal_output = torch.zeros([seasonal.size(0), seasonal.size(1), self.pred_len],
                                          dtype=seasonal.dtype).to(seasonal.device)
            trend_output = torch.zeros([trend.size(0), trend.size(1), self.pred_len],
                                       dtype=trend.dtype).to(trend.device)
            for ch in range(self.channels):
                seasonal_output[:, ch, :] = self.Linear_Seasonal[ch](seasonal[:, ch, :])
                trend_output[:, ch, :] = self.Linear_Trend[ch](trend[:, ch, :])

        combined = seasonal_output + trend_output
        return combined.permute(0, 2, 1)  # to [Batch, Output length, Channel]
================================================
FILE: models/Informer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils.masking import TriangularCausalMask, ProbMask
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
from layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer
from layers.Embed import DataEmbedding,DataEmbedding_wo_pos,DataEmbedding_wo_temp,DataEmbedding_wo_pos_temp
import numpy as np
class Model(nn.Module):
    """
    Informer with Propspare attention in O(LlogL) complexity
    """

    def __init__(self, configs):
        """Build the embeddings, encoder and decoder from ``configs``.

        Raises:
            ValueError: If ``configs.embed_type`` is not one of 0-4. Previously
                an unknown value left the embeddings undefined and only failed
                later inside ``forward``.
        """
        super(Model, self).__init__()
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Embedding: types 0 and 1 both use the full DataEmbedding.
        embedding_by_type = {
            0: DataEmbedding,
            1: DataEmbedding,
            2: DataEmbedding_wo_pos,
            3: DataEmbedding_wo_temp,
            4: DataEmbedding_wo_pos_temp,
        }
        try:
            embedding_cls = embedding_by_type[configs.embed_type]
        except (KeyError, TypeError):
            raise ValueError(
                'Unsupported embed_type {!r}; expected one of {}'.format(
                    configs.embed_type, sorted(embedding_by_type))) from None
        self.enc_embedding = embedding_cls(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.dec_embedding = embedding_cls(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=configs.output_attention),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for _ in range(configs.e_layers)
            ],
            # One conv layer fewer than attention layers when distilling.
            [
                ConvLayer(
                    configs.d_model
                ) for _ in range(configs.e_layers - 1)
            ] if configs.distil else None,
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(
                        ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    AttentionLayer(
                        ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for _ in range(configs.d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Encode ``x_enc``, decode ``x_dec`` and return the last ``pred_len`` steps.

        Returns:
            The last ``pred_len`` steps of the decoder output, plus the encoder
            attentions when ``self.output_attention`` is set.
        """
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
================================================
FILE: models/MSGNet.py
================================================
import numpy as np
# import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from layers.Embed import DataEmbedding
from layers.MSGBlock import GraphBlock, simpleVIT, Attention_Block, Predict
def FFT_for_Period(x, k=2):
    """Pick the ``k`` dominant frequencies of ``x`` and their period lengths.

    ``x`` is treated as ``[B, T, C]``; the real FFT is taken along axis 1.

    Returns:
        A pair ``(period, weight)``: ``period`` is a NumPy array holding
        ``T // frequency_index`` for each selected frequency, and ``weight``
        is the amplitude at those frequencies averaged over the last axis,
        one row per batch element.
    """
    amplitude = abs(torch.fft.rfft(x, dim=1))
    # Rank frequencies by amplitude averaged over batch and channels.
    frequency_list = amplitude.mean(0).mean(-1)
    # Exclude the zero-frequency component from the ranking.
    frequency_list[0] = 0
    top_list = torch.topk(frequency_list, k)[1].detach().cpu().numpy()
    period = x.shape[1] // top_list
    return period, amplitude.mean(-1)[:, top_list]
class ScaleGraphBlock(nn.Module):
    """One block: per-scale graph convolution, attention, weighted aggregation.

    ``configs.top_k`` scales are used; each has its own ``GraphBlock`` while
    the attention block, layer norm and GELU are shared across scales.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k

        self.att0 = Attention_Block(configs.d_model, configs.d_ff,
                                    n_heads=configs.n_heads, dropout=configs.dropout, activation="gelu")
        self.norm = nn.LayerNorm(configs.d_model)
        self.gelu = nn.GELU()
        self.gconv = nn.ModuleList()
        for _ in range(self.k):
            graph_block = GraphBlock(configs.c_out, configs.d_model, configs.conv_channel,
                                     configs.skip_channel, configs.gcn_depth, configs.dropout,
                                     configs.propalpha, configs.seq_len, configs.node_dim)
            self.gconv.append(graph_block)

    def forward(self, x):
        """Return the scale-weighted sum of per-scale outputs plus a residual.

        ``x`` is unpacked as ``(B, T, N)``.
        """
        B, T, N = x.size()
        scale_list, scale_weight = FFT_for_Period(x, self.k)

        per_scale = []
        for i in range(self.k):
            scale = scale_list[i]

            # Graph convolution; ``x`` is rebound, so each scale sees the
            # output of the previous scale's graph block.
            x = self.gconv[i](x)

            # Zero-pad the time axis up to the next multiple of ``scale``.
            if self.seq_len % scale != 0:
                length = ((self.seq_len // scale) + 1) * scale
                padding = torch.zeros([x.shape[0], length - self.seq_len, x.shape[2]]).to(x.device)
                out = torch.cat([x, padding], dim=1)
            else:
                length = self.seq_len
                out = x

            # Split into segments of ``scale`` steps and attend within each.
            out = out.reshape(B, length // scale, scale, N)
            out = out.reshape(-1, scale, N)
            out = self.gelu(self.norm(self.att0(out)))

            # Restore (B, length, N) and drop the padded steps.
            out = out.reshape(B, -1, scale, N).reshape(B, -1, N)
            per_scale.append(out[:, :self.seq_len, :])

        res = torch.stack(per_scale, dim=-1)

        # adaptive aggregation: softmax over the per-scale amplitudes
        scale_weight = F.softmax(scale_weight, dim=1)
        scale_weight = scale_weight.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)
        res = torch.sum(res * scale_weight, -1)

        # residual connection (with ``x`` as left by the last graph block)
        return res + x
class Model(nn.Module):
    """MSGNet: embedding, stacked ScaleGraphBlocks, projection and a seq-to-pred head."""

    def __init__(self, configs):
        super().__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.model = nn.ModuleList([ScaleGraphBlock(configs) for _ in range(configs.e_layers)])
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model,
                                           configs.embed, configs.freq, configs.dropout)
        self.layer = configs.e_layers
        self.layer_norm = nn.LayerNorm(configs.d_model)
        # Not referenced by forward() below.
        self.predict_linear = nn.Linear(
            self.seq_len, self.pred_len + self.seq_len)
        self.projection = nn.Linear(
            configs.d_model, configs.c_out, bias=True)
        self.seq2pred = Predict(configs.individual, configs.c_out,
                                configs.seq_len, configs.pred_len, configs.dropout)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Forecast from ``x_enc``; ``x_dec``, ``x_mark_dec`` and ``mask`` are unused.

        Returns:
            The last ``pred_len`` steps of the de-normalized prediction.
        """
        # Normalization from Non-stationary Transformer: zero mean and unit
        # variance along axis 1.
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        variance = torch.var(x_enc, dim=1, keepdim=True, unbiased=False)
        stdev = torch.sqrt(variance + 1e-5)
        x_enc /= stdev

        # embedding
        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]

        for i in range(self.layer):
            enc_out = self.layer_norm(self.model[i](enc_out))

        # project back
        dec_out = self.projection(enc_out)
        # seq2pred is applied with the last two axes swapped
        dec_out = self.seq2pred(dec_out.transpose(1, 2)).transpose(1, 2)

        # De-Normalization from Non-stationary Transformer
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        dec_out = dec_out * scale
        dec_out = dec_out + shift

        return dec_out[:, -self.pred_len:, :]
================================================
FILE: run_longExp.py
================================================
import argparse
import os
import time
from multiprocessing import freeze_support
import torch
from exp.exp_main import Exp_Main
import random
import numpy as np
# Seed every RNG the experiment uses so runs are repeatable.
fix_seed = 2021
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` would turn every non-empty string, including "False", into
    True. An empty string still maps to False, as it did with ``bool``.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))


def _build_setting(args, ii):
    """Return the experiment identifier string for iteration ``ii``."""
    return '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.distil,
        args.des, ii)


parser = argparse.ArgumentParser(description='MSGNet for Time Series Forecasting')

# basic config
parser.add_argument('--task_name', type=str, required=False, default='long_term_forecast',
                    help='task name, options:[long_term_forecast, mask, short_term_forecast, imputation, classification, anomaly_detection]')
parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
parser.add_argument('--model', type=str, required=True, default='Autoformer',
                    help='model name, options: [Autoformer, Informer, Transformer]')

# data loader
parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
parser.add_argument('--features', type=str, default='M',
                    help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate,'
                         ' S:univariate predict univariate, MS:multivariate predict univariate')
parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
parser.add_argument('--freq', type=str, default='h',
                    help='freq for time features encoding, '
                         'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
                         'you can also use more detailed freq like 15min or 3h')
parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

# forecasting task
parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
parser.add_argument('--label_len', type=int, default=48, help='start token length')
parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')

parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock/ScaleGraphBlock')
parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')

parser.add_argument('--num_nodes', type=int, default=7, help='to create Graph')
parser.add_argument('--subgraph_size', type=int, default=3, help='neighbors number')
parser.add_argument('--tanhalpha', type=float, default=3, help='')

#GCN
parser.add_argument('--node_dim', type=int, default=10, help='each node embbed to dim dimentions')
parser.add_argument('--gcn_depth', type=int, default=2, help='')
parser.add_argument('--gcn_dropout', type=float, default=0.3, help='')
parser.add_argument('--propalpha', type=float, default=0.3, help='')
parser.add_argument('--conv_channel', type=int, default=32, help='')
parser.add_argument('--skip_channel', type=int, default=32, help='')

# DLinear
parser.add_argument('--individual', action='store_true', default=False, help='DLinear: a linear layer for each variate(channel) individually')

# Formers
parser.add_argument('--embed_type', type=int, default=0, help='0: default '
                                                              '1: value embedding + temporal embedding + positional embedding '
                                                              '2: value embedding + temporal embedding '
                                                              '3: value embedding + positional embedding '
                                                              '4: value embedding')
parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
parser.add_argument('--c_out', type=int, default=7, help='output size')
parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--distil', action='store_false',
                    help='whether to use distilling in encoder, using this argument means not using distilling',
                    default=True)
parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
parser.add_argument('--embed', type=str, default='timeF',
                    help='time features encoding, options:[timeF, fixed, learned]')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data')

# optimization
parser.add_argument('--num_workers', type=int, default=8, help='data loader num workers')
parser.add_argument('--itr', type=int, default=2, help='experiments times')
parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
parser.add_argument('--des', type=str, default='test', help='exp description')
parser.add_argument('--loss', type=str, default='MSE', help='loss function')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

# GPU
parser.add_argument('--use_gpu', type=_str2bool, default=True, help='use gpu')
parser.add_argument('--gpu', type=int, default=0, help='gpu')
parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')
parser.add_argument('--test_flop', action='store_true', default=False, help='See utils/tools for usage')

args = parser.parse_args()

# Only use the GPU when one is requested and actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

if args.use_gpu and args.use_multi_gpu:
    # Store the whitespace-free list back on ``devices`` (the original wrote it
    # to a misspelled ``dvices`` attribute, so the cleanup was never visible).
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]

print('Args in experiment:')
print(args)

Exp = Exp_Main

if args.is_training:
    start = time.time()
    for ii in range(args.itr):
        # setting record of experiments
        setting = _build_setting(args, ii)

        exp = Exp(args)  # set experiments
        print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting)

        # if args.do_predict:
        #     print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        #     exp.predict(setting, True)

        torch.cuda.empty_cache()

    # Report the wall-clock time of all iterations and append it to result.txt.
    end = time.time()
    used_time = end - start
    print("time:", used_time)
    with open("result.txt", 'a') as f:
        f.write('time:{}'.format(used_time))
        f.write('\n')
        f.write('\n')
else:
    ii = 0
    setting = _build_setting(args, ii)

    exp = Exp(args)  # set experiments
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting, test=1)
    torch.cuda.empty_cache()
================================================
FILE: scripts/ETTh1.sh
================================================
# Train and evaluate MSGNet on ETTh1 for four prediction lengths.

# Create the log directories if they are missing.
[ -d "./logs" ] || mkdir ./logs
[ -d "./logs/ETTh1" ] || mkdir ./logs/ETTh1

export CUDA_VISIBLE_DEVICES=0

seq_len=96
label_len=48
model_name=MSGNet

# Usage: run_experiment PRED_LEN E_LAYERS D_MODEL D_FF
# Only these four values differ between the runs; everything else is shared.
run_experiment() {
  pred_len=$1
  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path ETTh1.csv \
    --model_id ETTh1'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --freq h \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers $2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model $3 \
    --d_ff $4 \
    --top_k 3 \
    --conv_channel 32 \
    --skip_channel 32 \
    --dropout 0.1 \
    --batch_size 32 \
    --itr 1 #>logs/ETTh1/$model_name'_'ETTh1_$seq_len'_'$pred_len.log
}

run_experiment 96 1 32 64
run_experiment 192 1 32 64
run_experiment 336 2 32 64
run_experiment 720 1 16 32
================================================
FILE: scripts/ETTh2.sh
================================================
# Train and evaluate MSGNet on ETTh2, one run_longExp.py invocation per
# prediction length. No --dropout is passed, so the script's default applies.

# Create the log directories if they are missing.
if [ ! -d "./logs" ]; then
mkdir ./logs
fi
if [ ! -d "./logs/ETTh2" ]; then
mkdir ./logs/ETTh2
fi
# Pin the runs to GPU 1.
export CUDA_VISIBLE_DEVICES=1
# Settings shared by every run below.
seq_len=96
label_len=48
model_name=MSGNet
# Prediction length 96. The redirect to a log file is commented out.
pred_len=96
python -u run_longExp.py \
--is_training 1 \
--root_path ./dataset/ \
--data_path ETTh2.csv \
--model_id ETTh2'_'$seq_len'_'$pred_len \
--model $model_name \
--data ETTh2 \
--features M \
--freq h \
--target 'OT' \
--seq_len $seq_len \
--label_len $label_len \
--pred_len $pred_len \
--e_layers 2 \
--d_layers 1 \
--factor 3 \
--enc_in 7 \
--dec_in 7 \
--c_out 7 \
--des 'Exp' \
--d_model 16 \
--d_ff 32 \
--conv_channel 32 \
--skip_channel 32 \
--top_k 5 \
--batch_size 32 \
--itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log
# Prediction length 192, same hyper-parameters.
pred_len=192
python -u run_longExp.py \
--is_training 1 \
--root_path ./dataset/ \
--data_path ETTh2.csv \
--model_id ETTh2'_'$seq_len'_'$pred_len \
--model $model_name \
--data ETTh2 \
--features M \
--freq h \
--target 'OT' \
--seq_len $seq_len \
--label_len $label_len \
--pred_len $pred_len \
--e_layers 2 \
--d_layers 1 \
--factor 3 \
--enc_in 7 \
--dec_in 7 \
--c_out 7 \
--des 'Exp' \
--d_model 16 \
--d_ff 32 \
--conv_channel 32 \
--skip_channel 32 \
--top_k 5 \
--batch_size 32 \
--itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log
# Prediction length 336, same hyper-parameters.
pred_len=336
python -u run_longExp.py \
--is_training 1 \
--root_path ./dataset/ \
--data_path ETTh2.csv \
--model_id ETTh2'_'$seq_len'_'$pred_len \
--model $model_name \
--data ETTh2 \
--features M \
--freq h \
--target 'OT' \
--seq_len $seq_len \
--label_len $label_len \
--pred_len $pred_len \
--e_layers 2 \
--d_layers 1 \
--factor 3 \
--enc_in 7 \
--dec_in 7 \
--c_out 7 \
--des 'Exp' \
--d_model 16 \
--d_ff 32 \
--conv_channel 32 \
--skip_channel 32 \
--top_k 5 \
--batch_size 32 \
--itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log
pred_len=720
python -u run_longExp.py \
--is_training 1 \
--root_path ./dataset/ \
--data_path ETTh2.csv \
--model_id ETTh2'_'$seq_len'_'$pred_len \
--model $model_name \
--data ETTh2 \
--features M \
--freq h \
--target 'OT' \
--seq_len $seq_len \
--label_len $label_len \
--pred_len $pred_len \
--e_layers 2 \
--d_layers 1 \
--factor 3 \
--enc_in 7 \
--dec_in 7 \
--c_out 7 \
--des 'Exp' \
--d_model 16 \
--d_ff 32 \
--conv_channel 32 \
--skip_channel 32 \
--top_k 5 \
--batch_size 32 \
--itr 1 #>logs/ETTh2/$model_name'_'ETTh2_$seq_len'_'$pred_len.log
================================================
FILE: scripts/ETTm1.sh
================================================
# Train and evaluate MSGNet on ETTm1 for every forecasting horizon.
# The runs differ only in pred_len and conv_channel, so they are driven by
# one loop instead of four copy-pasted invocations.

# Create the log directory together with its parent when missing.
mkdir -p ./logs/ETTm1

export CUDA_VISIBLE_DEVICES=2

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720
do
  # Horizon 96 uses 32 convolution channels; the longer horizons use 16.
  case "$pred_len" in
    96) conv_channel=32 ;;
    *)  conv_channel=16 ;;
  esac

  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path ETTm1.csv \
    --model_id ETTm1'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model 32 \
    --d_ff 32 \
    --top_k 3 \
    --conv_channel $conv_channel \
    --skip_channel 32 \
    --batch_size 32 \
    --itr 1 #>logs/ETTm1/$model_name'_'ETTm1_$seq_len'_'$pred_len.log
done
================================================
FILE: scripts/ETTm2.sh
================================================
# Train and evaluate MSGNet on ETTm2 for every forecasting horizon.
# The runs differ only in pred_len and d_ff, so they are driven by one loop
# instead of four copy-pasted invocations.

# Create the log directory together with its parent when missing.
mkdir -p ./logs/ETTm2

export CUDA_VISIBLE_DEVICES=3

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720
do
  # Feed-forward width per horizon: 32 for 96 and 336, 64 for 192 and 720.
  case "$pred_len" in
    96|336) d_ff=32 ;;
    *)      d_ff=64 ;;
  esac

  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path ETTm2.csv \
    --model_id ETTm2'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data ETTm2 \
    --features M \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model 32 \
    --d_ff $d_ff \
    --top_k 3 \
    --conv_channel 32 \
    --skip_channel 32 \
    --dropout 0.3 \
    --batch_size 32 \
    --itr 1 #>logs/ETTm2/$model_name'_'ETTm2_$seq_len'_'$pred_len.log
done
================================================
FILE: scripts/Flight.sh
================================================
# Train and evaluate MSGNet on the Flight dataset for every forecasting horizon.

# Make sure the log directory (and its parent) exists.
mkdir -p ./logs/Flight

export CUDA_VISIBLE_DEVICES=2

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720; do
  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path Flight.csv \
    --model_id Flight'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --freq h \
    --target 'UUEE' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model 16 \
    --d_ff 32 \
    --top_k 5 \
    --conv_channel 32 \
    --skip_channel 32 \
    --node_dim 100 \
    --batch_size 32 #>logs/Flight/$model_name'_'Flight_$seq_len'_'$pred_len.log
done
================================================
FILE: scripts/electricity.sh
================================================
# Train and evaluate MSGNet on the electricity dataset for every horizon.
# The runs differ only in pred_len and e_layers, so they are driven by one
# loop instead of four copy-pasted invocations.

# Create the log directory together with its parent when missing.
mkdir -p ./logs/electricity

export CUDA_VISIBLE_DEVICES=3

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720
do
  # Two encoder layers for horizons 96 and 192, three for 336 and 720.
  case "$pred_len" in
    96|192) e_layers=2 ;;
    *)      e_layers=3 ;;
  esac

  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path electricity.csv \
    --model_id electricity'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --freq h \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers $e_layers \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --d_model 1024 \
    --d_ff 512 \
    --top_k 5 \
    --conv_channel 16 \
    --skip_channel 32 \
    --node_dim 100 \
    --batch_size 32 \
    --itr 1 #>logs/electricity/$model_name'_'electricity_$seq_len'_'$pred_len.log
done
================================================
FILE: scripts/exchange.sh
================================================
# Train and evaluate MSGNet on the exchange-rate dataset for every horizon.
# The runs differ in pred_len, top_k and one optional flag, so they are
# driven by one loop instead of four copy-pasted invocations.

# Create the log directory together with its parent when missing.
mkdir -p ./logs/exchange

export CUDA_VISIBLE_DEVICES=2

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720
do
  # Per-horizon settings:
  #   96       -> top_k 3 with an explicit dropout of 0.2
  #   192, 336 -> top_k 5 with node_dim 30
  #   720      -> top_k 5 and no extra flag
  case "$pred_len" in
    96)      top_k=3; extra_args="--dropout 0.2" ;;
    192|336) top_k=5; extra_args="--node_dim 30" ;;
    *)       top_k=5; extra_args="" ;;
  esac

  # $extra_args is intentionally unquoted so it splits into flag and value
  # (or vanishes entirely when empty).
  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path exchange_rate.csv \
    --model_id exchange'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --freq h \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --d_model 64 \
    --d_ff 128 \
    --top_k $top_k \
    $extra_args \
    --conv_channel 16 \
    --skip_channel 32 \
    --batch_size 32 \
    --itr 1 #>logs/exchange/$model_name'_'exchange_$seq_len'_'$pred_len.log
done
================================================
FILE: scripts/weather.sh
================================================
# Train and evaluate MSGNet on the weather dataset for every horizon.
# The runs differ in pred_len, e_layers and an optional epoch limit, so they
# are driven by one loop instead of four copy-pasted invocations.

# Create the log directory together with its parent when missing.
mkdir -p ./logs/weather

export CUDA_VISIBLE_DEVICES=2

seq_len=96
label_len=48
model_name=MSGNet

for pred_len in 96 192 336 720
do
  # Per-horizon settings:
  #   96  -> two encoder layers, training capped at 3 epochs
  #   336 -> a single encoder layer
  #   192, 720 -> two encoder layers
  case "$pred_len" in
    96)  e_layers=2; extra_args="--train_epochs 3" ;;
    336) e_layers=1; extra_args="" ;;
    *)   e_layers=2; extra_args="" ;;
  esac

  # $extra_args is intentionally unquoted so it splits into flag and value
  # (or vanishes entirely when empty).
  python -u run_longExp.py \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path weather.csv \
    --model_id weather'_'$seq_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --freq h \
    --target 'OT' \
    --seq_len $seq_len \
    --label_len $label_len \
    --pred_len $pred_len \
    --e_layers $e_layers \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --d_model 64 \
    --d_ff 128 \
    --top_k 5 \
    --conv_channel 32 \
    --skip_channel 32 \
    --batch_size 32 \
    $extra_args \
    --itr 1 #>logs/weather/$model_name'_'weather_$seq_len'_'$pred_len.log
done
================================================
FILE: utils/masking.py
================================================
import torch
class TriangularCausalMask():
    """Boolean mask of shape ``[B, 1, L, L]`` that is True strictly above the diagonal.

    Args:
        B: size of the leading (batch) dimension.
        L: side length of each square mask.
        device: device the finished mask is moved to.
    """

    def __init__(self, B, L, device="cpu"):
        with torch.no_grad():
            all_true = torch.ones([B, 1, L, L], dtype=torch.bool)
            # diagonal=1 keeps only the entries strictly above the main diagonal.
            upper = torch.triu(all_true, diagonal=1)
            self._mask = upper.to(device)

    @property
    def mask(self):
        """The mask tensor built in the constructor."""
        return self._mask
class ProbMask():
    """Rows of an upper-triangular mask gathered at the positions given by ``index``.

    An ``L x scores.shape[-1]`` boolean mask (True strictly above the diagonal)
    is expanded over ``B`` and ``H``; for every (batch, head) pair the rows
    listed in ``index`` are selected and the result is viewed with the shape
    of ``scores``.

    Args:
        B: size of the first dimension used for expansion and indexing.
        H: size of the second dimension used for expansion and indexing.
        L: number of rows in the base triangular mask.
        index: integer tensor of row positions to pick per (batch, head).
        scores: tensor whose last dimension sets the mask width and whose
            full shape the final mask takes.
        device: device the intermediate and final tensors are moved to.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        width = scores.shape[-1]
        base = torch.ones(L, width, dtype=torch.bool).to(device).triu(1)
        expanded = base[None, None, :].expand(B, H, L, width)
        batch_ix = torch.arange(B)[:, None, None]
        head_ix = torch.arange(H)[None, :, None]
        selected = expanded[batch_ix, head_ix, index, :].to(device)
        self._mask = selected.view(scores.shape).to(device)

    @property
    def mask(self):
        """The mask tensor built in the constructor."""
        return self._mask
================================================
FILE: utils/metrics.py
================================================
import numpy as np
def MAE(pred, true):
    """Return the mean absolute error between ``pred`` and ``true``."""
    abs_error = np.abs(pred - true)
    return np.mean(abs_error)
def MAPE(pred, true):
    """Return the mean absolute percentage error (as a fraction, not scaled by 100).

    Entries where ``true`` is zero are divided by zero, exactly as written.
    """
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))
def ND(pred, true):
    """Return the mean absolute error divided by the mean absolute value of ``true``."""
    mean_abs_error = np.mean(np.abs(true - pred))
    mean_abs_true = np.mean(np.abs(true))
    return mean_abs_error / mean_abs_true
def MSE(pred, true):
    """Return the mean squared error between ``pred`` and ``true``."""
    residual = pred - true
    return np.mean(residual ** 2)
def RMSE(pred, true):
    """Return the root mean squared error, i.e. the square root of ``MSE``."""
    mean_sq = MSE(pred, true)
    return np.sqrt(mean_sq)
def NRMSE(pred, true):
    """Return the root mean squared error divided by the mean absolute value of ``true``."""
    root_mean_sq = np.sqrt(np.mean(np.power((pred - true), 2)))
    scale = np.mean(np.abs(true))
    return root_mean_sq / scale
def RSE(pred, true):
    """Return the root relative squared error.

    The norm of the residual is divided by the norm of ``true`` centred on
    its overall mean.
    """
    residual_norm = np.sqrt(np.sum((true - pred) ** 2))
    spread_norm = np.sqrt(np.sum((true - true.mean()) ** 2))
    return residual_norm / spread_norm
def CORR(pred, true):
    """Return the scaled Pearson correlation between ``pred`` and ``true``.

    Both arrays are centred along axis 0, the correlation is computed along
    that axis, and the result is averaged over the last remaining axis.

    The denominator is ``sqrt(sum(a**2) * sum(b**2))``. The earlier form,
    ``sqrt(sum(a**2 * b**2))``, is not the Pearson normaliser and did not
    yield 1 for perfectly correlated series.

    NOTE(review): the 0.01 factor is kept from the original implementation;
    its purpose is not evident from this file -- confirm before relying on
    the absolute scale of this metric.

    Args:
        pred: predicted values (array with samples along axis 0).
        true: ground-truth values with the same shape as ``pred``.

    Returns:
        0.01 times the mean correlation coefficient.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    # Guard against division by zero for constant series.
    d += 1e-12
    return 0.01 * (u / d).mean(-1)
def MSPE(pred, true):
    """Return the mean squared percentage error (as a fraction, not scaled by 100).

    Entries where ``true`` is zero are divided by zero, exactly as written.
    """
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))
def metric(pred, true):
    """Evaluate every metric defined in this module on ``pred`` versus ``true``.

    Returns:
        The tuple ``(mae, mse, rmse, mape, mspe, rse, corr, nd, nrmse)``.
    """
    # Order matters: it defines the positions in the returned tuple.
    scorers = (MAE, MSE, RMSE, MAPE, MSPE, RSE, CORR, ND, NRMSE)
    return tuple(score(pred, true) for score in scorers)
def metric2(pred, true):
    """Evaluate the module's metrics except ``CORR`` on ``pred`` versus ``true``.

    Returns:
        The tuple ``(mae, mse, rmse, mape, mspe, rse, nd, nrmse)``.
    """
    # Order matters: it defines the positions in the returned tuple.
    scorers = (MAE, MSE, RMSE, MAPE, MSPE, RSE, ND, NRMSE)
    return tuple(score(pred, true) for score in scorers)
================================================
FILE: utils/timefeatures.py
================================================
from typing import List
import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
class TimeFeature:
    """Base class for callables that turn a ``DatetimeIndex`` into one numeric feature.

    Subclasses override ``__call__``; the base implementation returns ``None``.
    """

    def __init__(self):
        """No state to set up."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return None

    def __repr__(self):
        return f"{self.__class__.__name__}()"
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Seconds 0..59 map to 0..1 before centring on zero.
        scaled = index.second / 59.0
        return scaled - 0.5
class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Minutes 0..59 map to 0..1 before centring on zero.
        scaled = index.minute / 59.0
        return scaled - 0.5
class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Hours 0..23 map to 0..1 before centring on zero.
        scaled = index.hour / 23.0
        return scaled - 0.5
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Weekdays 0..6 map to 0..1 before centring on zero.
        scaled = index.dayofweek / 6.0
        return scaled - 0.5
class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based, so shift to 0..30 before scaling.
        zero_based = index.day - 1
        return zero_based / 30.0 - 0.5
class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based, so shift to 0..365 before scaling.
        zero_based = index.dayofyear - 1
        return zero_based / 365.0 - 0.5
class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Months are 1-based, so shift to 0..11 before scaling.
        zero_based = index.month - 1
        return zero_based / 11.0 - 0.5
class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # ISO week numbers are 1-based, so shift before scaling by 52.
        iso_week = index.isocalendar().week
        return (iso_week - 1) / 52.0 - 0.5
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Build the time-feature extractors suited to a pandas frequency string.

    The string is parsed with ``to_offset`` and matched against the supported
    offset types in a fixed order; the first match decides which feature
    classes are instantiated.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the supported offset types.
    """
    daily = [DayOfWeek, DayOfMonth, DayOfYear]
    # Pairs are checked top to bottom; the first isinstance match wins.
    candidates = (
        (offsets.YearEnd, []),
        (offsets.QuarterEnd, [MonthOfYear]),
        (offsets.MonthEnd, [MonthOfYear]),
        (offsets.Week, [DayOfMonth, WeekOfYear]),
        (offsets.Day, daily),
        (offsets.BusinessDay, daily),
        (offsets.Hour, [HourOfDay] + daily),
        (offsets.Minute, [MinuteOfHour, HourOfDay] + daily),
        (offsets.Second, [SecondOfMinute, MinuteOfHour, HourOfDay] + daily),
    )

    parsed = to_offset(freq_str)
    for offset_kind, feature_types in candidates:
        if isinstance(parsed, offset_kind):
            return [feature_type() for feature_type in feature_types]

    supported_freq_msg = f"""
Unsupported frequency {freq_str}
The following frequencies are supported:
Y - yearly
alias: A
M - monthly
W - weekly
D - daily
B - business days
H - hourly
T - minutely
alias: min
S - secondly
"""
    raise RuntimeError(supported_freq_msg)
def time_features(dates, freq='h'):
    """Evaluate every time feature for ``freq`` on ``dates`` and stack them row-wise.

    Returns an array with one row per feature produced by
    ``time_features_from_frequency_str(freq)``.
    """
    extractors = time_features_from_frequency_str(freq)
    rows = [extract(dates) for extract in extractors]
    return np.vstack(rows)
================================================
FILE: utils/tools.py
================================================
import numpy as np
import torch
import matplotlib.pyplot as plt
import time
plt.switch_backend('agg')
def adjust_learning_rate(optimizer, epoch, args):
    """Set the optimizer's learning rate for ``epoch`` per the ``args.lradj`` schedule.

    Supported schedules:
        * ``'type1'`` -- halve the base rate every epoch (epoch 1 uses the base rate).
        * ``'type2'`` -- fixed rates at epochs 2, 4, 6, 8, 10, 15 and 20;
          other epochs leave the rate untouched.
        * ``'3'``, ``'4'``, ``'5'``, ``'6'`` -- keep the base rate, then use 10% of
          it from epoch 10, 15, 25 or 5 respectively.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an ``'lr'`` key.
        epoch: current epoch number.
        args: object exposing ``lradj`` and ``learning_rate``.

    Raises:
        ValueError: if ``args.lradj`` is not one of the supported schedules
            (previously this surfaced as an ``UnboundLocalError``).
    """
    # lr = args.learning_rate * (0.2 ** (epoch // 2))
    # Epoch at which the step-down schedules switch to 10% of the base rate.
    step_down_epoch = {'3': 10, '4': 15, '5': 25, '6': 5}

    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    elif args.lradj in step_down_epoch:
        threshold = step_down_epoch[args.lradj]
        lr_adjust = {epoch: args.learning_rate if epoch < threshold else args.learning_rate * 0.1}
    else:
        raise ValueError(
            "Unsupported lradj {!r}; expected one of 'type1', 'type2', '3', '4', '5', '6'".format(args.lradj)
        )

    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
class EarlyStopping:
    """Track validation loss, checkpoint on improvement, and flag when to stop.

    Each call compares ``-val_loss`` with the best score seen so far. An
    improvement saves the model's ``state_dict`` to ``<path>/checkpoint.pth``
    and resets the counter; otherwise the counter is incremented, and once it
    reaches ``patience`` the ``early_stop`` attribute is set to True.

    Args:
        patience: number of consecutive non-improving calls tolerated.
        verbose: when True, print a message every time a checkpoint is saved.
        delta: margin added to the best score; a new score must reach
            ``best_score + delta`` to count as an improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss`` and checkpoint ``model`` under ``path`` when it improved."""
        # Scores are negated losses so that "higher is better".
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Save ``model.state_dict()`` to ``<path>/checkpoint.pth`` and remember ``val_loss``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    # Attribute reads that miss normal lookup go through dict.get, so a
    # missing key yields None instead of raising AttributeError.
    __getattr__ = dict.get
    # Attribute assignment stores the value as a dictionary item.
    __setattr__ = dict.__setitem__
    # Attribute deletion removes the dictionary item (KeyError when absent).
    __delattr__ = dict.__delitem__
class StandardScaler():
    """Standardise data with a fixed, caller-supplied mean and standard deviation."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def transform(self, data):
        """Return ``data`` shifted by the mean and divided by the standard deviation."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Undo ``transform``: multiply by the standard deviation, then add the mean."""
        rescaled = data * self.std
        return rescaled + self.mean
def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plot the ground truth (and the prediction, when given) and save the
    figure to ``name``.

    Args:
        true: ground-truth series to plot.
        preds: optional predicted series plotted on the same axes.
        name: output file path passed to ``savefig``.
    """
    fig = plt.figure()
    try:
        plt.plot(true, label='GroundTruth', linewidth=2)
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.legend()
        # Save without calling show() first: this module selects the
        # non-interactive 'agg' backend, and on interactive backends showing
        # before saving can leave the saved file blank.
        plt.savefig(name, bbox_inches='tight')
    finally:
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def test_params_flop(model,x_shape):
    """
    Print the model's parameter count and the complexity figures reported by ptflops.

    To measure a *former model's FLOPs, its ``forward()`` inputs need default
    values: the code below can only pass a single argument to ``forward()``.

    NOTE(review): the first message says "Trainable" but every parameter
    returned by ``model.parameters()`` is counted -- confirm which is intended.
    """
    total_elements = sum(p.numel() for p in model.parameters())
    print('INFO: Trainable parameter count: {:.2f}M'.format(total_elements / 1000000.0))
    # Imported lazily so ptflops is only required when this helper is used.
    from ptflops import get_model_complexity_info
    with torch.cuda.device(0):
        macs, params = get_model_complexity_info(
            model.cuda(), x_shape, as_strings=True, print_per_layer_stat=True)
        report_format = '{:<30} {:<8}'
        print(report_format.format('Computational complexity: ', macs))
        print(report_format.format('Number of parameters: ', params))
gitextract_1z7x72rx/
├── README.md
├── data_provider/
│ ├── data_factory.py
│ └── data_loader.py
├── exp/
│ ├── exp_basic.py
│ ├── exp_main.py
│ └── exp_stat.py
├── layers/
│ ├── AutoCorrelation.py
│ ├── Autoformer_EncDec.py
│ ├── Embed.py
│ ├── MSGBlock.py
│ ├── SelfAttention_Family.py
│ └── Transformer_EncDec.py
├── models/
│ ├── Autoformer.py
│ ├── DLinear.py
│ ├── Informer.py
│ └── MSGNet.py
├── run_longExp.py
├── scripts/
│ ├── ETTh1.sh
│ ├── ETTh2.sh
│ ├── ETTm1.sh
│ ├── ETTm2.sh
│ ├── Flight.sh
│ ├── electricity.sh
│ ├── exchange.sh
│ └── weather.sh
└── utils/
├── masking.py
├── metrics.py
├── timefeatures.py
└── tools.py
SYMBOL INDEX (245 symbols across 19 files)
FILE: data_provider/data_factory.py
function data_provider (line 16) | def data_provider(args, flag):
FILE: data_provider/data_loader.py
class Dataset_Flight (line 14) | class Dataset_Flight(Dataset):
method __init__ (line 15) | def __init__(self, root_path, flag='train', size=None,
method __read_data__ (line 43) | def __read_data__(self):
method __getitem__ (line 93) | def __getitem__(self, index):
method __len__ (line 106) | def __len__(self):
method inverse_transform (line 109) | def inverse_transform(self, data):
class Dataset_Custom (line 112) | class Dataset_Custom(Dataset):
method __init__ (line 113) | def __init__(self, root_path, flag='train', size=None,
method __read_data__ (line 141) | def __read_data__(self):
method __getitem__ (line 193) | def __getitem__(self, index):
method __len__ (line 206) | def __len__(self):
method inverse_transform (line 209) | def inverse_transform(self, data):
class Dataset_Pred (line 212) | class Dataset_Pred(Dataset):
method __init__ (line 213) | def __init__(self, root_path, flag='pred', size=None,
method __read_data__ (line 240) | def __read_data__(self):
method __getitem__ (line 294) | def __getitem__(self, index):
method __len__ (line 310) | def __len__(self):
method inverse_transform (line 313) | def inverse_transform(self, data):
class Dataset_ETT_hour (line 316) | class Dataset_ETT_hour(Dataset):
method __init__ (line 317) | def __init__(self, root_path, flag='train', size=None,
method __read_data__ (line 346) | def __read_data__(self):
method __getitem__ (line 381) | def __getitem__(self, index):
method __len__ (line 394) | def __len__(self):
method inverse_transform (line 397) | def inverse_transform(self, data):
class Dataset_ETT_minute (line 400) | class Dataset_ETT_minute(Dataset):
method __init__ (line 401) | def __init__(self, root_path, flag='train', size=None,
method __read_data__ (line 429) | def __read_data__(self):
method __getitem__ (line 473) | def __getitem__(self, index):
method __len__ (line 486) | def __len__(self):
method inverse_transform (line 489) | def inverse_transform(self, data):
FILE: exp/exp_basic.py
class Exp_Basic (line 6) | class Exp_Basic(object):
method __init__ (line 7) | def __init__(self, args):
method _build_model (line 12) | def _build_model(self):
method _acquire_device (line 16) | def _acquire_device(self):
method _get_data (line 27) | def _get_data(self):
method vali (line 30) | def vali(self):
method train (line 33) | def train(self):
method test (line 36) | def test(self):
FILE: exp/exp_main.py
class Exp_Main (line 20) | class Exp_Main(Exp_Basic):
method __init__ (line 21) | def __init__(self, args):
method _build_model (line 24) | def _build_model(self):
method _get_data (line 38) | def _get_data(self, flag):
method _select_optimizer (line 42) | def _select_optimizer(self):
method _select_criterion (line 46) | def _select_criterion(self):
method vali (line 51) | def vali(self, vali_data, vali_loader, criterion):
method train (line 96) | def train(self, setting):
method test (line 202) | def test(self, setting, test=0):
method predict (line 302) | def predict(self, setting, load=False):
FILE: exp/exp_stat.py
class Exp_Main (line 20) | class Exp_Main(Exp_Basic):
method __init__ (line 21) | def __init__(self, args):
method _build_model (line 24) | def _build_model(self):
method _get_data (line 35) | def _get_data(self, flag):
method test (line 39) | def test(self, setting, test=0):
FILE: layers/AutoCorrelation.py
class AutoCorrelation (line 11) | class AutoCorrelation(nn.Module):
method __init__ (line 18) | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dro...
method time_delay_agg_training (line 26) | def time_delay_agg_training(self, values, corr):
method time_delay_agg_inference (line 50) | def time_delay_agg_inference(self, values, corr):
method time_delay_agg_full (line 81) | def time_delay_agg_full(self, values, corr):
method forward (line 106) | def forward(self, queries, keys, values, attn_mask):
class AutoCorrelationLayer (line 137) | class AutoCorrelationLayer(nn.Module):
method __init__ (line 138) | def __init__(self, correlation, d_model, n_heads, d_keys=None,
method forward (line 152) | def forward(self, queries, keys, values, attn_mask):
FILE: layers/Autoformer_EncDec.py
class my_Layernorm (line 6) | class my_Layernorm(nn.Module):
method __init__ (line 10) | def __init__(self, channels):
method forward (line 14) | def forward(self, x):
class moving_avg (line 20) | class moving_avg(nn.Module):
method __init__ (line 24) | def __init__(self, kernel_size, stride):
method forward (line 29) | def forward(self, x):
class series_decomp (line 39) | class series_decomp(nn.Module):
method __init__ (line 43) | def __init__(self, kernel_size):
method forward (line 47) | def forward(self, x):
class EncoderLayer (line 53) | class EncoderLayer(nn.Module):
method __init__ (line 57) | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropo...
method forward (line 68) | def forward(self, x, attn_mask=None):
class Encoder (line 82) | class Encoder(nn.Module):
method __init__ (line 86) | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
method forward (line 92) | def forward(self, x, attn_mask=None):
class DecoderLayer (line 112) | class DecoderLayer(nn.Module):
method __init__ (line 116) | def __init__(self, self_attention, cross_attention, d_model, c_out, d_...
method forward (line 132) | def forward(self, x, cross, x_mask=None, cross_mask=None):
class Decoder (line 153) | class Decoder(nn.Module):
method __init__ (line 157) | def __init__(self, layers, norm_layer=None, projection=None):
method forward (line 163) | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
FILE: layers/Embed.py
class PositionalEmbedding (line 8) | class PositionalEmbedding(nn.Module):
method __init__ (line 9) | def __init__(self, d_model, max_len=5000):
method forward (line 23) | def forward(self, x):
class TokenEmbedding (line 27) | class TokenEmbedding(nn.Module):
method __init__ (line 28) | def __init__(self, c_in, d_model):
method forward (line 37) | def forward(self, x):
class FixedEmbedding (line 42) | class FixedEmbedding(nn.Module):
method __init__ (line 43) | def __init__(self, c_in, d_model):
method forward (line 58) | def forward(self, x):
class TemporalEmbedding (line 62) | class TemporalEmbedding(nn.Module):
method __init__ (line 63) | def __init__(self, d_model, embed_type='fixed', freq='h'):
method forward (line 80) | def forward(self, x):
class TimeFeatureEmbedding (line 91) | class TimeFeatureEmbedding(nn.Module):
method __init__ (line 92) | def __init__(self, d_model, embed_type='timeF', freq='h'):
method forward (line 98) | def forward(self, x):
class DataEmbedding (line 102) | class DataEmbedding(nn.Module):
method __init__ (line 103) | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropou...
method forward (line 115) | def forward(self, x, x_mark):
class DataEmbedding_wo_pos (line 123) | class DataEmbedding_wo_pos(nn.Module):
method __init__ (line 124) | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropou...
method forward (line 134) | def forward(self, x, x_mark):
class DataEmbedding_wo_pos_temp (line 138) | class DataEmbedding_wo_pos_temp(nn.Module):
method __init__ (line 139) | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropou...
method forward (line 149) | def forward(self, x, x_mark):
class DataEmbedding_wo_temp (line 153) | class DataEmbedding_wo_temp(nn.Module):
method __init__ (line 154) | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropou...
method forward (line 164) | def forward(self, x, x_mark):
FILE: layers/MSGBlock.py
class Predict (line 11) | class Predict(nn.Module):
method __init__ (line 12) | def __init__(self, individual, c_out, seq_len, pred_len, dropout):
method forward (line 28) | def forward(self, x):
class Attention_Block (line 43) | class Attention_Block(nn.Module):
method __init__ (line 44) | def __init__(self, d_model, d_ff=None, n_heads=8, dropout=0.1, activa...
method forward (line 55) | def forward(self, x, attn_mask=None):
class self_attention (line 69) | class self_attention(nn.Module):
method __init__ (line 70) | def __init__(self, attention, d_model ,n_heads):
method forward (line 83) | def forward(self, queries ,keys ,values, attn_mask= None):
class FullAttention (line 102) | class FullAttention(nn.Module):
method __init__ (line 103) | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dro...
method forward (line 110) | def forward(self, queries, keys, values, attn_mask):
class GraphBlock (line 128) | class GraphBlock(nn.Module):
method __init__ (line 129) | def __init__(self, c_out , d_model , conv_channel, skip_channel,
method forward (line 144) | def forward(self, x):
class nconv (line 155) | class nconv(nn.Module):
method __init__ (line 156) | def __init__(self):
method forward (line 159) | def forward(self,x, A):
class linear (line 165) | class linear(nn.Module):
method __init__ (line 166) | def __init__(self,c_in,c_out,bias=True):
method forward (line 170) | def forward(self,x):
class mixprop (line 174) | class mixprop(nn.Module):
method __init__ (line 175) | def __init__(self,c_in,c_out,gdep,dropout,alpha):
method forward (line 183) | def forward(self, x, adj):
class simpleVIT (line 197) | class simpleVIT(nn.Module):
method __init__ (line 198) | def __init__(self, in_channels, emb_size, patch_size=2, depth=1, num_h...
method _initialize_weights (line 217) | def _initialize_weights(self):
method forward (line 224) | def forward(self,x):
class MultiHeadAttention (line 235) | class MultiHeadAttention(nn.Module):
method __init__ (line 236) | def __init__(self, emb_size, num_heads, dropout):
method forward (line 246) | def forward(self, x: Tensor, mask: Tensor = None) -> Tensor:
class FeedForward (line 264) | class FeedForward(nn.Module):
method __init__ (line 265) | def __init__(self, dim, hidden_dim):
method forward (line 273) | def forward(self, x):
FILE: layers/SelfAttention_Family.py
class FullAttention (line 13) | class FullAttention(nn.Module):
method __init__ (line 14) | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dro...
method forward (line 21) | def forward(self, queries, keys, values, attn_mask):
class ProbAttention (line 38) | class ProbAttention(nn.Module):
method __init__ (line 39) | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dro...
method _prob_QK (line 47) | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
method _get_initial_context (line 69) | def _get_initial_context(self, V, L_Q):
method _update_context (line 80) | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
method forward (line 99) | def forward(self, queries, keys, values, attn_mask):
class AttentionLayer (line 127) | class AttentionLayer(nn.Module):
method __init__ (line 128) | def __init__(self, attention, d_model, n_heads, d_keys=None,
method forward (line 142) | def forward(self, queries, keys, values, attn_mask):
FILE: layers/Transformer_EncDec.py
class ConvLayer (line 6) | class ConvLayer(nn.Module):
method __init__ (line 7) | def __init__(self, c_in):
method forward (line 18) | def forward(self, x):
class EncoderLayer (line 26) | class EncoderLayer(nn.Module):
method __init__ (line 27) | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activat...
method forward (line 37) | def forward(self, x, attn_mask=None):
class Encoder (line 51) | class Encoder(nn.Module):
method __init__ (line 52) | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
method forward (line 58) | def forward(self, x, attn_mask=None):
class DecoderLayer (line 78) | class DecoderLayer(nn.Module):
method __init__ (line 79) | def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
method forward (line 93) | def forward(self, x, cross, x_mask=None, cross_mask=None):
class Decoder (line 112) | class Decoder(nn.Module):
method __init__ (line 113) | def __init__(self, layers, norm_layer=None, projection=None):
method forward (line 120) | def forward(self, x, cross, x_mask=None, cross_mask=None ,external=None):
FILE: models/Autoformer.py
class Model (line 11) | class Model(nn.Module):
method __init__ (line 16) | def __init__(self, configs):
method forward (line 99) | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
FILE: models/DLinear.py
class moving_avg (line 6) | class moving_avg(nn.Module):
method __init__ (line 10) | def __init__(self, kernel_size, stride):
method forward (line 15) | def forward(self, x):
class series_decomp (line 26) | class series_decomp(nn.Module):
method __init__ (line 30) | def __init__(self, kernel_size):
method forward (line 34) | def forward(self, x):
class Model (line 39) | class Model(nn.Module):
method __init__ (line 43) | def __init__(self, configs):
method forward (line 73) | def forward(self, x):
FILE: models/Informer.py
class Model (line 11) | class Model(nn.Module):
method __init__ (line 15) | def __init__(self, configs):
method forward (line 89) | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
FILE: models/MSGNet.py
function FFT_for_Period (line 11) | def FFT_for_Period(x, k=2):
class ScaleGraphBlock (line 22) | class ScaleGraphBlock(nn.Module):
method __init__ (line 23) | def __init__(self, configs):
method forward (line 41) | def forward(self, x):
class Model (line 81) | class Model(nn.Module):
method __init__ (line 82) | def __init__(self, configs):
method forward (line 111) | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
FILE: utils/masking.py
class TriangularCausalMask (line 4) | class TriangularCausalMask():
method __init__ (line 5) | def __init__(self, B, L, device="cpu"):
method mask (line 11) | def mask(self):
class ProbMask (line 15) | class ProbMask():
method __init__ (line 16) | def __init__(self, B, H, L, index, scores, device="cpu"):
method mask (line 25) | def mask(self):
FILE: utils/metrics.py
function MAE (line 3) | def MAE(pred, true):
function MAPE (line 6) | def MAPE(pred, true):
function ND (line 9) | def ND(pred, true):
function MSE (line 12) | def MSE(pred, true):
function RMSE (line 15) | def RMSE(pred, true):
function NRMSE (line 18) | def NRMSE(pred, true):
function RSE (line 21) | def RSE(pred, true):
function CORR (line 25) | def CORR(pred, true):
function MSPE (line 32) | def MSPE(pred, true):
function metric (line 36) | def metric(pred, true):
function metric2 (line 49) | def metric2(pred, true):
FILE: utils/timefeatures.py
class TimeFeature (line 9) | class TimeFeature:
method __init__ (line 10) | def __init__(self):
method __call__ (line 13) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
method __repr__ (line 16) | def __repr__(self):
class SecondOfMinute (line 20) | class SecondOfMinute(TimeFeature):
method __call__ (line 23) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class MinuteOfHour (line 27) | class MinuteOfHour(TimeFeature):
method __call__ (line 30) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class HourOfDay (line 34) | class HourOfDay(TimeFeature):
method __call__ (line 37) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class DayOfWeek (line 41) | class DayOfWeek(TimeFeature):
method __call__ (line 44) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class DayOfMonth (line 48) | class DayOfMonth(TimeFeature):
method __call__ (line 51) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class DayOfYear (line 55) | class DayOfYear(TimeFeature):
method __call__ (line 58) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class MonthOfYear (line 62) | class MonthOfYear(TimeFeature):
method __call__ (line 65) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
class WeekOfYear (line 69) | class WeekOfYear(TimeFeature):
method __call__ (line 72) | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
function time_features_from_frequency_str (line 76) | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
function time_features (line 133) | def time_features(dates, freq='h'):
FILE: utils/tools.py
function adjust_learning_rate (line 9) | def adjust_learning_rate(optimizer, epoch, args):
class EarlyStopping (line 33) | class EarlyStopping:
method __init__ (line 34) | def __init__(self, patience=7, verbose=False, delta=0):
method __call__ (line 43) | def __call__(self, val_loss, model, path):
method save_checkpoint (line 58) | def save_checkpoint(self, val_loss, model, path):
class dotdict (line 65) | class dotdict(dict):
class StandardScaler (line 72) | class StandardScaler():
method __init__ (line 73) | def __init__(self, mean, std):
method transform (line 77) | def transform(self, data):
method inverse_transform (line 80) | def inverse_transform(self, data):
function visual (line 84) | def visual(true, preds=None, name='./pic/test.pdf'):
function test_params_flop (line 96) | def test_params_flop(model,x_shape):
Condensed preview — 29 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (152K chars).
[
{
"path": "README.md",
"chars": 2095,
"preview": "# MSGNet (AAAI2024)\n\nPaper Link:[MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series For"
},
{
"path": "data_provider/data_factory.py",
"chars": 1561,
"preview": "from .data_loader import Dataset_ETT_hour, Dataset_ETT_minute, \\\n Dataset_Custom, Dataset_Pred,Dataset_Flight\nfrom to"
},
{
"path": "data_provider/data_loader.py",
"chars": 18680,
"preview": "import os\nimport numpy as np\nimport pandas as pd\nimport os\nimport torch\nfrom torch.utils.data import Dataset, DataLoader"
},
{
"path": "exp/exp_basic.py",
"chars": 885,
"preview": "import os\nimport torch\nimport numpy as np\n\n\nclass Exp_Basic(object):\n def __init__(self, args):\n self.args = a"
},
{
"path": "exp/exp_main.py",
"chars": 15599,
"preview": "from data_provider.data_factory import data_provider\nfrom .exp_basic import Exp_Basic\nfrom models import Informer, Autof"
},
{
"path": "exp/exp_stat.py",
"chars": 3572,
"preview": "from data_provider.data_factory import data_provider\nfrom exp.exp_basic import Exp_Basic\nfrom utils.tools import EarlySt"
},
{
"path": "layers/AutoCorrelation.py",
"chars": 6896,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport matplotlib.pyplot as plt\nimport numpy as np\nim"
},
{
"path": "layers/Autoformer_EncDec.py",
"chars": 6162,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass my_Layernorm(nn.Module):\n \"\"\"\n Special "
},
{
"path": "layers/Embed.py",
"chars": 6778,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.utils import weight_norm\nimport math\n\n\n"
},
{
"path": "layers/MSGBlock.py",
"chars": 10227,
"preview": "from math import sqrt\r\nimport numpy as np\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\nimport torch\r\nfrom to"
},
{
"path": "layers/SelfAttention_Family.py",
"chars": 6071,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport matplotlib.pyplot as plt\n\nimport numpy as np\n"
},
{
"path": "layers/Transformer_EncDec.py",
"chars": 4715,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass ConvLayer(nn.Module):\n def __init__(self, "
},
{
"path": "models/Autoformer.py",
"chars": 6165,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom layers.Embed import DataEmbedding, DataEmbedding"
},
{
"path": "models/DLinear.py",
"chars": 3748,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nclass moving_avg(nn.Module):\n "
},
{
"path": "models/Informer.py",
"chars": 5080,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom utils.masking import TriangularCausalMask, ProbM"
},
{
"path": "models/MSGNet.py",
"chars": 5387,
"preview": "import numpy as np\r\n# import pywt\r\nimport torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\nimport torch.ff"
},
{
"path": "run_longExp.py",
"chars": 10434,
"preview": "import argparse\nimport os\nimport time\nfrom multiprocessing import freeze_support\nimport torch\nfrom exp.exp_main import E"
},
{
"path": "scripts/ETTh1.sh",
"chars": 2903,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/ETTh1\" ]; then\n mkdir ./logs/ETTh1\nfi\nexport CUDA_V"
},
{
"path": "scripts/ETTh2.sh",
"chars": 2818,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/ETTh2\" ]; then\n mkdir ./logs/ETTh2\nfi\nexport CUDA_V"
},
{
"path": "scripts/ETTm1.sh",
"chars": 2761,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/ETTm1\" ]; then\n mkdir ./logs/ETTm1\nfi\n\nexport CUDA_"
},
{
"path": "scripts/ETTm2.sh",
"chars": 2843,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/ETTm2\" ]; then\n mkdir ./logs/ETTm2\nfi\n\nexport CUDA_"
},
{
"path": "scripts/Flight.sh",
"chars": 956,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/Flight\" ]; then\n mkdir ./logs/Flight\nfi\n\nexport CUD"
},
{
"path": "scripts/electricity.sh",
"chars": 2972,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/electricity\" ]; then\n mkdir ./logs/electricity\nfi\n\n"
},
{
"path": "scripts/exchange.sh",
"chars": 2964,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/exchange\" ]; then\n mkdir ./logs/exchange\nfi\n\nexport"
},
{
"path": "scripts/weather.sh",
"chars": 2825,
"preview": "if [ ! -d \"./logs\" ]; then\n mkdir ./logs\nfi\n\nif [ ! -d \"./logs/weather\" ]; then\n mkdir ./logs/weather\nfi\nexport CU"
},
{
"path": "utils/masking.py",
"chars": 832,
"preview": "import torch\n\n\nclass TriangularCausalMask():\n def __init__(self, B, L, device=\"cpu\"):\n mask_shape = [B, 1, L, "
},
{
"path": "utils/metrics.py",
"chars": 1496,
"preview": "import numpy as np\n\ndef MAE(pred, true):\n return np.mean(np.abs(pred - true))\n\ndef MAPE(pred, true):\n return np.me"
},
{
"path": "utils/timefeatures.py",
"chars": 3743,
"preview": "from typing import List\n\nimport numpy as np\nimport pandas as pd\nfrom pandas.tseries import offsets\nfrom pandas.tseries.f"
},
{
"path": "utils/tools.py",
"chars": 3928,
"preview": "import numpy as np\nimport torch\nimport matplotlib.pyplot as plt\nimport time\n\nplt.switch_backend('agg')\n\n\ndef adjust_lear"
}
]
About this extraction
This page contains the full source code of the YoZhibo/MSGNet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 29 files (141.7 KB), approximately 37.8k tokens, and a symbol index with 245 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.