Repository: eugenelet/Meta-rPPG
Branch: master
Commit: fe9d526fdd9c
Files: 13
Total size: 55.8 KB
Directory structure:
gitextract_uds3fg4s/
├── LICENSE
├── README.md
├── data/
│ ├── __init__.py
│ ├── data_utils.py
│ ├── dataload.py
│ └── pre_dataload.py
├── model/
│ ├── __init__.py
│ ├── loss.py
│ ├── main_model.py
│ └── sub_model.py
├── requirements.txt
├── settings.py
└── train.py
================================================
FILE CONTENTS
================================================
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2020 Eugene Lee
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
[](https://github.com/eugenelet/NeuralScale-Private/blob/master/LICENSE)

# Meta-rPPG: Remote Heart Rate Estimation Using a Transductive Meta-Learner
This repository is the official implementation of *Meta-rPPG: Remote Heart Rate Estimation Using a Transductive Meta-Learner* that has been accepted to ECCV 2020.
## Heatmap Visualization
Left to right:
1. Cropped input image
2. End-to-end trained model (baseline)
3. Meta-rPPG (transductive inference)
4. Top to down: rPPG signal, Power Spectral Density (PSD), Predicted and ground truth heart rate
## Requirements
To install requirements:
```setup
pip install -r requirements.txt
```
All experiments can be run on a single NVIDIA GTX1080Ti GPU.
The code was tested with Python 3.6 and the following software versions:
| Software | version |
| ------------- |-------------|
| cuDNN | 7.6.5 |
| Pytorch | 1.5.0 |
| CUDA | 10.2 |
## Training
### Training Data Preparation
Download training data ([example.pth](https://drive.google.com/file/d/1Z4GWiYjoQSXMYBhxBRZK9gUa1mYP0JsN/view?usp=sharing)) from Google Drive. Due to privacy issues (face images), the provided data contains only a subset of the entire training data, i.e. it contains faces of the authors of this paper.
Move `example.pth` to `data/` directory:
```
mv example.pth data/
```
### Begin Training
To begin training, run:
```
python3 train.py
```
## Validation Data
Validation data can be requested from:
[MAHNOB-HCI](https://mahnob-db.eu/hci-tagging/)
[UBFC-rPPG](https://sites.google.com/view/ybenezeth/ubfcrppg)
## Contributing
If you find this work useful, consider citing our work using the following bibTex:
```
@inproceedings{lee2020meta,
title={Meta-rPPG: Remote Heart Rate Estimation Using a Transductive Meta-Learner},
author={Lee, Eugene and Chen, Evan and Lee, Chen-Yi},
booktitle={European Conference on Computer Vision (ECCV)},
year={2020}
}
```
================================================
FILE: data/__init__.py
================================================
"""This package includes all the modules related to data loading and preprocessing
To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
You need to implement four functions:
-- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
-- <__len__>: return the size of dataset.
-- <__getitem__>: get a data point from data loader.
-- : (optionally) add dataset-specific options and set default options.
Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
See our template dataset class 'template_dataset.py' for more details.
"""
from .data_utils import testing
from .dataload import SlideWindowDataLoader
================================================
FILE: data/data_utils.py
================================================
from __future__ import print_function
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
import itertools
import torch
from scipy import signal
from scipy.signal import butter, lfilter
class FunctionSet():
    """Classical colour-space rPPG baselines (CHROM / POS) plus a masked-mean helper.

    Both methods expect `data` to be a dict with 'frame' and 'mask' arrays;
    masks hold 0/255 values and are scaled to 0/1 before use.
    """
    def __init__(self, sample_rate=30.0, display_port=8093):
        # display_port is accepted but unused here — presumably a visualiser
        # leftover; only the sampling rate is kept.
        self.fps = sample_rate
    def CHROM_method(self, data):
        '''CHROM projection of the temporally normalised RGB means.'''
        # Fixed CHROM projection matrix (de Haan & Jeanne).
        project_matrix = np.array([[3, -2, 0], [1.5, 1, -1.5]])
        frames = data['frame'].copy()
        mask = data['mask'].copy()
        # NOTE(review): in-place /= on an integer mask array would raise;
        # presumably masks arrive as floats of 0/255 — confirm with caller.
        mask /= 255
        mask = mask.astype(float)
        rgb_mean = self.spatial_mean(frames, mask)
        rgb_mean = rgb_mean.transpose()
        # Reorder channels BGR -> RGB.
        # NOTE(review): spatial_mean reduces over time as well (axes 0,2,3),
        # which yields a 1-D array — the 2-D indexing below looks
        # inconsistent with that; verify before relying on this method.
        rgb_mean = rgb_mean[[2, 1, 0], :]
        win_size = rgb_mean.shape[1]
        C_norm = np.zeros([3, win_size])
        for i in range(win_size):
            # Temporal normalisation: divide each sample by the channel mean.
            C_norm[:, i] = rgb_mean[:, i] / np.mean(rgb_mean, axis=1)
        S = np.matmul(project_matrix, C_norm)
        S1 = S[0,:]
        S2 = S[1,:]
        # Alpha tuning combines the two projected signals.
        alpha = np.std(S1)/np.std(S2)
        h = S1 + alpha*S2
        # Band-pass to the plausible pulse band (0.4-5 Hz).
        h = butter_bandpass_filter(h, 0.4, 5, self.fps, order=6)
        return h - np.mean(h)
    def POS_method(self, data):
        '''POS projection of the temporally normalised RGB means.'''
        # Fixed POS projection matrix (Wang et al.).
        project_matrix = np.array([[0, 1, -1], [-2, 1, 1]])
        frames = data['frame'].copy()
        mask = data['mask'].copy()
        # NOTE(review): same integer-division caveat as in CHROM_method.
        mask /= 255
        mask = mask.astype(float)
        rgb_mean = self.spatial_mean(frames, mask)
        rgb_mean = rgb_mean.transpose()
        # Reorder channels BGR -> RGB (same caveat as CHROM_method).
        rgb_mean = rgb_mean[[2, 1, 0], :]
        win_size = rgb_mean.shape[1]
        C_norm = np.zeros([3, win_size])
        for i in range(win_size):
            C_norm[:, i] = rgb_mean[:, i] / np.mean(rgb_mean, axis=1)
        S = np.matmul(project_matrix, C_norm)
        S1 = S[0,:]
        S2 = S[1,:]
        alpha = np.std(S1)/np.std(S2)
        h = S1 + alpha*S2  # POS
        h = butter_bandpass_filter(h, 0.4, 5, self.fps, order=6)
        return h - np.mean(h)
    def spatial_mean(self, frames, mask):
        """Masked mean: pixel sum divided by mask-weight sum.

        NOTE(review): the reduction covers axes (0, 2, 3) — including the
        time axis — so the result is one value per channel, not per frame.
        The callers index the result as 2-D; verify the intended axes.
        """
        t0 = np.sum(frames, axis=(0, 2, 3))
        t1 = np.sum(mask, axis=(0,2,3))
        mean = t0/t1
        return mean
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Parameters:
        lowcut, highcut -- pass-band edges in Hz
        fs              -- sampling rate in Hz
        order           -- filter order
    Returns:
        (b, a) transfer-function coefficients.
    """
    nyquist = 0.5 * fs
    # butter() expects critical frequencies normalised to the Nyquist rate.
    return butter(order, [lowcut / nyquist, highcut / nyquist], btype='band')
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Zero-phase band-pass filter of *data* between lowcut and highcut (Hz)."""
    coeff_b, coeff_a = butter_bandpass(lowcut, highcut, fs, order=order)
    # filtfilt runs the filter forwards then backwards -> no phase shift.
    return signal.filtfilt(coeff_b, coeff_a, data, method="pad")
def normed(a):
    """Min-max normalize *a* into [0, 1].

    Vectorized replacement of the old per-row Python loop.  Writing through
    ``t[...]`` preserves a's dtype, so integer inputs truncate exactly as the
    elementwise assignments did.  A constant input still divides by zero, as
    in the original.
    """
    amin, amax = np.min(a), np.max(a)
    t = a.copy()
    t[...] = (a - amin) / (amax - amin)
    return t
def testing(opt, model, testset, data_idx, epoch):
    """Run one few-shot adaptation pass on the held-out task and collect losses.

    Returns:
        (train ordinal loss, test loss) — the third entry of the training
        loss list together with the loss after few-shot testing.
    """
    train_results, train_true = model.get_current_results(0)
    train_losses = model.get_current_losses(0)
    # model.eval() is intentionally not called: the RNN cannot adapt in eval mode.
    model.set_input(testset[0, 0])
    model.fewshot_test(epoch)
    test_results, test_true = model.get_current_results(1)
    test_loss = model.get_current_losses(1)
    model.train()
    # train_losses == [fewloss, gradloss, ordloss]; report the ordinal loss.
    return train_losses[2], test_loss
def amp_equalize(sig):
# sig = Sig.clone()
mean = sig.mean()
min = sig.min()
max = sig.max()
ans = (sig - mean)/(max-min)*10
yhat = torch.from_numpy(signal.savgol_filter(ans, 11, 5))
# pdb.set_trace()
return yhat
def get_bpm(Sig, rate=30.0):
    """Estimate heart rate in bpm from a 1-D rPPG signal via its Welch PSD peak.

    Parameters:
        Sig  -- 1-D numpy array with the rPPG signal
        rate -- sampling rate in frames per second
    Returns:
        Heart rate in beats per minute, rounded to the nearest integer.
    """
    sig = Sig.copy()
    n = len(sig)
    fps = rate
    # signal.hann was removed from SciPy's top-level namespace (>=1.13);
    # signal.windows.hann is the supported spelling.
    win = signal.windows.hann(sig.size)
    # Detrend (remove mean), then window before the spectral estimate.
    sig = sig - np.expand_dims(np.mean(sig, -1), -1)
    sig = sig * win
    # (The original also computed a band-passed copy here but never used it.)
    f, Pxx_den = signal.welch(sig, fps, nperseg=n)
    # Dominant spectral peak, converted from Hz to beats per minute.
    return round(f[np.argmax(Pxx_den)] * 60.0)
================================================
FILE: data/dataload.py
================================================
import torch
from data.pre_dataload import BaselineDataset
# from Visualize.visualizer import Visualizer
import random
from scipy import signal
import numpy as np
import pdb
# pdb.set_trace()
class SlideWindowDataLoader():
    """Wrapper class of Dataset class that performs multi-threaded data loading.
    The class is only a container of the dataset.
    There are two ways to get a data out of the Loader.
    1) feed in a list of videos: input = dataset[[0,3,5,10], 2020]. This gets the data starting at 2020 frame from 0, 3, 5, 10th video.
    2) feed a single value of videos: input = dataset[0, 2020]. This gets a batch of data starting at 2020 from the 0th video.
    """

    def __init__(self, opt, isTrain):
        """Initialize this class.

        Parameters:
            opt     -- experiment options (needs batch_size, fewshots, win_size)
            isTrain -- True selects the training split, False the test split
        """
        self.opt = opt
        self.isTrain = isTrain
        self.dataset = BaselineDataset(opt, isTrain)
        if self.isTrain:
            print("dataset [%s-%s] was created" % ('rPPGDataset', 'train'))
        else:
            print("dataset [%s-%s] was created" % ('rPPGDataset', 'test'))
        self.length = int(len(self.dataset))
        self.num_tasks = self.dataset.num_tasks
        self.task_len = self.dataset.task_len

    def load_data(self):
        return self

    def __len__(self):
        """Return the number of data in the dataset"""
        return self.length

    def __getitem__(self, items):
        """Return a batch of data.

        items -- [task_num, start index] or [[task nums...], start index]
        """
        inputs = []
        ppg = []
        if self.isTrain:
            batch = self.opt.batch_size
        else:
            # At test time, extra few-shot samples are appended to the batch.
            batch = self.opt.batch_size + self.opt.fewshots
        if not isinstance(items[0], list):
            # Single task: stack `batch` windows spaced 60 frames apart.
            for i in range(batch):
                dat = self.dataset[items[0], items[1] + 60 * i]
                inputs.append(dat['input'])
                ppg.append(dat['PPG'])
        else:
            # Multiple tasks: one window per task, all at the same start index.
            for idx in items[0]:
                dat = self.dataset[idx, items[1]]
                inputs.append(dat['input'])
                ppg.append(dat['PPG'])
        return {'input': torch.stack(inputs), 'rPPG': torch.stack(ppg)}

    def quantify(self, rppg):
        """Discretize a PPG signal into 40 ordinal bins (0..39).

        Vectorized replacement of the old per-sample loop; the unused
        `binary` tensor was dropped.  A constant signal still divides by
        zero, as before.
        """
        tmin = rppg.min()
        interval = (rppg.max() - tmin) / 39
        return ((rppg - tmin) / interval).round().long()

    def __call__(self):
        """Return one long (15-window) sample per task, keyed like __getitem__."""
        output_list = []
        for idx in range(self.num_tasks):
            tmp = self.dataset(idx)
            tmp['rPPG'] = tmp.pop('PPG')
            output_list.append(tmp)
        return output_list
================================================
FILE: data/pre_dataload.py
================================================
from __future__ import print_function
import torch
import os
# import pickle
import numpy as np
import sys
from sklearn.preprocessing import normalize
from scipy import signal
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter
from data.data_utils import butter_bandpass_filter
import pdb
class BaselineDataset():
    """Preprocessing dataset class: loads face crops, skin masks and PPG
    traces from data/example.pth and serves fixed-length windows per task.
    """

    def __init__(self, opt, isTrain):
        """Initialize this dataset class.
        Parameters:
            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
        The self.dataset is a list of facial data, the length of the list is 18, and each element is a torch tensor of shape [2852, 3, 64, 64]
        The self.maskset is the corresponding mask data, constructed of 0 and 255, so it determines the landmarks we're using in self.dataset
        """
        self.isTrain = isTrain
        self.opt = opt
        # example.pth bundles aligned face crops, skin masks and PPG traces.
        temp_data = torch.load('data/example.pth')
        if self.isTrain:
            # Training split: first five subjects (= tasks).
            self.maskset = temp_data['mask'][:5]
            self.dataset = temp_data['image'][:5]
            self.ppg_dataset = temp_data['ppg'][:5]
            self.num_tasks = len(self.dataset)
            # Per-task frame counts.
            self.task_len = [self.dataset[i].shape[0]
                             for i in range(len(self.dataset))]
        else:
            # Test split: the last subject only.
            self.maskset = temp_data['mask'][-1:]
            self.dataset = temp_data['image'][-1:]
            self.ppg_dataset = temp_data['ppg'][-1:]
            self.num_tasks = 1
            # NOTE(review): task_len is an int here but a list in train mode —
            # callers must handle both shapes.
            self.task_len = self.dataset[0].shape[0]
        # Total number of valid window start positions over all tasks.
        self.length = 0
        for i in range(len(self.ppg_dataset)):
            self.length += self.ppg_dataset[i].shape[0] - self.opt.win_size

    def __getitem__(self, items):
        """Return a data point and its metadata information.
        Parameters:
            items -- [task_number, index of data for specified task]
            items[0] -- a integer in range 0 to 4 in train mode, only 0 available in test mode
            items[1] -- determined by the length of the video
        Returns a dictionary that contains input, PPG, diff and orig
            input - - a set of frames from the pickle file (60 x 3 x 64 x 64)
            PPG - - the corresponding signal (60)
        """
        inputs = []
        masks = []
        if not self.isTrain:
            for i in range(items[1], items[1] + self.opt.win_size):
                frame = self.dataset[items[0]][i].clone()
                mask = self.maskset[items[0]][i].clone()
                inputs.append(frame)
                masks.append(mask)
            ppg = self.ppg_dataset[items[0]][items[1]: items[1] + self.opt.win_size].clone()
        else:
            # Identical to the test branch; kept separate as in the original.
            for i in range(items[1], items[1] + self.opt.win_size):
                frame = self.dataset[items[0]][i].clone()
                mask = self.maskset[items[0]][i].clone()
                inputs.append(frame)
                masks.append(mask)
            ppg = self.ppg_dataset[items[0]][items[1]
                                            : items[1] + self.opt.win_size].clone()
        inputs = np.stack(inputs)
        inputs = torch.from_numpy(inputs)
        masks = np.stack(masks)
        masks = torch.from_numpy(masks)
        # NOTE(review): the return value is discarded, so only the in-place
        # parts of baseline_procress affect `inputs` — see that method.
        self.baseline_procress(inputs, masks.clone())
        # Ground truth is served as 40-bin ordinal labels.
        ppg = self.quantify(ppg)
        return {'input': inputs, 'PPG': ppg}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return self.length

    def quantify(self, rppg):
        """Discretize the PPG window into 40 ordinal bins (0..39)."""
        quantified = torch.empty(rppg.shape[0], dtype=torch.long)
        tmax = rppg.max()
        tmin = rppg.min()
        # 39 intervals -> indices 0..39; a constant window divides by zero.
        interval = (tmax - tmin)/39
        for i in range(len(quantified)):
            quantified[i] = ((rppg[i] - tmin)/interval).round().long()
        return quantified

    def baseline_procress(self, data, mask):
        """Normalize a window: subtract the masked per-channel mean, then
        remove the part of the spatial-mean trend outside the 1-8 Hz band.

        NOTE(review): `data = data*mask` rebinds the local name only; since
        both call sites ignore the return value, the mask multiplications are
        never seen by callers — only the in-place slice assignments are.
        Confirm whether masking was meant to apply to the returned windows.
        """
        mask /= 255
        mask = mask.float()
        input_mean = data.sum(dim=(0, 2, 3), keepdim=False) / \
            mask.sum(dim=(0, 2, 3), keepdim=False)  # mean of W H T
        for i in range(data.shape[1]):
            data[:, i, :, :] = data[:, i, :, :] - input_mean[i]  # minus the total mean
        data = data*mask
        x_hat = data.sum(dim=(2, 3), keepdim=False) / \
            mask.sum(dim=(2, 3), keepdim=False)  # mean of H T
        G_x = np.empty(x_hat.size())  # filtered x_hat
        for i in range(data.shape[1]):  # shape 1 is RGB channels
            # Keep only the 1-8 Hz band of the per-frame channel means.
            G_x[:, i] = butter_bandpass_filter(x_hat[:, i], 1, 8, 30, order=3)
            for j in range(data.shape[0]):
                # Subtract the out-of-band component of the trend, in place.
                data[j, i, :, :] = data[j, i, :, :] - \
                    (x_hat[j, i] - G_x[j, i])
        data = data*mask
        return data

    def __call__(self, idx):
        """Return one long sample (15 windows) for task `idx`.

        NOTE(review): self.original is never assigned in this class, so both
        branches raise AttributeError as written (and `orig` is not returned
        either) — verify against the full repository.
        """
        inputs = []
        masks = []
        items = [idx, 0]
        if not self.isTrain:
            decision = 0
            # Wrap the start index so 15 windows always fit inside the video.
            new_index = items[1] % (
                self.task_len - (self.opt.batch_size + self.opt.fewshots)*self.opt.win_size)
            for i in range(new_index, new_index + 15*self.opt.win_size):
                frame = self.dataset[items[0]][i].clone()
                mask = self.maskset[items[0]][i].clone()
                inputs.append(frame)
                masks.append(mask)
            ppg = self.ppg_dataset[items[0]
                                   ][new_index: new_index + 15*self.opt.win_size].clone()
            orig = self.original[items[0]
                                 ][new_index: new_index + 15*self.opt.win_size].clone()
        else:
            for i in range(items[1], items[1] + 15*self.opt.win_size):
                frame = self.dataset[items[0]][i].clone()
                mask = self.maskset[items[0]][i].clone()
                inputs.append(frame)
                masks.append(mask)
            ppg = self.ppg_dataset[items[0]][items[1]: items[1] + 15*self.opt.win_size].clone()
            orig = self.original[items[0]][items[1]: items[1] + 15*self.opt.win_size].clone()
        inputs = np.stack(inputs)
        inputs = torch.from_numpy(inputs)
        masks = np.stack(masks)
        masks = torch.from_numpy(masks)
        # Same caveat as __getitem__: the return value is discarded.
        self.baseline_procress(inputs, masks.clone())
        ppg = self.quantify(ppg)
        return {'input': inputs, 'PPG': ppg}
================================================
FILE: model/__init__.py
================================================
"""This package includes all the modules related to data loading and preprocessing
To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
You need to implement four functions:
-- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
-- <__len__>: return the size of dataset.
-- <__getitem__>: get a data point from data loader.
-- : (optionally) add dataset-specific options and set default options.
Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
See our template dataset class 'template_dataset.py' for more details.
"""
from .main_model import meta_rPPG
================================================
FILE: model/loss.py
================================================
import torch
import numpy as np
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import os
from torch.autograd import Variable
from torch.nn.functional import conv1d
from scipy import signal
import torch.nn.functional as F
import pdb
class ordLoss(nn.Module):
    """
    Ordinal loss is defined as the average of pixelwise ordinal loss F(h, w, X, O)
    over the entire image domain.

    The rank-index tensor and the two masks are now built with a broadcast
    arange instead of Python loops over ranks and batch entries, and all
    tensors are created directly on the input's device (the original mixed
    `torch.cuda.is_available()` checks with the actual device).  Numerics
    are unchanged.
    """

    def __init__(self):
        super(ordLoss, self).__init__()
        self.loss = 0.0

    def forward(self, orig_ord_labels, orig_target):
        """
        :param orig_ord_labels: per-rank probabilities, shape (N, W, C).
        :param orig_target: ground-truth rank indices, shape (N, W).
        :return: ordinal loss (scalar tensor)
        """
        device = orig_ord_labels.device
        # (N, W, C) -> (N, C, W) so ranks sit on dim 1.
        ord_labels = torch.transpose(orig_ord_labels.clone(), 1, 2)
        N, C, W = ord_labels.size()
        # K[n, i, w] == i : rank index broadcast over batch and width.
        K = torch.arange(C, dtype=torch.int, device=device).view(1, C, 1).expand(N, C, W)
        target = orig_target.clone().to(device=device, dtype=torch.int)
        # mask_0: ranks at or below the target; mask_1: ranks above it.
        mask_0 = (K <= target.unsqueeze(1)).detach()
        mask_1 = (K > target.unsqueeze(1)).detach()
        one = torch.ones(ord_labels[mask_1].size(), device=device)
        # Log-likelihood of "rank reached" below the target and
        # "rank not reached" above it; clamp guards log(0).
        self.loss = torch.sum(torch.log(torch.clamp(ord_labels[mask_0], min=1e-8, max=1e8))) \
            + torch.sum(torch.log(torch.clamp(one - ord_labels[mask_1], min=1e-8, max=1e8)))
        # Negative mean over all (batch, width) positions.
        self.loss = self.loss / (-N * W)
        return self.loss
class customLoss(nn.Module):
    """
    This customized loss combines the ordinal loss and the regression loss of
    the frequency magnitude, mixed through a learnable linear layer.

    Fixes: the original instantiated `Visdom`, a name never imported in this
    module (NameError at construction), and left `pdb.set_trace()` calls in
    both __init__ and forward, which halt any run.  Both are removed; the
    NaN check now raises instead of dropping into the debugger.
    """

    def __init__(self, device):
        super(customLoss, self).__init__()
        self.loss = 0.0
        self.ord = ordLoss()
        self.reg = regressLoss()
        # Learnable 2 -> 1 combination of the two loss terms.
        self.weight = nn.Linear(2, 1).to(device)
        with torch.no_grad():
            # Start as a plain sum of the two terms.
            self.weight.weight.copy_(torch.tensor([1.0, 1.0]))
        self.t = torch.tensor([2.0, 2.0]).to(device)
        self.device = device

    def forward(self, predict, true_rPPG):
        """Combine ordinal loss on predict[0] with a spectral regression loss
        between predict[1] and the ground-truth signal.
        """
        self.loss1 = self.ord(predict[0], true_rPPG)
        self.true_fft = self.torch_style_fft(true_rPPG)      # (batch size x 60)
        self.predict_fft = self.torch_style_fft(predict[1])  # (batch size x 60)
        self.loss2 = self.reg(self.predict_fft, self.true_fft)
        if torch.isnan(self.loss2):
            raise ValueError("customLoss: NaN in frequency-domain loss")
        # Kept for inspection, as in the original.
        self.t1 = self.weight(self.t)
        self.loss = self.weight(torch.stack([self.loss1, self.loss2]))
        return self.loss

    def torch_style_fft(self, sig):
        """Magnitude spectrum via the module-level torch_welch helper."""
        S, _ = torch_welch(sig, fps=30)
        return S
class regressLoss(nn.Module):
    """Soft cross-entropy between spectra: -softmax(targets) * log_softmax(outputs).

    Fixes: the `pdb.set_trace()` NaN traps are removed, and log(softmax(x))
    is computed as F.log_softmax(x) — mathematically identical but it cannot
    produce log(0) = -inf the way the original two-step computation could.
    """

    def __init__(self):
        super(regressLoss, self).__init__()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, outputs, targets):
        """Average the elementwise soft cross-entropy over the whole batch."""
        targets = self.softmax(targets)
        log_probs = F.log_softmax(outputs, dim=1)
        loss = -targets.float() * log_probs
        return torch.mean(loss)
class KLDivLoss(nn.Module):
    """KL divergence between softened distributions of outputs and targets."""

    def __init__(self, reduction="mean"):
        super(KLDivLoss, self).__init__()
        self.criterion = torch.nn.KLDivLoss(reduction=reduction)

    def forward(self, outputs, targets):
        """Return KL(softmax(targets) || softmax(outputs)).

        The original cloned both tensors and filled the clones with uniform
        noise without ever using them — dead code with an RNG side effect;
        removed.
        """
        return self.criterion(F.log_softmax(outputs, dim=1), F.softmax(targets, dim=1))
def torch_welch(sig, fps):
    """Single-segment magnitude spectrum (Welch-style: detrend, Hann, rFFT).

    Parameters:
        sig -- (batch, length) tensor
        fps -- sampling rate in Hz
    Returns:
        (S, freqs): magnitudes of shape (batch, length//2 + 1) and the
        matching rFFT frequency bins.

    Fixes: the original hard-cast to torch.cuda.FloatTensor (crashes on CPU),
    called signal.hann (removed from SciPy's top level) and torch.rfft
    (removed in torch >= 1.8).  torch.fft.rfft with norm="ortho" matches the
    old normalized=True, onesided=True behaviour.
    """
    nfft = sig.size(1)
    device = sig.device
    sig = sig.float()
    win = torch.from_numpy(signal.windows.hann(nfft)).to(device=device, dtype=torch.float32)
    sig = sig.unsqueeze(1)
    # Detrend: subtract the per-row mean; detached so no gradient flows
    # through the mean, matching the original numpy round-trip.
    sig = sig - sig.mean(dim=-1, keepdim=True).detach()
    sig = sig * win
    S = torch.fft.rfft(sig, dim=-1, norm="ortho").abs()
    freqs = torch.from_numpy(np.fft.rfftfreq(nfft, 1 / float(fps)))
    return S.squeeze(1), freqs
================================================
FILE: model/main_model.py
================================================
import torch
import numpy as np
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import os
import itertools
from model.sub_model import rPPG_Estimator, Convolutional_Encoder, Synthetic_Gradient_Generator
from model.loss import ordLoss, KLDivLoss
from scipy import signal
import pickle
from data.data_utils import butter_bandpass_filter
import time
import pdb
class meta_rPPG(nn.Module):
"""
You can name your own checkpoint directory (opt.checkpoints_dir).
A_net refers to Conv_Encoder, B_net refers to rPPG_Estimator, Grad_net refers to Synth_Grad_Gen.
The loading directory can be changed to opt.checkpoints_dir if some other checkpoints are in need.
"""
    def __init__(self, opt, isTrain, continue_train=False, norm_layer=nn.BatchNorm2d):
        """Build the three sub-networks, their optimizers and LR schedulers.

        Parameters:
            opt            -- experiment options (checkpoints_dir, gpu_ids, lr,
                              lstm_num_layers, batch_size, adapt_position, ...)
            isTrain        -- True for training mode
            continue_train -- when True, setup() resumes from opt.load_file
            norm_layer     -- accepted but not used in this constructor
        """
        super(meta_rPPG, self).__init__()
        self.save_dir = os.path.join(os.getcwd(), opt.checkpoints_dir)
        self.load_dir = os.path.join(os.getcwd(), opt.checkpoints_dir)
        if os.path.exists(self.save_dir) == False:
            os.makedirs(self.save_dir)
        self.isTrain = isTrain
        self.opt = opt
        self.gpu_ids = opt.gpu_ids
        self.thres = 0.5  # threshold; tightened to 0.01 in setup() when resuming
        self.continue_train = continue_train
        # First GPU from opt.gpu_ids, or CPU if none configured.
        self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
        # Prototype feature vector (dim 120) for the prototypical distance loss.
        self.prototype = torch.zeros(120)
        # Persistent LSTM hidden/cell states shared with B_net; first dim is
        # 2*num_layers (presumably layers x directions — confirm in sub_model).
        self.h = torch.zeros(2*opt.lstm_num_layers, opt.batch_size, 60).to(self.device)
        self.c = torch.zeros(2*opt.lstm_num_layers, opt.batch_size, 60).to(self.device)
        # A_net: frame encoder; B_net: rPPG estimator (consumes 120-dim
        # features); Grad_net: synthetic gradient generator.
        self.A_net = Convolutional_Encoder(input_channel=3, isTrain=self.isTrain, device=self.device)
        self.B_net = rPPG_Estimator(input_channel=120, num_layers=opt.lstm_num_layers,
                                    isTrain=self.isTrain, device=self.device, h=self.h, c=self.c)
        self.Grad_net = Synthetic_Gradient_Generator(input_channel=120, isTrain=self.isTrain, device=self.device)
        self.A_net.to(self.device)
        self.B_net.to(self.device)
        self.Grad_net.to(self.device)
        self.model = [self.A_net, self.B_net, self.Grad_net]
        # Last observed loss values, exposed via get_current_losses().
        self.fewloss = 0.0
        self.ordloss = 0.0
        self.gradloss = 0.0
        self.criterion1 = torch.nn.MSELoss()  # prototype distance loss
        self.criterion2 = ordLoss()           # ordinal regression loss
        self.criterion3 = torch.nn.MSELoss()  # synthetic-gradient regression loss
        self.optimizerA = torch.optim.SGD(self.A_net.parameters(), opt.lr, momentum=0.9, weight_decay=5e-4)
        self.optimizerB = torch.optim.SGD(self.B_net.parameters(), opt.lr, momentum=0.9, weight_decay=5e-4)
        self.optimizerGrad = torch.optim.SGD(self.Grad_net.parameters(), opt.lr, momentum=0.9, weight_decay=5e-4)
        # Psi optimizer drives transductive adaptation at 100x smaller lr;
        # which parameters it updates depends on opt.adapt_position.
        if self.opt.adapt_position == "extractor":
            self.optimizerPsi = torch.optim.SGD(self.A_net.parameters(), opt.lr*1e-2, momentum=0.9, weight_decay=5e-4)
        elif self.opt.adapt_position == "estimator":
            self.optimizerPsi = torch.optim.SGD(self.B_net.parameters(), opt.lr*1e-2, momentum=0.9, weight_decay=5e-4)
        elif self.opt.adapt_position == "both":
            self.optimizerPsi = torch.optim.SGD(itertools.chain(self.A_net.parameters(),
                                                self.B_net.parameters()), opt.lr*1e-2, momentum=0.9, weight_decay=5e-4)
        # NOTE(review): for any other adapt_position value no optimizerPsi is
        # created, so the schedulerPsi line below would raise AttributeError —
        # confirm the option is restricted to these three choices.
        self.schedulerA = optim.lr_scheduler.CosineAnnealingLR(self.optimizerA, T_max=5, eta_min=0.1*opt.lr)
        self.schedulerB = optim.lr_scheduler.CosineAnnealingLR(self.optimizerB, T_max=5, eta_min=0.1*opt.lr)
        self.schedulerGrad = optim.lr_scheduler.CosineAnnealingLR(self.optimizerGrad, T_max=5, eta_min=0.1*opt.lr)
        self.schedulerPsi = optim.lr_scheduler.CosineAnnealingLR(self.optimizerPsi, T_max=5, eta_min=0.1*1e-2*opt.lr)
def print_networks(self, print_net):
"""Print the total number of parameters in the network and (if verbose) network architecture
Parameters:
verbose (bool) -- if verbose: print the network architecture
"""
print('----------- Networks initialized -------------')
num_params = 0
for param in self.A_net.parameters():
num_params += param.numel()
for param in self.B_net.parameters():
num_params += param.numel()
for param in self.Grad_net.parameters():
num_params += param.numel()
if print_net:
print(self.model)
print('Total number of parameters : %.3f M' %
(num_params / 1e6))
# pdb.set_trace()
print('---------------------end----------------------')
def set_input(self, input):
self.input = input['input']
self.true_rPPG = input['rPPG']
if 'center' in input:
self.center = input['center']
def set_input_for_test(self, input):
self.input = input.to(self.device)
# if self.opt.lstm_hc_usage:
self.B_net.feed_hc([self.h, self.c])
def forward(self, x):
"""Run forward pass; called by both functions and ."""
# if not self.opt.branch:
self.inter = self.A_net(x)
self.decision, self.predict = self.B_net(self.inter)
if self.opt.adapt_position == "extractor":
self.gradient = self.Grad_net(self.inter.detach())
elif self.opt.adapt_position == "estimator":
self.gradient = self.Grad_net(self.predict.detach())
elif self.opt.adapt_position == "both":
self.gradient1 = self.Grad_net(self.inter.detach())
self.gradient2 = self.Grad_net(self.predict.detach())
    def new_theta_update(self, epoch):
        """One theta update: a supervised step on A_net, then `fewshots`
        transductive steps driven by Grad_net's synthetic gradients at the
        position selected by opt.adapt_position.
        """
        inter = self.A_net(self.input.to(self.device))
        decision, predict = self.B_net(inter)
        # Prototype loss pulls encoder features towards the running prototype.
        fewloss = self.criterion1(self.prototype.expand(self.opt.batch_size,60,120), inter)
        ordloss = self.criterion2(predict, self.true_rPPG.to(self.device))
        self.optimizerA.zero_grad()
        loss = fewloss + ordloss
        loss.backward()
        self.optimizerA.step()
        if self.opt.adapt_position == "extractor":
            for i in range(self.opt.fewshots):
                inter = self.A_net(self.input.to(self.device))
                decision, predict = self.B_net(inter)
                inter_grad = self.Grad_net(inter.detach())
                self.optimizerPsi.zero_grad()
                # Push Grad_net's synthetic gradient through A_net's parameters.
                grad = torch.autograd.grad(outputs=inter, inputs=self.A_net.parameters(),
                                           grad_outputs=inter_grad, create_graph=False, retain_graph=False)
                torch.autograd.backward(self.A_net.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
                self.optimizerPsi.step()
            self.gradient = inter_grad.detach().clone()
        elif self.opt.adapt_position == "estimator":
            for i in range(self.opt.fewshots):
                inter = self.A_net(self.input.to(self.device))
                decision, predict = self.B_net(inter)
                predict_grad = self.Grad_net(predict.detach())
                self.optimizerPsi.zero_grad()
                # Same scheme, but adapt B_net through the prediction head.
                grad = torch.autograd.grad(outputs=predict, inputs=self.B_net.parameters(),
                                           grad_outputs=predict_grad, create_graph=False, retain_graph=False)
                torch.autograd.backward(self.B_net.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
                self.optimizerPsi.step()
            self.gradient = predict_grad.detach().clone()
        elif self.opt.adapt_position == "both":
            for i in range(self.opt.fewshots):
                inter = self.A_net(self.input.to(self.device))
                decision, predict = self.B_net(inter)
                inter_grad = self.Grad_net(inter.detach())
                predict_grad = self.Grad_net(predict.detach())
                self.optimizerPsi.zero_grad()
                # Adapt both networks with their respective synthetic gradients.
                grad = torch.autograd.grad(outputs=inter, inputs=self.A_net.parameters(),
                                           grad_outputs=inter_grad, create_graph=False, retain_graph=False)
                torch.autograd.backward(self.A_net.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
                grad = torch.autograd.grad(outputs=predict, inputs=self.B_net.parameters(),
                                           grad_outputs=predict_grad, create_graph=False, retain_graph=False)
                torch.autograd.backward(self.B_net.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
                self.optimizerPsi.step()
            self.gradient = predict_grad.detach().clone()
        '''release the retained graph, free all the variables'''
        self.fewloss = fewloss.detach().clone()
        self.ordloss = ordloss.detach().clone()
        self.inter = inter.detach().clone()
    def new_psi_phi_update(self, epoch):
        """Joint update of theta/phi (A_net + B_net via the task losses) and
        psi (Grad_net, regressed towards the true backprop gradients it must
        later synthesize).  Branch mirrors opt.adapt_position.
        """
        if self.opt.adapt_position == "extractor":
            inter = self.A_net(self.input.to(self.device))
            decision, predict = self.B_net(inter)
            inter_grad = self.Grad_net(inter.detach())
            # Keep the non-leaf gradient so Grad_net has a regression target.
            inter.retain_grad()
            ordloss = self.criterion2(predict, self.true_rPPG.to(self.device))
            fewloss = self.criterion1(self.prototype.expand(self.opt.batch_size,60,120), inter)
            loss = ordloss + fewloss
            self.optimizerB.zero_grad()
            self.optimizerA.zero_grad()
            loss.backward()
            self.optimizerA.step()
            self.optimizerB.step()
            # Train Grad_net to match the true gradient of the intermediate.
            gradloss = self.criterion3(inter_grad, inter.grad)
            self.optimizerGrad.zero_grad()
            gradloss.backward()
            self.optimizerGrad.step()
            self.gradloss = gradloss.detach().clone()
        elif self.opt.adapt_position == "estimator":
            inter = self.A_net(self.input.to(self.device))
            decision, predict = self.B_net(inter)
            predict_grad = self.Grad_net(predict.detach())
            # Keep the non-leaf gradient of the prediction head instead.
            predict.retain_grad()
            ordloss = self.criterion2(predict, self.true_rPPG.to(self.device))
            fewloss = self.criterion1(self.prototype.expand(
                self.opt.batch_size, 60, 120), inter)
            loss = ordloss + fewloss
            self.optimizerB.zero_grad()
            self.optimizerA.zero_grad()
            loss.backward()
            self.optimizerA.step()
            self.optimizerB.step()
            gradloss = self.criterion3(predict_grad, predict.grad)
            self.optimizerGrad.zero_grad()
            gradloss.backward()
            self.optimizerGrad.step()
            self.gradloss = gradloss.detach().clone()
        elif self.opt.adapt_position == "both":
            inter = self.A_net(self.input.to(self.device))
            decision, predict = self.B_net(inter)
            predict_grad = self.Grad_net(predict.detach())
            inter_grad = self.Grad_net(inter.detach())
            predict.retain_grad()
            inter.retain_grad()
            ordloss = self.criterion2(predict, self.true_rPPG.to(self.device))
            fewloss = self.criterion1(self.prototype.expand(
                self.opt.batch_size, 60, 120), inter)
            loss = ordloss + fewloss
            self.optimizerB.zero_grad()
            self.optimizerA.zero_grad()
            loss.backward()
            self.optimizerA.step()
            self.optimizerB.step()
            # Regress both synthetic gradients against their true counterparts.
            gradloss = self.criterion3(
                predict_grad, predict.grad) + self.criterion3(inter_grad, inter.grad)
            self.optimizerGrad.zero_grad()
            gradloss.backward()
            self.optimizerGrad.step()
            self.gradloss = gradloss.detach().clone()
        # Cache detached copies for logging/visualization.
        # NOTE(review): these names are only bound inside the branches above,
        # so an unexpected adapt_position value would raise NameError here.
        self.decision = decision.detach().clone()
        self.predict = predict.detach().clone()
        self.ordloss = ordloss.detach().clone()
def update_prototype(self):
proto_tmp = torch.zeros(120).to(self.device)
h_tmp = torch.zeros(2*self.opt.lstm_num_layers, self.opt.batch_size, 60).to(self.device)
c_tmp = torch.zeros(2*self.opt.lstm_num_layers, self.opt.batch_size, 60).to(self.device)
self.B_net.feed_hc([self.h, self.c])
# pdb.set_trace()
self.forward(self.input.to(self.device))
# pdb.set_trace()
proto_tmp += self.inter.data.mean(axis=[0,1])
h_tmp += self.B_net.h.data
c_tmp += self.B_net.c.data
if torch.sum(self.prototype) == 0: # first update
self.prototype = proto_tmp
(self.h, self.c) = (h_tmp, c_tmp)
else:
self.prototype = 0.8*self.prototype + 0.2*proto_tmp
(self.h, self.c) = (0.8*self.h + 0.2*h_tmp, 0.8*self.c + 0.2*c_tmp)
def setup(self, opt):
    """One-time model setup: initialize sub-network weights, optionally
    resume from a checkpoint, and print the network summaries.

    Parameters:
        opt -- parsed options; uses opt.load_file and opt.print_net
    """
    self.init_weights(self.A_net, self.B_net)
    # pdb.set_trace()
    if self.continue_train:
        # Resume training from a previously saved checkpoint.
        self.load_networks(opt.load_file)
    # NOTE(review): source indentation was lost in extraction; thres is
    # placed at method level so the attribute always exists — confirm.
    self.thres = 0.01
    if not self.isTrain:
        # Inference-only runs always load a trained checkpoint.
        # load_suffix = 'latest'
        # load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch
        self.load_networks(opt.load_file)
        # self.progress = 1.45
    # pdb.set_trace()
    self.print_networks(opt.print_net)
def init_weights(self, net1, net2, init_type='normal', init_gain=0.02):
    """Initialize the weights of two sub-networks in place.

    Parameters:
        net1, net2 -- torch modules to initialize (encoder / estimator)
        init_type  -- kept for interface compatibility; init_func always
                      draws Conv/Linear weights from N(0, 0.02)
        init_gain  -- kept for interface compatibility; currently unused
    """
    # Bug fix: 'self' was missing from the signature, so the bound call
    # self.init_weights(self.A_net, self.B_net) shifted every argument by
    # one — net1 became the model wrapper itself and B_net was swallowed
    # as init_type, leaving B_net uninitialized by this call.
    # NOTE(review): if the wrapper happens to be an nn.Module, the old
    # mis-bound call may have (accidentally) initialized Grad_net via
    # recursion from self.apply — confirm Grad_net initialization.
    net1.apply(init_func)
    net2.apply(init_func)
def save_networks(self, suffix):
    """Save all the networks to the disk.

    Parameters:
        suffix -- tag used in the file name '%s_%s.pth' % (suffix, name)
    """
    filename = '%s_%s.pth' % (suffix, self.opt.name)
    target = os.path.join(self.save_dir, filename)
    # Bundle every sub-network plus the transductive state (prototype
    # and LSTM hidden/cell tensors, moved to CPU) into one checkpoint.
    checkpoint = {
        'A': self.A_net.state_dict(),
        'B': self.B_net.state_dict(),
        'Grad': self.Grad_net.state_dict(),
        'proto': self.prototype.cpu(),
        'h': self.h.data.cpu(),
        'c': self.c.data.cpu(),
    }
    torch.save(checkpoint, target)
def get_current_losses(self, istest):
    """Return the most recent loss value(s).

    Parameters:
        istest -- truthy: return the test-time ordinal loss only;
                  falsy: return [fewshot, gradient, ordinal] losses.
    """
    if istest:
        return self.t_ordloss
    return [self.fewloss, self.gradloss, self.ordloss]
def eval(self):
    """Switch every sub-network to evaluation mode for test time."""
    for net in (self.A_net, self.B_net, self.Grad_net):
        net.eval()
def train(self):
    """Switch every sub-network back to training mode after test time."""
    for net in (self.A_net, self.B_net, self.Grad_net):
        net.train()
def test(self):
    """Forward pass at test time (no gradients); records the ordinal
    loss on the last window as self.t_ordloss."""
    with torch.no_grad():
        last_input = self.input[-1].unsqueeze(0).to(self.device)
        self.forward(last_input)
        last_truth = self.true_rPPG[-1].unsqueeze(0).to(self.device)
        self.t_ordloss = self.criterion2(self.predict, last_truth)
def fewshot_test(self, epoch):
    """Transductive few-shot adaptation at test time.

    A throwaway copy of the encoder is adapted on the first
    opt.fewshots windows (via synthetic gradients, then via the
    prototype loss), after which the remaining windows are evaluated
    with the adapted copy. The trained networks are left untouched.

    Parameters:
        epoch (int) -- currently unused here
    """
    # Deep-copy the encoder so adaptation does not disturb the trained
    # weights; the pickle round-trip acts as a deepcopy.
    A = pickle.loads(pickle.dumps(self.A_net))
    optim = torch.optim.SGD(A.parameters(), self.opt.lr*1e-2, momentum=0.9, weight_decay=5e-4)
    # Phase 1: adapt the copy using synthetic gradients from Grad_net.
    for i in range(self.opt.fewshots):
        optim.zero_grad()
        inter = A(self.input[i].unsqueeze(0).to(self.device))
        inter_grad = self.Grad_net(inter)
        # Propagate the synthetic output-gradient back to the copy's
        # parameters, then accumulate it into their .grad fields.
        # NOTE(review): backward over A.parameters() with grad_tensors
        # looks like a direct grad assignment — confirm intent.
        grad = torch.autograd.grad(outputs=inter, inputs=A.parameters(),
            grad_outputs=inter_grad, create_graph=False, retain_graph=False)
        torch.autograd.backward(A.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
        optim.step()
    # Phase 2: pull the copy's features toward the stored prototype.
    for i in range(self.opt.fewshots):
        optim.zero_grad()
        inter = A(self.input[i].unsqueeze(0).to(self.device))
        loss = self.criterion1(inter, self.prototype.expand(1, 60, 120))
        loss.backward()
        optim.step()
    # Evaluate the remaining windows with the adapted copy; the LSTM
    # state is swapped in and restored afterwards.
    with torch.no_grad():
        tmp_h = self.B_net.h
        tmp_c = self.B_net.c
        # if self.opt.lstm_hc_usage:
        self.B_net.feed_hc([self.h, self.c])
        data = self.input[self.opt.fewshots:]
        inter = A(data.to(self.device))
        self.decision, self.predict = self.B_net(inter)
        self.B_net.feed_hc([tmp_h, tmp_c])
        self.t_ordloss = self.criterion2(self.predict[0].unsqueeze(0), self.true_rPPG[0].unsqueeze(0).to(self.device))
def get_current_results(self, istest):
    """Return (decision, ground-truth rPPG) for the last window, both as
    detached CPU clones. The istest flag is currently ignored: the
    original test/train branches were identical."""
    return self.decision[-1].cpu().clone(), self.true_rPPG[-1].cpu().clone()
# def get_freq_results(self):
# return self.criterion.true_fft[0].cpu().clone(), self.criterion.predict_fft[0].detach().cpu().clone()
def get_current_results_of_test(self):
    """Return the first decision of the current batch as a CPU clone."""
    first = self.decision[0]
    return first.cpu().clone()
def load_networks(self, suffix):
    """Load all the networks from the disk.

    Parameters:
        suffix (str) -- tag used in the file name '%s_%s.pth' % (suffix, name)
    """
    filename = '%s_%s.pth' % (suffix, self.opt.name)
    source = os.path.join(self.load_dir, filename)
    print('loading model from %s' % source)
    checkpoint = torch.load(source)
    # Restore the three sub-networks and the transductive state.
    self.A_net.load_state_dict(checkpoint['A'])
    self.B_net.load_state_dict(checkpoint['B'])
    self.Grad_net.load_state_dict(checkpoint['Grad'])
    self.prototype = checkpoint['proto'].to(self.device)
    self.h = checkpoint['h'].to(self.device)
    self.c = checkpoint['c'].to(self.device)
def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0):
    """Fix InstanceNorm checkpoints incompatibility (prior to 0.4).

    Recursively walks the dotted key path in keys; when the leaf is an
    InstanceNorm buffer that newer torch versions no longer expect
    (absent running stats or num_batches_tracked), it is dropped from
    state_dict in place.

    Parameters:
        state_dict -- checkpoint dict being patched in place
        module     -- module corresponding to keys[:i]
        keys       -- dotted parameter name split on '.'
        i          -- index of the path component currently inspected
    """
    key = keys[i]
    if i + 1 == len(keys):  # at the end, pointing to a parameter/buffer
        # Drop running stats that the module no longer tracks.
        if module.__class__.__name__.startswith('InstanceNorm') and \
                (key == 'running_mean' or key == 'running_var'):
            if getattr(module, key) is None:
                state_dict.pop('.'.join(keys))
        # num_batches_tracked never existed for InstanceNorm pre-0.4.
        if module.__class__.__name__.startswith('InstanceNorm') and \
                (key == 'num_batches_tracked'):
            state_dict.pop('.'.join(keys))
    else:
        # Descend into the child module named by this path component.
        self.__patch_instance_norm_state_dict(
            state_dict, getattr(module, key), keys, i + 1)
def get_param(self):
    """Return [A_net params, B_net params] as reported by each sub-net."""
    return [net.get_param() for net in (self.A_net, self.B_net)]
def update_learning_rate(self, epoch):
    """Advance every LR scheduler by one step; called at the end of each
    epoch. Returns the B-optimizer's new learning rate.

    Parameters:
        epoch (int) -- kept for interface compatibility; unused
    """
    schedulers = (self.schedulerA, self.schedulerB,
                  self.schedulerGrad, self.schedulerPsi)
    for sched in schedulers:
        sched.step()
    return self.optimizerB.param_groups[0]['lr']
def init_func(m):  # define the initialization function
    """Weight initializer applied via net.apply().

    Conv*/Linear weights are drawn from N(0, 0.02); BatchNorm weights
    from N(1, 0.02) with zero bias (a BatchNorm weight is a per-channel
    scale, not a matrix, so only a normal distribution applies).
    """
    classname = m.__class__.__name__
    if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
        init.normal_(m.weight.data, 0.0, 0.02)
        # if hasattr(m, 'bias') and m.bias is not None:
        #     init.constant_(m.bias.data, 0.0)
    # Bug fix: this repo's models use BatchNorm1d and BatchNorm3d (see
    # model/sub_model.py) but the test matched only 'BatchNorm2d', so no
    # BatchNorm layer was ever custom-initialized. Match any variant.
    elif classname.find('BatchNorm') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
================================================
FILE: model/sub_model.py
================================================
import torch
import numpy as np
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import os
import math
# from model.sub_models import ResNet, BasicBlock
# from model.sub_models import OrdinalRegressionLayer
import itertools
from collections import OrderedDict
import torch.nn.functional as F
import pdb
class Synthetic_Gradient_Generator(nn.Module):
    """1-D convolutional hourglass that maps an intermediate feature map
    [N, 60, 120] to a synthetic gradient of the same shape.

    input_channel, isTrain and device are accepted for interface parity
    with the other sub-nets but are not used here.
    """

    def __init__(self, input_channel, isTrain, device):
        super(Synthetic_Gradient_Generator, self).__init__()

        def down(cin, cout):
            # conv -> BN -> ReLU encoder stage
            return nn.Sequential(
                nn.Conv1d(cin, cout, kernel_size=3, padding=1),
                nn.BatchNorm1d(cout),
                nn.ReLU()
            )

        # Attribute names (layer1..layer4) are part of the checkpoint
        # state_dict keys and must not change.
        self.layer1 = down(60, 40)
        self.layer2 = down(40, 20)
        self.layer3 = nn.Sequential(
            nn.ConvTranspose1d(20, 40, kernel_size=3, padding=1),
            nn.BatchNorm1d(40),
            nn.ReLU()
        )
        self.layer4 = nn.Sequential(
            nn.ConvTranspose1d(40, 60, kernel_size=3, padding=1)
        )

    def forward(self, x):
        """x: [N, 60, 120] -> synthetic gradient [N, 60, 120]."""
        enc1 = self.layer1(x)           # [N, 40, 120]
        enc2 = self.layer2(enc1)        # [N, 20, 120]
        dec = self.layer3(enc2) + enc1  # skip connection, [N, 40, 120]
        return self.layer4(dec)         # [N, 60, 120]
class Convolutional_Encoder(nn.Module):
    """3-D convolutional encoder reducing a clip [N, T, C, H, W] to a
    per-frame 120-dim feature sequence [N, T, 120]."""

    def __init__(self, input_channel, isTrain, device):
        super(Convolutional_Encoder, self).__init__()
        self.conv = nn.Conv3d
        # Spatial-only 1x3x3 kernels. Attribute names conv1..conv5 and
        # bn1..bn5 are part of the checkpoint state_dict; convs are
        # created before BNs to keep parameter-creation (RNG) order.
        widths = (32, 48, 64, 80, 120)
        chans = (input_channel,) + widths
        for idx, (cin, cout) in enumerate(zip(chans[:-1], chans[1:]), start=1):
            setattr(self, 'conv%d' % idx,
                    self.conv(cin, cout, kernel_size=(1, 3, 3),
                              stride=(1, 1, 1), padding=(0, 1, 1)))
        for idx, cout in enumerate(widths, start=1):
            setattr(self, 'bn%d' % idx, nn.BatchNorm3d(cout))
        # Lookup table used by return_grad(); keys c1..c5 then b1..b5.
        self.cnn = {}
        for idx in range(1, 6):
            self.cnn['c%d' % idx] = getattr(self, 'conv%d' % idx)
        for idx in range(1, 6):
            self.cnn['b%d' % idx] = getattr(self, 'bn%d' % idx)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """x: [N, T, C, H, W] -> features [N, T, 120]."""
        win_size = x.shape[1]
        # Conv3d expects channels before time: [N, C, T, H, W].
        x = x.permute(0, 2, 1, 3, 4)
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2),
                  (self.conv3, self.bn3), (self.conv4, self.bn4),
                  (self.conv5, self.bn5))
        for conv, bn in stages:
            # conv -> BN -> 2x2 spatial avg-pool -> ReLU
            x = self.relu(F.avg_pool3d(bn(conv(x)), (1, 2, 2)))
        # Collapse remaining spatial extent, restore [N, T, feat] layout.
        x = F.adaptive_avg_pool3d(x, (win_size, 1, 1))
        x = x.permute(0, 2, 1, 3, 4)
        return x.reshape(x.size(0), x.size(1), - 1)

    def return_grad(self):
        """Clone and return the weight gradients of every conv/BN layer,
        keyed c1..c5 / b1..b5."""
        return {name: layer.weight.grad.data.clone()
                for name, layer in self.cnn.items()}
class rPPG_Estimator(nn.Module):
    """Bidirectional-LSTM head turning encoder features [N, T, 120] into
    an ordinal rPPG estimate via OrdinalRegressionLayer."""

    def __init__(self, input_channel, num_layers, isTrain, device, num_classes=40, h=None, c=None):
        super(rPPG_Estimator, self).__init__()
        # Attribute names (lstm, fc) are part of the checkpoint state_dict.
        self.lstm = nn.LSTM(input_size=120, hidden_size=60,
                            num_layers=num_layers, batch_first=True, bidirectional=True)
        # 2*60 bidirectional outputs -> 80 = 2*40 paired ordinal logits.
        self.fc = nn.Linear(120, 80)
        self.h, self.c = h, c
        self.orl = OrdinalRegressionLayer()

    def forward(self, x):
        self.lstm.flatten_parameters()
        if self.h is None:
            x, _ = self.lstm(x)
        else:
            # Carry the running hidden state (detached via .data).
            x, (self.h, self.c) = self.lstm(x, (self.h.data, self.c.data))
        logits = self.fc(x)
        decision, prob = self.orl(logits)
        return decision.squeeze(2), prob

    def feed_hc(self, data):
        """Overwrite the LSTM hidden/cell state with (h, c) from data."""
        self.h, self.c = data[0].data, data[1].data

    def return_grad(self):
        """Clone and return gradients of the FC weight and every LSTM weight."""
        lstm_grads = {}
        for sublist in self.lstm._all_weights:
            for name in sublist:
                lstm_grads[name] = self.lstm._parameters[name].grad.data.clone()
        return {'fc': self.fc.weight.grad.data.clone(), 'lstm': lstm_grads}
class OrdinalRegressionLayer(nn.Module):
def __init__(self):
super(OrdinalRegressionLayer, self).__init__()
def forward(self, x):
"""
:param x: N X H X W X C, N is batch_size, C is channels of features
:return: ord_labels is ordinal outputs for each spatial locations , size is N x H X W X C (C = 2K, K is interval of SID)
decode_label is the ordinal labels for each position of Image I
"""
# pdb.set_trace()
x = x.permute(0, 2, 1)
N, C, W = x.size()
# N, W, C = x.size()
ord_num = C // 2
"""
replace iter with matrix operation
fast speed methods
"""
A = x[:, ::2, :].clone()
B = x[:, 1::2, :].clone()
# pdb.set_trace()
A = A.view(N, 1, ord_num * W)
B = B.view(N, 1, ord_num * W)
# pdb.set_trace()
C = torch.cat((A, B), dim=1)
C = torch.clamp(C, min=1e-8, max=1e8) # prevent nans
# pdb.set_trace()
ord_c = nn.functional.softmax(C, dim=1)
# pdb.set_trace()
ord_c1 = ord_c[:, 1, :].clone()
ord_c1 = ord_c1.view(-1, ord_num, W)
decode_c = torch.sum((ord_c1 > 0.5), dim=1).view(-1, 1, W)
ord_c1 = ord_c1.permute(0, 2, 1)
decode_c = decode_c.permute(0, 2, 1)
# pdb.set_trace()
return decode_c, ord_c1
================================================
FILE: requirements.txt
================================================
tensorboardX
easydict
tqdm
bypy
================================================
FILE: settings.py
================================================
import argparse
import torch.nn as nn
import torch
from torch.optim import lr_scheduler
import numpy as np
import random
import pdb
class TrainOptions():
    """Command-line options for training/evaluating the meta-rPPG model."""

    def __init__(self):
        def str2bool(value):
            # Bug fix helper: plain type=bool treats ANY non-empty string
            # (including 'False') as True; parse common falsy spellings.
            return str(value).strip().lower() not in ('false', '0', 'no', 'n', '')

        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.parser.add_argument('--name', type=str, default='meta_rPPG')
        self.parser.add_argument('--network', type=str, default='MAML')
        self.parser.add_argument('--continue_train', action="store_true")
        self.parser.add_argument('--load_file', type=str, default='smallest')
        self.parser.add_argument("--delay", type=int, default=48)
        self.parser.add_argument('--fewshots', type=int, default=1)
        self.parser.add_argument('--lr_ratio', type=float, default=0.1)
        self.parser.add_argument('--per_iter_task', type=int, default=3)
        self.parser.add_argument('--lstm_num_layers', type=int, default=2)
        self.parser.add_argument('--valid_ratio', type=float, default=0.75)
        self.parser.add_argument('--batch_size', type=int, default=3)
        self.parser.add_argument('--lr', type=float, default=1e-3)
        self.parser.add_argument('--train_epoch', type=int, default=1)
        self.parser.add_argument('--gpu_ids', type=str, default='0')
        self.parser.add_argument('--print_net', action="store_true")
        self.parser.add_argument('--epoch_count', type=int, default=1)
        # self.parser.add_argument('--lr_policy', type=str, default='cosine')
        # self.parser.add_argument('--lr_decay_iters', type=int, default=1)
        # self.parser.add_argument('--lr_update_iter', type=int, default=5000)
        self.parser.add_argument('--print_freq', type=int, default=10)
        self.parser.add_argument('--save_latest_freq', type=int, default=100)
        self.parser.add_argument('--save_epoch_freq', type=int, default=50)
        self.parser.add_argument('--save_by_iter', action="store_true")
        self.parser.add_argument('--display_id', type=int, default=1)
        self.parser.add_argument(
            '--display_server', type=str, default="http://localhost")
        self.parser.add_argument('--display_env', type=str, default='main')
        self.parser.add_argument('--display_port', type=int, default=8800)
        self.parser.add_argument('--display_winsize', type=int, default=256)
        # Bug fix: these used type=bool, so '--verbose False' parsed as True.
        self.parser.add_argument('--verbose', type=str2bool, default=True)
        self.parser.add_argument('--no_html', type=str2bool, default=True)
        self.parser.add_argument(
            '--checkpoints_dir', type=str, default='checkpoints')
        self.parser.add_argument('--save_dir', type=str, default='save')
        # NOTE: int type with float('inf') default — the default is never
        # converted (only command-line strings are), so "no limit" works.
        self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"))
        self.parser.add_argument('--num_threads', type=int, default=4)
        self.parser.add_argument('--phase', type=str, default='train')
        # Bug fix: default was the string '0' with type=int; use a real int.
        self.parser.add_argument('--load_iter', type=int, default=0)
        self.parser.add_argument('--epoch', type=str, default='latest')
        self.parser.add_argument('--win_size', type=int, default=60)
        self.parser.add_argument('--adapt_position', type=str, default="extractor")

    def get_options(self):
        """Parse sys.argv and return the options namespace."""
        return self.parser.parse_args()

    def get_parser(self):
        """Return the underlying ArgumentParser (e.g. for sub-parsers)."""
        return self.parser
class custom_scheduler():
    """Cosine learning-rate schedule with hand-tuned warm restarts.

    The lr follows a cosine from Max down toward Min over Tmax steps; at
    fixed step counts the Max/Min bounds are re-anchored to the current
    lr, producing restart-like bumps.
    """

    def __init__(self, optimizer, Tmax):
        self.optimizer = optimizer
        self.Tmax = Tmax
        self.Max = optimizer.param_groups[0]['lr']
        self.Min = self.Max * 0.01
        self.Tcur = 1

    def step(self):
        """Advance one step and write the new lr into every param group."""
        pi = torch.Tensor([np.pi])
        cosine = 1 + torch.cos(pi * self.Tcur / self.Tmax)
        new_lr = float(self.Min + 0.5 * (self.Max - self.Min) * cosine)
        for group in self.optimizer.param_groups:
            group['lr'] = new_lr
        # Re-anchor the bounds at hand-picked steps (warm restarts).
        if self.Tcur in (10, 30, 50, 70, 90):
            self.Max = 10 * self.optimizer.param_groups[0]['lr']
        elif self.Tcur in (20, 40, 60, 80, 100):
            self.Min = 0.01 * self.optimizer.param_groups[0]['lr']
        self.Tcur += 1
================================================
FILE: train.py
================================================
import torch
import torch.nn as nn
import torch.utils.data as Data
import numpy as np
import time
import os
import random
import matplotlib.pyplot as plt
from data import SlideWindowDataLoader, testing
from model import meta_rPPG
from settings import TrainOptions
import pdb
# ---- experiment setup -------------------------------------------------
opt = TrainOptions().get_options()  # parsed command-line options
iter_num = opt.batch_size
# Meta-learning model wrapper (encoder, rPPG estimator, gradient net).
model = meta_rPPG(opt, isTrain=True, continue_train=opt.continue_train)
model.setup(opt)
dataset = SlideWindowDataLoader(opt, isTrain=True)   # training tasks
testset = SlideWindowDataLoader(opt, isTrain=False)  # held-out tasks
per_idx = opt.per_iter_task  # inner-loop windows per outer iteration
dataset_size = dataset.num_tasks * (dataset.task_len[0] - (opt.win_size))
# Number of outer steps per epoch; leaves room for per_idx windows.
task_len = (dataset.task_len[0] - per_idx*opt.win_size)
total_iters = 0
print("Data Size: %d ||||| Batch Size: %d ||||| initial lr: %f" %
      (dataset_size, opt.batch_size, opt.lr))
# Sample an initial batch of tasks and seed the prototype / LSTM state.
# NOTE(review): the hard-coded 5 looks like the number of available
# tasks — confirm against dataset.num_tasks.
task_list = random.sample(range(5), opt.batch_size)
model.dataset = dataset
data = dataset[task_list, 0]
model.set_input(data)
model.update_prototype()
min_mae = [10, 10]
min_rmse = [10, 10]
min_merate = [10, 10]
saving = 1
# ---- training loop ----------------------------------------------------
for epoch in range(opt.epoch_count, opt.train_epoch + 1):
    epoch_start_time = time.time()
    epoch_iter = 0
    i = 0
    for data_idx in range(0, task_len, 1):
        # Fresh task batch and carried-over LSTM state for each step.
        task_list = random.sample(range(5), opt.batch_size)
        model.B_net.feed_hc([model.h, model.c])
        model.progress = epoch + float(data_idx)/float(task_len)
        for i in range(per_idx):
            data = dataset[task_list, data_idx + i*opt.win_size]
            iter_start_time = time.time()
            total_iters += opt.win_size
            model.set_input(data)
            if i == 0:
                model.new_theta_update(epoch)  # Adaptation phase
            else:
                model.new_psi_phi_update(epoch)  # Learning phase
        # Evaluate after every outer-loop step.
        loss, test_loss = testing(opt, model, testset, data_idx, epoch)
        epoch_iter += 1
    # End of epoch: refresh the prototype on a random window, checkpoint,
    # and step the learning-rate schedulers.
    data = dataset[task_list, np.random.randint(task_len)]
    model.set_input(data)
    model.update_prototype()
    model.save_networks('latest')
    model.save_networks(epoch)
    new_lr = model.update_learning_rate(epoch)
    print('Epoch %d/%d ||||| Time: %d sec ||||| Lr: %.7f ||||| Loss: %.3f/%.3f' %
          (epoch, opt.train_epoch, time.time() - epoch_start_time, new_lr,
           loss, test_loss))