Repository: cvlab-yonsei/MNAD
Branch: master
Commit: 4e108a898605
Files: 14
Total size: 58.8 KB
Directory structure:
gitextract_9fesjy9r/
├── Evaluate.py
├── MNAD_files/
│ └── style.css
├── README.md
├── Train.py
├── data/
│ ├── data_seqkey_all.py
│ ├── frame_labels_avenue.npy
│ ├── frame_labels_ped2.npy
│ └── frame_labels_shanghai.npy
├── model/
│ ├── Memory.py
│ ├── Reconstruction.py
│ ├── final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1.py
│ ├── memory_final_spatial_sumonly_weight_ranking_top1.py
│ └── utils.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: Evaluate.py
================================================
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torch.nn.init as init
import torch.utils.data as data
import torch.utils.data.dataset as dataset
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable
import torchvision.utils as v_utils
import matplotlib.pyplot as plt
import cv2
import math
from collections import OrderedDict
import copy
import time
from model.utils import DataLoader
from model.final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1 import *
from model.Reconstruction import *
from sklearn.metrics import roc_auc_score
from utils import *
import random
import glob
import argparse
# ---------------------------------------------------------------------------
# Evaluation script for MNAD: computes per-frame anomaly scores from PSNR of
# the (predicted or reconstructed) frame and the memory compactness loss,
# then reports frame-level AUC against the ground-truth labels.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="MNAD")
parser.add_argument('--gpus', nargs='+', type=str, help='gpus')
parser.add_argument('--batch_size', type=int, default=4, help='batch size for training')
parser.add_argument('--test_batch_size', type=int, default=1, help='batch size for test')
parser.add_argument('--h', type=int, default=256, help='height of input images')
parser.add_argument('--w', type=int, default=256, help='width of input images')
parser.add_argument('--c', type=int, default=3, help='channel of input images')
parser.add_argument('--method', type=str, default='pred', help='The target task for anoamly detection')
parser.add_argument('--t_length', type=int, default=5, help='length of the frame sequences')
parser.add_argument('--fdim', type=int, default=512, help='channel dimension of the features')
parser.add_argument('--mdim', type=int, default=512, help='channel dimension of the memory items')
parser.add_argument('--msize', type=int, default=10, help='number of the memory items')
parser.add_argument('--alpha', type=float, default=0.6, help='weight for the anomality score')
parser.add_argument('--th', type=float, default=0.01, help='threshold for test updating')
parser.add_argument('--num_workers', type=int, default=2, help='number of workers for the train loader')
parser.add_argument('--num_workers_test', type=int, default=1, help='number of workers for the test loader')
parser.add_argument('--dataset_type', type=str, default='ped2', help='type of dataset: ped2, avenue, shanghai')
parser.add_argument('--dataset_path', type=str, default='./dataset', help='directory of data')
parser.add_argument('--model_dir', type=str, help='directory of model')
parser.add_argument('--m_items_dir', type=str, help='directory of model')

args = parser.parse_args()

# Select visible GPUs; default to GPU 0 when --gpus is not given.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
if args.gpus is None:
    gpus = "0"
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus
else:
    # Join the requested device ids into a comma-separated list.
    gpus = ""
    for i in range(len(args.gpus)):
        gpus = gpus + args.gpus[i] + ","
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus[:-1]

torch.backends.cudnn.enabled = True # make sure to use cudnn for computational performance

test_folder = args.dataset_path+"/"+args.dataset_type+"/testing/frames"

# Loading dataset
test_dataset = DataLoader(test_folder, transforms.Compose([
    transforms.ToTensor(),
]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1)

test_size = len(test_dataset)

test_batch = data.DataLoader(test_dataset, batch_size = args.test_batch_size,
                             shuffle=False, num_workers=args.num_workers_test, drop_last=False)

# Per-pixel MSE (reduction handled manually below).
loss_func_mse = nn.MSELoss(reduction='none')

# Loading the trained model (whole pickled nn.Module, not a state_dict).
model = torch.load(args.model_dir)
model.cuda()
m_items = torch.load(args.m_items_dir)

# Frame-level ground truth: labels[0] is the concatenated label vector of
# all test videos (1 = anomalous frame).
labels = np.load('./data/frame_labels_'+args.dataset_type+'.npy')

# Index every test video folder and its sorted frame list.
# NOTE(review): video.split('/') assumes POSIX path separators.
videos = OrderedDict()
videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
for video in videos_list:
    video_name = video.split('/')[-1]
    videos[video_name] = {}
    videos[video_name]['path'] = video
    videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
    videos[video_name]['frame'].sort()
    videos[video_name]['length'] = len(videos[video_name]['frame'])

labels_list = []
label_length = 0
psnr_list = {}
feature_distance_list = {}

print('Evaluation of', args.dataset_type)

# Setting for video anomaly detection: build the flattened label vector.
# For 'pred' the first 4 frames of each video have no prediction target,
# so their labels are skipped (hard-codes t_length-1 == 4).
for video in sorted(videos_list):
    video_name = video.split('/')[-1]
    if args.method == 'pred':
        labels_list = np.append(labels_list, labels[0][4+label_length:videos[video_name]['length']+label_length])
    else:
        labels_list = np.append(labels_list, labels[0][label_length:videos[video_name]['length']+label_length])
    label_length += videos[video_name]['length']
    psnr_list[video_name] = []
    feature_distance_list[video_name] = []

# Running cursor over the concatenated test frames: label_length is the
# cumulative frame count up to (and including) the current video.
label_length = 0
video_num = 0
label_length += videos[videos_list[video_num].split('/')[-1]]['length']
m_items_test = m_items.clone()

model.eval()

for k,(imgs) in enumerate(test_batch):
    # Advance to the next video when the sample index crosses its boundary
    # (each 'pred' video yields length-4 samples; 'recon' yields length).
    if args.method == 'pred':
        if k == label_length-4*(video_num+1):
            video_num += 1
            label_length += videos[videos_list[video_num].split('/')[-1]]['length']
    else:
        if k == label_length:
            video_num += 1
            label_length += videos[videos_list[video_num].split('/')[-1]]['length']

    imgs = Variable(imgs).cuda()

    if args.method == 'pred':
        # NOTE(review): 3*4 hard-codes c=3 and t_length=5; the first 4 frames
        # are the input stack, the 5th is the prediction target.
        outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, _, _, _, compactness_loss = model.forward(imgs[:,0:3*4], m_items_test, False)
        # Images are in [-1, 1]; map to [0, 1] before the MSE used for PSNR.
        mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0,3*4:]+1)/2)).item()
        mse_feas = compactness_loss.item()

        # Calculating the threshold for updating at the test time
        point_sc = point_score(outputs, imgs[:,3*4:])
    else:
        outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, compactness_loss = model.forward(imgs, m_items_test, False)
        mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0]+1)/2)).item()
        mse_feas = compactness_loss.item()

        # Calculating the threshold for updating at the test time
        point_sc = point_score(outputs, imgs)

    # Only frames that look normal enough (low point score) are allowed to
    # update the memory items at test time.
    if point_sc < args.th:
        query = F.normalize(feas, dim=1)
        query = query.permute(0,2,3,1) # b X h X w X d
        m_items_test = model.memory.update(query, m_items_test, False)

    psnr_list[videos_list[video_num].split('/')[-1]].append(psnr(mse_imgs))
    feature_distance_list[videos_list[video_num].split('/')[-1]].append(mse_feas)

# Measuring the abnormality score and the AUC
anomaly_score_total_list = []
for video in sorted(videos_list):
    video_name = video.split('/')[-1]
    # Combine PSNR-based and feature-distance-based scores with weight alpha.
    anomaly_score_total_list += score_sum(anomaly_score_list(psnr_list[video_name]),
                                          anomaly_score_list_inv(feature_distance_list[video_name]), args.alpha)

anomaly_score_total_list = np.asarray(anomaly_score_total_list)

# Labels are inverted (1 - label) so that high anomaly score <-> positive class.
accuracy = AUC(anomaly_score_total_list, np.expand_dims(1-labels_list, 0))

print('The result of ', args.dataset_type)
print('AUC: ', accuracy*100, '%')
================================================
FILE: MNAD_files/style.css
================================================
/* Space out content a bit */
@import url('https://fonts.googleapis.com/css?family=Baloo|Bungee+Inline|Lato|Righteous|Shojumaru');

body {
  padding-top: 20px;
  padding-bottom: 20px;
  /* NOTE(review): Lato is a sans-serif face; the 'cursive' generic fallback
     looks unintended -- confirm 'sans-serif' was not meant here. */
  font-family: 'Lato', cursive;
  font-size: 14px;
}

/* Everything but the jumbotron gets side spacing for mobile first views */
.header,
.row,
.footer {
  padding-left: 15px;
  padding-right: 15px;
}

/* Custom page header */
.header {
  border-bottom: 1px solid #e5e5e5;
}

/* Make the masthead heading the same height as the navigation */
.header h1 {
  margin-top: 0;
  margin-bottom: 0;
  line-height: 40px;
  padding-bottom: 19px;
  font-size: 30px;
  font-weight: bold;
}

/* Secondary masthead heading, same vertical rhythm as h1 */
.header h3 {
  margin-top: 0;
  margin-bottom: 0;
  line-height: 40px;
  padding-bottom: 19px;
  font-size: 20px;
}

/* Display face for the sub-heading only */
.header h4 {
  font-family: 'Baloo', cursive;
}

/* Custom page footer */
.footer {
  padding-top: 19px;
  color: #777;
  border-top: 1px solid #e5e5e5;
}

/* Customize container */
@media (min-width: 938px) {
  .container {
    max-width: 900px;
  }
}

.container-narrow > hr {
  margin: 20px 0;
}

/* Main marketing message and sign up button */
.container .jumbotron {
  text-align: center;
  border-bottom: 1px solid #e5e5e5;
  /* NOTE(review): the shorthand 'padding' below overrides 'padding-left'
     above entirely -- confirm which value is intended. */
  padding-left: 20px;
  padding: 30px;
}

.jumbotron .btn {
  font-size: 21px;
  padding: 14px 24px;
}

/* Extra breathing room when a heading follows a paragraph */
.row p + h3 {
  margin-top: 28px;
}

div.row h3 {
  padding-bottom: 5px;
  border-bottom: 1px solid #ccc;
}

/* Responsive: Portrait tablets and up */
@media screen and (min-width: 938px) {
  /* Remove the padding we set earlier */
  .header,
  .marketing,
  .footer {
    padding-left: 0;
    padding-right: 0;
  }
  /* Space out the masthead */
  .header {
    margin-bottom: 30px;
  }
  /* Remove the bottom border on the jumbotron for visual effect */
  .jumbotron {
    border-bottom: 0;
  }
}

/* Hide the first heading of the rendered README (the page supplies its own) */
.readme h1 {
  display: none;
}
/* Bug fix: `float: middle` is not a valid CSS value (allowed values are
   left | right | none | inline-start | inline-end), so browsers dropped the
   declaration entirely and these rules had no effect.  Removing the invalid
   declarations therefore does not change rendering.
   TODO(review): if centering the columns was the intent, use
   `margin: 0 auto` on the columns or flexbox on their parent. */
.left_column {
}
.right_column {
}
================================================
FILE: README.md
================================================
# PyTorch implementation of "Learning Memory-guided Normality for Anomaly Detection"


This is the implementation of the paper "Learning Memory-guided Normality for Anomaly Detection (CVPR 2020)".
For more information, checkout the project site [[website](https://cvlab.yonsei.ac.kr/projects/MNAD/)] and the paper [[PDF](http://openaccess.thecvf.com/content_CVPR_2020/papers/Park_Learning_Memory-Guided_Normality_for_Anomaly_Detection_CVPR_2020_paper.pdf)].
## Dependencies
* Python 3.6
* PyTorch 1.1.0
* Numpy
* Sklearn
## Datasets
* UCSD Ped2 [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)]
* CUHK Avenue [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)]
* ShanghaiTech [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)]
These datasets are from an official github of "Future Frame Prediction for Anomaly Detection - A New Baseline (CVPR 2018)".
Download the datasets into ``dataset`` folder, like ``./dataset/ped2/``
## Update
* 02/04/21: We uploaded the codes based on reconstruction method, and pretrained weights for Ped2 reconstruction, Avenue prediction and Avenue reconstruction.
## Training
* ~~The training and testing codes are based on prediction method~~
* Now you can implement the codes based on both prediction and reconstruction methods.
* The codes are basically based on the prediction method, and you can easily implement this as
```bash
git clone https://github.com/cvlab-yonsei/projects
cd projects/MNAD/code
python Train.py # for training
```
* You can freely define parameters with your own settings like
```bash
python Train.py --gpus 1 --dataset_path 'your_dataset_directory' --dataset_type avenue --exp_dir 'your_log_directory'
```
* For the reconstruction task, you need to newly set the parameters, *e.g.*, the target task, the weights of the losses and the number of the time sequence.
```bash
python Train.py --method recon --loss_compact 0.01 --loss_separate 0.01 --t_length 1 # for training
```
## Evaluation
* Test your own model
* Check your dataset_type (ped2, avenue or shanghai)
```bash
python Evaluate.py --dataset_type ped2 --model_dir your_model.pth --m_items_dir your_m_items.pt
```
* For the reconstruction task, you need to set the parameters as
```bash
python Evaluate.py --method recon --t_length 1 --alpha 0.7 --th 0.015 --dataset_type ped2 --model_dir your_model.pth --m_items_dir your_m_items.pt
```
* Test the model with our pre-trained model and memory items
```bash
python Evaluate.py --dataset_type ped2 --model_dir pretrained_model.pth --m_items_dir m_items.pt
```
## Pre-trained model and memory items
Will be released soon.
## Bibtex
```
@inproceedings{park2020learning,
title={Learning Memory-guided Normality for Anomaly Detection},
author={Park, Hyunjong and Noh, Jongyoun and Ham, Bumsub},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={14372--14381},
year={2020}
}
```
================================================
FILE: Train.py
================================================
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torch.nn.init as init
import torch.utils.data as data
import torch.utils.data.dataset as dataset
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable
import torchvision.utils as v_utils
import matplotlib.pyplot as plt
import cv2
import math
from collections import OrderedDict
import copy
import time
from model.utils import DataLoader
from sklearn.metrics import roc_auc_score
from utils import *
import random
import argparse
# ---------------------------------------------------------------------------
# Training script for MNAD: trains the memory-augmented autoencoder (either
# the future-frame prediction or the reconstruction variant) and saves the
# trained model plus the learned memory items.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="MNAD")
parser.add_argument('--gpus', nargs='+', type=str, help='gpus')
parser.add_argument('--batch_size', type=int, default=4, help='batch size for training')
parser.add_argument('--test_batch_size', type=int, default=1, help='batch size for test')
parser.add_argument('--epochs', type=int, default=60, help='number of epochs for training')
parser.add_argument('--loss_compact', type=float, default=0.1, help='weight of the feature compactness loss')
parser.add_argument('--loss_separate', type=float, default=0.1, help='weight of the feature separateness loss')
parser.add_argument('--h', type=int, default=256, help='height of input images')
parser.add_argument('--w', type=int, default=256, help='width of input images')
parser.add_argument('--c', type=int, default=3, help='channel of input images')
parser.add_argument('--lr', type=float, default=2e-4, help='initial learning rate')
parser.add_argument('--method', type=str, default='pred', help='The target task for anoamly detection')
parser.add_argument('--t_length', type=int, default=5, help='length of the frame sequences')
parser.add_argument('--fdim', type=int, default=512, help='channel dimension of the features')
parser.add_argument('--mdim', type=int, default=512, help='channel dimension of the memory items')
parser.add_argument('--msize', type=int, default=10, help='number of the memory items')
parser.add_argument('--num_workers', type=int, default=2, help='number of workers for the train loader')
parser.add_argument('--num_workers_test', type=int, default=1, help='number of workers for the test loader')
parser.add_argument('--dataset_type', type=str, default='ped2', help='type of dataset: ped2, avenue, shanghai')
parser.add_argument('--dataset_path', type=str, default='./dataset', help='directory of data')
parser.add_argument('--exp_dir', type=str, default='log', help='directory of log')

args = parser.parse_args()

# Select visible GPUs; default to GPU 0 when --gpus is not given.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
if args.gpus is None:
    gpus = "0"
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus
else:
    gpus = ""
    for i in range(len(args.gpus)):
        gpus = gpus + args.gpus[i] + ","
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus[:-1]

torch.backends.cudnn.enabled = True # make sure to use cudnn for computational performance

train_folder = args.dataset_path+"/"+args.dataset_type+"/training/frames"
test_folder = args.dataset_path+"/"+args.dataset_type+"/testing/frames"

# Loading dataset (each sample is t_length frames concatenated on channels).
train_dataset = DataLoader(train_folder, transforms.Compose([
    transforms.ToTensor(),
]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1)

test_dataset = DataLoader(test_folder, transforms.Compose([
    transforms.ToTensor(),
]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1)

train_size = len(train_dataset)
test_size = len(test_dataset)

train_batch = data.DataLoader(train_dataset, batch_size = args.batch_size,
                              shuffle=True, num_workers=args.num_workers, drop_last=True)
test_batch = data.DataLoader(test_dataset, batch_size = args.test_batch_size,
                             shuffle=False, num_workers=args.num_workers_test, drop_last=False)

# Model setting
assert args.method == 'pred' or args.method == 'recon', 'Wrong task name'
if args.method == 'pred':
    from model.final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1 import *
    model = convAE(args.c, args.t_length, args.msize, args.fdim, args.mdim)
else:
    from model.Reconstruction import *
    model = convAE(args.c, memory_size = args.msize, feature_dim = args.fdim, key_dim = args.mdim)

# Only the encoder/decoder parameters are optimized; the memory items are
# updated through the memory module itself, not by the optimizer.
params_encoder = list(model.encoder.parameters())
params_decoder = list(model.decoder.parameters())
params = params_encoder + params_decoder
optimizer = torch.optim.Adam(params, lr = args.lr)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max =args.epochs)
model.cuda()

# Report the training process: everything printed below goes to log.txt.
log_dir = os.path.join('./exp', args.dataset_type, args.method, args.exp_dir)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
orig_stdout = sys.stdout
f = open(os.path.join(log_dir, 'log.txt'),'w')
sys.stdout= f

loss_func_mse = nn.MSELoss(reduction='none')

# Training
m_items = F.normalize(torch.rand((args.msize, args.mdim), dtype=torch.float), dim=1).cuda() # Initialize the memory items

for epoch in range(args.epochs):
    labels_list = []
    model.train()
    start = time.time()
    for j,(imgs) in enumerate(train_batch):
        imgs = Variable(imgs).cuda()
        if args.method == 'pred':
            # NOTE(review): the 0:12 / 12: split hard-codes c=3, t_length=5
            # (4 input frames, 1 target frame) -- confirm before changing
            # --c or --t_length for the 'pred' task.
            outputs, _, _, m_items, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = model.forward(imgs[:,0:12], m_items, True)
        else:
            outputs, _, _, m_items, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = model.forward(imgs, m_items, True)
        optimizer.zero_grad()
        if args.method == 'pred':
            loss_pixel = torch.mean(loss_func_mse(outputs, imgs[:,12:]))
        else:
            loss_pixel = torch.mean(loss_func_mse(outputs, imgs))
        # Total objective: pixel loss + weighted compactness/separateness.
        loss = loss_pixel + args.loss_compact * compactness_loss + args.loss_separate * separateness_loss
        loss.backward(retain_graph=True)
        optimizer.step()
    scheduler.step()
    print('----------------------------------------')
    print('Epoch:', epoch+1)
    if args.method == 'pred':
        print('Loss: Prediction {:.6f}/ Compactness {:.6f}/ Separateness {:.6f}'.format(loss_pixel.item(), compactness_loss.item(), separateness_loss.item()))
    else:
        print('Loss: Reconstruction {:.6f}/ Compactness {:.6f}/ Separateness {:.6f}'.format(loss_pixel.item(), compactness_loss.item(), separateness_loss.item()))
    print('Memory_items:')
    print(m_items)
    print('----------------------------------------')

print('Training is finished')
# Save the model and the memory items
torch.save(model, os.path.join(log_dir, 'model.pth'))
torch.save(m_items, os.path.join(log_dir, 'keys.pt'))

# Restore stdout and close the log file.
sys.stdout = orig_stdout
f.close()
================================================
FILE: data/data_seqkey_all.py
================================================
import numpy as np
import os
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from PIL import Image
import os.path
import sys
def pil_loader(path):
    """Read the image at `path` with PIL and return it converted to RGB."""
    # Go through an explicit file handle so the descriptor is closed promptly
    # (avoids ResourceWarning, https://github.com/python-pillow/Pillow/issues/835).
    with open(path, 'rb') as handle:
        image = Image.open(handle)
        return image.convert('RGB')
def accimage_loader(path):
    """Load `path` with the accimage backend, falling back to PIL on failure."""
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem; fall back to PIL.Image.
        return pil_loader(path)
def default_loader(path):
    """Dispatch to the loader matching torchvision's configured image backend."""
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader(path)
    return pil_loader(path)
def make_dataset(dir, class_to_idx):
    """Collect every file path under dir/<class>/ for each class key,
    in sorted class order and sorted filename order.

    Prints the sorted class list and each class name as it is scanned
    (kept for parity with the original logging behaviour).
    """
    frames = []
    print(sorted(class_to_idx.keys()))
    expanded_root = os.path.expanduser(dir)
    for target in sorted(class_to_idx.keys()):
        print(target)
        class_dir = os.path.join(expanded_root, target)
        # Skip keys with no matching directory on disk.
        if not os.path.isdir(class_dir):
            continue
        for walk_root, _, filenames in sorted(os.walk(class_dir)):
            frames.extend(os.path.join(walk_root, name) for name in sorted(filenames))
    return frames
class DatasetFolder(data.Dataset):
    """Dataset yielding temporal stacks of `length` consecutive frames,
    channel-concatenated into one tensor.

    Expected layout: root/<video_or_class>/<frame files>.

    NOTE(review): windows are built over the flat, globally sorted frame
    list, so a window starting near the end of one video folder can spill
    into the next folder -- confirm whether that is intended.
    """

    def __init__(self, root, loader=default_loader,transform=None, target_transform=None, length=5):
        """
        Args:
            root: dataset root directory.
            loader: callable mapping a path to a loaded image.
            transform: optional per-frame transform.
            target_transform: kept for API symmetry; unused by __getitem__.
            length: number of consecutive frames per sample.
        """
        classes, class_to_idx = self._find_classes(root)
        samples = make_dataset(root, class_to_idx)
        if len(samples) == 0:
            raise(RuntimeError("Found 0 files in subfolders of: " + root))
        self.root = root
        self.loader = loader
        self.length = length
        self.classes = classes
        self.class_to_idx = class_to_idx
        # Valid window starts: the last (length - 1) frames cannot begin a
        # full window.  Bug fix: with length == 1 the original slice
        # `samples[:-(self.length - 1)]` evaluated to `samples[:0]`, i.e. an
        # EMPTY dataset; a one-frame window needs no trimming at all.
        if self.length > 1:
            self.samples = samples[:-(self.length - 1)]
        else:
            self.samples = samples
        self.samples_all = samples
        self.samples_pool = samples[1:]
        self.transform = transform
        self.target_transform = target_transform

    def _find_classes(self, dir):
        """
        Finds the class folders in a dataset.
        Args:
            dir (string): Root directory path.
        Returns:
            tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary.
        Ensures:
            No class is a subdirectory of another.
        """
        if sys.version_info >= (3, 5):
            # Faster and available in Python 3.5 and above
            classes = [d.name for d in os.scandir(dir) if d.is_dir()]
        else:
            classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
        classes.sort()
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        return classes, class_to_idx

    def __getitem__(self, index):
        """Return `length` consecutive frames starting at `index`,
        concatenated along the channel dimension.
        """
        sample = []
        path_start = self.samples[index]
        sample_start = self.loader(path_start)
        if self.transform is not None:
            sample_start = self.transform(sample_start)
        sample.append(sample_start)
        # Subsequent frames come from the untrimmed list so the window can
        # extend past the last valid start index.
        for i in range(self.length - 1):
            path = self.samples_all[index + (i+1)]
            sample_immediate = self.loader(path)
            if self.transform is not None:
                sample_immediate = self.transform(sample_immediate)
            sample.append(sample_immediate)
        # Channel-concatenate the window into a single tensor.
        sample_input = sample[0]
        for i in range(self.length-1):
            sample_input = torch.cat((sample_input,sample[i+1]), dim=0)
        return sample_input

    def _stride(self):
        """Draw a random temporal stride in {1, 2, 3} (currently unused by
        __getitem__; kept for compatibility with external callers)."""
        stride = int(np.random.choice(3,1) + 1)
        return stride

    def __len__(self):
        return len(self.samples)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', 'webp']
class ImageFolder(DatasetFolder):
    """Convenience subclass of DatasetFolder for plain image directories.

    Bug fix: the `length` argument was accepted but never forwarded to
    DatasetFolder.__init__, so every ImageFolder silently used the parent's
    default window length regardless of what the caller passed.  It is now
    passed through; the default value (5) is unchanged.
    """
    def __init__(self, root, transform=None, target_transform=None,
                 loader=default_loader, length=5):
        super(ImageFolder, self).__init__(root, loader,
                                          transform=transform,
                                          target_transform=target_transform,
                                          length=length)
        # Alias kept for torchvision.ImageFolder API compatibility.
        self.imgs = self.samples
================================================
FILE: model/Memory.py
================================================
import torch
import torch.autograd as ag
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
import functools
import random
from torch.nn import functional as F
def random_uniform(shape, low, high, cuda):
    """Sample a tensor of the given `shape` uniformly from [low, high).

    The sample is drawn on CPU first and moved to GPU only when `cuda`
    is true.
    """
    sample = (high - low) * torch.rand(*shape) + low
    return sample.cuda() if cuda else sample
def distance(a, b):
    """Euclidean distance between `a` and `b` as a 1-element tensor."""
    squared_diff = (a - b) ** 2
    return squared_diff.sum().sqrt().unsqueeze(0)
def distance_batch(a, b):
    """Distance of each row of `a` to `b`, concatenated into one tensor.

    Bug fix: the original seeded the result with distance(a[0], b) and then
    looped `for i in range(bs-1)` appending distance(a[i], b) -- so row 0
    was counted twice and the LAST row of `a` was never used.  Each row i
    now contributes distance(a[i], b) exactly once.
    """
    bs, _ = a.shape
    return torch.cat([distance(a[i], b) for i in range(bs)], 0)
def multiply(x): #to flatten matrix into a vector
    """Product of the elements of `x`; returns 1 for an empty sequence."""
    product = 1
    for factor in x:
        product = product * factor
    return product
def flatten(x):
    """ Flatten matrix into a vector """
    # Total number of elements in x.
    count = multiply(x.size())
    # NOTE(review): resize_ reshapes x IN PLACE (the caller's tensor is
    # mutated) and is deprecated in modern PyTorch in favour of
    # view/reshape -- confirm no caller relies on the original shape after
    # this call.
    return x.resize_(count)
def index(batch_size, x):
    """Prepend each row of `x` with its batch index: row i becomes [i, *x[i]]."""
    row_ids = torch.arange(0, batch_size).long().unsqueeze(-1)
    return torch.cat((row_ids, x), dim=1)
def MemoryLoss(memory):
    """Mean pairwise off-diagonal similarity of the m memory items.

    Similarities are mapped from [-1, 1] to [0, 1] before averaging, and
    the diagonal (self-similarity) is excluded from the mean.
    """
    m, d = memory.size()
    # Inner-product similarity rescaled to [0, 1].  # m X m
    similarity = torch.matmul(memory, torch.t(memory)) / 2 + 1 / 2
    # Remove the identity diagonal so items are not penalized against themselves.
    off_diagonal = torch.abs(similarity - torch.eye(m).cuda())
    return torch.sum(off_diagonal) / (m * (m - 1))
class Memory(nn.Module):
    """Memory module: a bank of `memory_size` keys of dimension `key_dim`
    that queries (normalized feature vectors) read from and write to.

    NOTE(review): temp_update / temp_gather are stored but never used in the
    visible code -- presumably intended as softmax temperatures; confirm.
    """
    def __init__(self, memory_size, feature_dim, key_dim, temp_update, temp_gather):
        super(Memory, self).__init__()
        # Constants
        self.memory_size = memory_size
        self.feature_dim = feature_dim
        self.key_dim = key_dim
        self.temp_update = temp_update
        self.temp_gather = temp_gather

    def hard_neg_mem(self, mem, i):
        """Return, for each row of `mem`, the most similar key other than key i.

        NOTE(review): relies on self.keys_var, which is not set anywhere in
        this class -- appears to be dead/legacy code; confirm before calling.
        """
        similarity = torch.matmul(mem,torch.t(self.keys_var))
        # Mask out key i so it cannot be selected as its own hard negative.
        similarity[:,i] = -1
        _, max_idx = torch.topk(similarity, 1, dim=1)
        return self.keys_var[max_idx]

    def random_pick_memory(self, mem, max_indices):
        """For each of the m memory slots, pick one random query index that was
        assigned to it (or -1 when no query chose that slot)."""
        m, d = mem.size()
        output = []
        for i in range(m):
            flattened_indices = (max_indices==i).nonzero()
            a, _ = flattened_indices.size()
            if a != 0:
                number = np.random.choice(a, 1)
                output.append(flattened_indices[number, 0])
            else:
                output.append(-1)
        return torch.tensor(output)

    def get_update_query(self, mem, max_indices, update_indices, score, query, train):
        """Build the per-slot update vector: for each memory slot i, the sum of
        the queries assigned to it, weighted by their matching score relative
        to the best score for that slot.

        Returns an (m, d) tensor; slots with no assigned query get zeros.
        NOTE(review): the train and test branches compute the same weighted
        sum; only the unused `random_update` buffer differs.
        """
        m, d = mem.size()
        if train:
            query_update = torch.zeros((m,d)).cuda()
            random_update = torch.zeros((m,d)).cuda()
            for i in range(m):
                # Queries whose nearest memory item is slot i.
                idx = torch.nonzero(max_indices.squeeze(1)==i)
                a, _ = idx.size()
                #ex = update_indices[0][i]
                if a != 0:
                    #random_idx = torch.randperm(a)[0]
                    #idx = idx[idx != ex]
                    # query_update[i] = torch.sum(query[idx].squeeze(1), dim=0)
                    # Score-weighted sum, normalized by the best score for slot i.
                    query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0)
                    #random_update[i] = query[random_idx] * (score[random_idx,i] / torch.max(score[:,i]))
                else:
                    query_update[i] = 0
                    #random_update[i] = 0
            return query_update
        else:
            query_update = torch.zeros((m,d)).cuda()
            for i in range(m):
                idx = torch.nonzero(max_indices.squeeze(1)==i)
                a, _ = idx.size()
                #ex = update_indices[0][i]
                if a != 0:
                    #idx = idx[idx != ex]
                    query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0)
                    # query_update[i] = torch.sum(query[idx].squeeze(1), dim=0)
                else:
                    query_update[i] = 0
            return query_update

    def get_score(self, mem, query):
        """Similarity scores between every spatial query vector and every
        memory item.

        Returns:
            score_query: softmax over the query axis (which query matches
                each memory item best), shape (b*h*w, m).
            score_memory: softmax over the memory axis (which memory item
                matches each query best), shape (b*h*w, m).
        """
        bs, h,w,d = query.size()
        m, d = mem.size()
        score = torch.matmul(query, torch.t(mem))# b X h X w X m
        score = score.view(bs*h*w, m)# (b X h X w) X m
        score_query = F.softmax(score, dim=0)
        score_memory = F.softmax(score,dim=1)
        return score_query, score_memory

    def forward(self, query, keys, train=True):
        """Read from (and, when training, write to) the memory.

        Args:
            query: feature map, b X d X h X w (normalized internally).
            keys: current memory items, m X d.
            train: training mode returns both gathering (compactness) and
                spreading (separateness) losses and updates the memory;
                test mode returns only the gathering loss and leaves the
                memory unchanged.
        """
        batch_size, dims,h,w = query.size() # b X d X h X w
        query = F.normalize(query, dim=1)
        query = query.permute(0,2,3,1) # b X h X w X d
        #train
        if train:
            #gathering loss
            gathering_loss = self.gather_loss(query,keys, train)
            #spreading_loss
            spreading_loss = self.spread_loss(query, keys, train)
            # read
            updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys)
            #update
            updated_memory = self.update(query, keys, train)
            return updated_query, updated_memory, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss
        #test
        else:
            #gathering loss
            gathering_loss = self.gather_loss(query,keys, train)
            # read
            updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys)
            #update: at test time the caller decides separately whether to
            # invoke self.update(); here the keys pass through unchanged.
            updated_memory = keys
            return updated_query, updated_memory, softmax_score_query, softmax_score_memory, gathering_loss

    def update(self, query, keys,train):
        """Write the queries back into the memory: each slot absorbs a
        score-weighted sum of its assigned queries, then is re-normalized.
        Returns the detached updated memory (no gradient flows to the keys).
        """
        batch_size, h,w,dims = query.size() # b X h X w X d
        softmax_score_query, softmax_score_memory = self.get_score(keys, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1)
        _, updating_indices = torch.topk(softmax_score_query, 1, dim=0)
        if train:
            # top-1 queries (of each memory) update (weighted sum) & random pick
            query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape,train)
            updated_memory = F.normalize(query_update + keys, dim=1)
        else:
            # only weighted sum update when test
            query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape, train)
            updated_memory = F.normalize(query_update + keys, dim=1)
        # top-1 update
        #query_update = query_reshape[updating_indices][0]
        #updated_memory = F.normalize(query_update + keys, dim=1)
        return updated_memory.detach()

    def pointwise_gather_loss(self, query_reshape, keys, gathering_indices, train):
        """Per-element (unreduced) MSE between each query and its nearest
        memory item; the memory side is detached."""
        n,dims = query_reshape.size() # (b X h X w) X d
        loss_mse = torch.nn.MSELoss(reduction='none')
        pointwise_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach())
        return pointwise_loss

    def spread_loss(self,query, keys, train):
        """Separateness loss: triplet margin loss pushing each query toward
        its nearest memory item (positive) and away from the second-nearest
        (negative)."""
        batch_size, h,w,dims = query.size() # b X h X w X d
        loss = torch.nn.TripletMarginLoss(margin=1.0)
        softmax_score_query, softmax_score_memory = self.get_score(keys, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        _, gathering_indices = torch.topk(softmax_score_memory, 2, dim=1)
        #1st, 2nd closest memories
        pos = keys[gathering_indices[:,0]]
        neg = keys[gathering_indices[:,1]]
        spreading_loss = loss(query_reshape,pos.detach(), neg.detach())
        return spreading_loss

    def gather_loss(self, query, keys, train):
        """Compactness loss: mean MSE between each query and its single
        nearest memory item (memory side detached)."""
        batch_size, h,w,dims = query.size() # b X h X w X d
        loss_mse = torch.nn.MSELoss()
        softmax_score_query, softmax_score_memory = self.get_score(keys, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1)
        gathering_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach())
        return gathering_loss

    def read(self, query, updated_memory):
        """Read step: each query is augmented with the softmax-weighted sum
        of all memory items, doubling the channel dimension.

        Returns the augmented queries as b X 2d X h X w plus both softmax
        score matrices.
        """
        batch_size, h,w,dims = query.size() # b X h X w X d
        softmax_score_query, softmax_score_memory = self.get_score(updated_memory, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        # Weighted read-out; scores are detached so reading does not train the attention.
        concat_memory = torch.matmul(softmax_score_memory.detach(), updated_memory) # (b X h X w) X d
        updated_query = torch.cat((query_reshape, concat_memory), dim = 1) # (b X h X w) X 2d
        updated_query = updated_query.view(batch_size, h, w, 2*dims)
        updated_query = updated_query.permute(0,3,1,2)
        return updated_query, softmax_score_query, softmax_score_memory
================================================
FILE: model/Reconstruction.py
================================================
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from .Memory import *
class Encoder(torch.nn.Module):
    """Convolutional encoder: maps a channel-stacked window of
    (t_length - 1) frames, each with n_channel channels, to a 512-channel
    feature map downsampled by a factor of 8 (three 2x2 max-pools).
    """

    def __init__(self, t_length = 2, n_channel =3):
        super(Encoder, self).__init__()

        def double_conv(c_in, c_out):
            # Two 3x3 convs, each followed by BatchNorm and ReLU.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        def double_conv_bare_tail(c_in, c_out):
            # Like double_conv, but the second conv has no BatchNorm/ReLU:
            # the raw features feed the memory module downstream.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
            )

        self.moduleConv1 = double_conv(n_channel * (t_length - 1), 64)
        self.modulePool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

        self.moduleConv2 = double_conv(64, 128)
        self.modulePool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

        self.moduleConv3 = double_conv(128, 256)
        self.modulePool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

        self.moduleConv4 = double_conv_bare_tail(256, 512)
        # Registered but not used by forward(); kept so the parameter set and
        # state_dict layout stay identical to the original implementation.
        self.moduleBatchNorm = torch.nn.BatchNorm2d(512)
        self.moduleReLU = torch.nn.ReLU(inplace=False)

    def forward(self, x):
        """Encode x (b, n_channel*(t_length-1), H, W) -> (b, 512, H/8, W/8)."""
        features = self.modulePool1(self.moduleConv1(x))
        features = self.modulePool2(self.moduleConv2(features))
        features = self.modulePool3(self.moduleConv3(features))
        return self.moduleConv4(features)
class Decoder(torch.nn.Module):
    """Convolutional decoder for the reconstruction model.

    Upsamples a 1024-channel bottleneck (memory-augmented features) back
    to an n_channel image; the final Tanh keeps outputs in [-1, 1].
    """

    def __init__(self, t_length = 2, n_channel =3):
        super(Decoder, self).__init__()

        def double_conv(c_in, c_out):
            # Two 3x3 convolutions, each followed by BatchNorm + ReLU.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        def output_head(c_in, c_out, nc):
            # Three 3x3 convs narrowing to c_out channels, ending in Tanh.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=nc, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(nc),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=nc, out_channels=nc, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(nc),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=nc, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.Tanh(),
            )

        def upsample(nc, c_out):
            # Transposed conv that doubles the spatial resolution.
            return torch.nn.Sequential(
                torch.nn.ConvTranspose2d(in_channels=nc, out_channels=c_out, kernel_size=3, stride=2, padding=1, output_padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        # Attribute names are kept so checkpoints keep loading.
        self.moduleConv = double_conv(1024, 512)
        self.moduleUpsample4 = upsample(512, 512)
        self.moduleDeconv3 = double_conv(512, 256)
        self.moduleUpsample3 = upsample(256, 256)
        self.moduleDeconv2 = double_conv(256, 128)
        self.moduleUpsample2 = upsample(128, 128)
        self.moduleDeconv1 = output_head(128, n_channel, 64)

    def forward(self, x):
        """Decode x of shape (B, 1024, H, W) into (B, n_channel, 8H, 8W)."""
        out = self.moduleConv(x)
        out = self.moduleDeconv3(self.moduleUpsample4(out))
        out = self.moduleDeconv2(self.moduleUpsample3(out))
        out = self.moduleDeconv1(self.moduleUpsample2(out))
        return out
class convAE(torch.nn.Module):
    """Memory-augmented convolutional autoencoder (reconstruction task).

    Encodes the input frames, reads/updates the memory module with the
    encoded features, and decodes the memory-augmented features back to
    an image.
    """

    def __init__(self, n_channel =3, t_length = 2, memory_size = 10, feature_dim = 512, key_dim = 512, temp_update = 0.1, temp_gather=0.1):
        super(convAE, self).__init__()
        self.encoder = Encoder(t_length, n_channel)
        self.decoder = Decoder(t_length, n_channel)
        self.memory = Memory(memory_size, feature_dim, key_dim, temp_update, temp_gather)

    def forward(self, x, keys, train=True):
        fea = self.encoder(x)
        if not train:
            # Test path: the memory returns one loss term only.
            updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss = self.memory(fea, keys, train)
            output = self.decoder(updated_fea)
            return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss
        # Train path: the memory additionally returns a spreading loss.
        updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss = self.memory(fea, keys, train)
        output = self.decoder(updated_fea)
        return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss
================================================
FILE: model/final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1.py
================================================
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from .memory_final_spatial_sumonly_weight_ranking_top1 import *
class Encoder(torch.nn.Module):
    """Convolutional encoder for the future-frame-prediction model.

    Unlike the reconstruction encoder, forward() also returns the three
    intermediate feature maps so the decoder can use skip connections.
    """

    def __init__(self, t_length = 5, n_channel =3):
        super(Encoder, self).__init__()

        def double_conv(c_in, c_out):
            # Two 3x3 convolutions, each followed by BatchNorm + ReLU.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        def double_conv_plain_tail(c_in, c_out):
            # Like double_conv, but the second conv has no norm/activation.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
            )

        # Attribute names are kept so checkpoints keep loading.
        self.moduleConv1 = double_conv(n_channel * (t_length - 1), 64)
        self.modulePool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.moduleConv2 = double_conv(64, 128)
        self.modulePool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.moduleConv3 = double_conv(128, 256)
        self.modulePool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.moduleConv4 = double_conv_plain_tail(256, 512)
        self.moduleBatchNorm = torch.nn.BatchNorm2d(512)
        self.moduleReLU = torch.nn.ReLU(inplace=False)

    def forward(self, x):
        """Return (bottleneck, skip1, skip2, skip3) for x of shape (B, C_in, H, W)."""
        skip1 = self.moduleConv1(x)                       # (B,  64, H,   W)
        skip2 = self.moduleConv2(self.modulePool1(skip1)) # (B, 128, H/2, W/2)
        skip3 = self.moduleConv3(self.modulePool2(skip2)) # (B, 256, H/4, W/4)
        bottom = self.moduleConv4(self.modulePool3(skip3))# (B, 512, H/8, W/8)
        return bottom, skip1, skip2, skip3
class Decoder(torch.nn.Module):
    """U-Net style decoder for the future-frame-prediction model.

    Each upsampling stage halves the channel count and the result is
    concatenated with the matching encoder skip feature; the final Tanh
    keeps outputs in [-1, 1].
    """

    def __init__(self, t_length = 5, n_channel =3):
        super(Decoder, self).__init__()

        def double_conv(c_in, c_out):
            # Two 3x3 convolutions, each followed by BatchNorm + ReLU.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        def output_head(c_in, c_out, nc):
            # Three 3x3 convs narrowing to c_out channels, ending in Tanh.
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=c_in, out_channels=nc, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(nc),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=nc, out_channels=nc, kernel_size=3, stride=1, padding=1),
                torch.nn.BatchNorm2d(nc),
                torch.nn.ReLU(inplace=False),
                torch.nn.Conv2d(in_channels=nc, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                torch.nn.Tanh(),
            )

        def upsample(nc, c_out):
            # Transposed conv that doubles the spatial resolution.
            return torch.nn.Sequential(
                torch.nn.ConvTranspose2d(in_channels=nc, out_channels=c_out, kernel_size=3, stride=2, padding=1, output_padding=1),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.ReLU(inplace=False),
            )

        # Attribute names are kept so checkpoints keep loading.
        self.moduleConv = double_conv(1024, 512)
        self.moduleUpsample4 = upsample(512, 256)
        self.moduleDeconv3 = double_conv(512, 256)
        self.moduleUpsample3 = upsample(256, 128)
        self.moduleDeconv2 = double_conv(256, 128)
        self.moduleUpsample2 = upsample(128, 64)
        self.moduleDeconv1 = output_head(128, n_channel, 64)

    def forward(self, x, skip1, skip2, skip3):
        """Decode the (B, 1024, H, W) bottleneck with encoder skips into (B, n_channel, 8H, 8W)."""
        out = self.moduleUpsample4(self.moduleConv(x))
        out = self.moduleDeconv3(torch.cat((skip3, out), dim=1))
        out = self.moduleUpsample3(out)
        out = self.moduleDeconv2(torch.cat((skip2, out), dim=1))
        out = self.moduleUpsample2(out)
        out = self.moduleDeconv1(torch.cat((skip1, out), dim=1))
        return out
class convAE(torch.nn.Module):
    """Memory-augmented encoder/decoder for future-frame prediction.

    Encodes the input frames (with skip features), reads/updates the
    memory with the bottleneck features, and decodes the memory-augmented
    features into the predicted frame.
    """

    def __init__(self, n_channel =3, t_length = 5, memory_size = 10, feature_dim = 512, key_dim = 512, temp_update = 0.1, temp_gather=0.1):
        super(convAE, self).__init__()
        self.encoder = Encoder(t_length, n_channel)
        self.decoder = Decoder(t_length, n_channel)
        self.memory = Memory(memory_size, feature_dim, key_dim, temp_update, temp_gather)

    def forward(self, x, keys, train=True):
        fea, skip1, skip2, skip3 = self.encoder(x)
        if not train:
            # Inference: memory is read only; extra tensors are returned for scoring.
            updated_fea, keys, softmax_score_query, softmax_score_memory, query, top1_keys, keys_ind, compactness_loss = self.memory(fea, keys, train)
            output = self.decoder(updated_fea, skip1, skip2, skip3)
            return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, query, top1_keys, keys_ind, compactness_loss
        # Training: memory items are updated and both loss terms are returned.
        updated_fea, keys, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = self.memory(fea, keys, train)
        output = self.decoder(updated_fea, skip1, skip2, skip3)
        return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss
================================================
FILE: model/memory_final_spatial_sumonly_weight_ranking_top1.py
================================================
import torch
import torch.autograd as ag
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
import functools
import random
from torch.nn import functional as F
def random_uniform(shape, low, high, cuda):
    """Draw a tensor of the given shape uniformly from [low, high).

    The sample is moved to the GPU when `cuda` is truthy.
    """
    sample = low + (high - low) * torch.rand(*shape)
    return sample.cuda() if cuda else sample
def distance(a, b):
    """Euclidean distance between a and b (summed over all elements), as a 1-element tensor."""
    diff = a - b
    return (diff * diff).sum().sqrt().unsqueeze(0)
def distance_batch(a, b):
    """Row-wise distance of every row of `a` against `b`.

    Equivalent to concatenating distance(a[i], b) for each row i, but the
    rows are collected in a list and concatenated once instead of the
    original quadratic cat-inside-a-loop.

    :param a: 2-D tensor of shape (bs, d)
    :param b: tensor broadcastable against a row of `a`
    :return: 1-D tensor of shape (bs,)
    """
    bs, _ = a.shape
    rows = [torch.sqrt(((a[i] - b) ** 2).sum()).unsqueeze(0) for i in range(bs)]
    return torch.cat(rows, 0)
def multiply(x):
    """Product of the elements of x (1 for an empty sequence)."""
    result = 1
    for factor in x:
        result *= factor
    return result
def flatten(x):
    """Flatten tensor x in place (via resize_) into a 1-D vector of all its elements."""
    count = 1
    for dim in x.size():
        count *= dim
    return x.resize_(count)
def index(batch_size, x):
    """Prepend a batch-index column to x: row i becomes [i, *x[i]]."""
    batch_ids = torch.arange(0, batch_size).long().unsqueeze(-1)
    return torch.cat((batch_ids, x), dim=1)
def MemoryLoss(memory):
    """Mean pairwise similarity between distinct memory items.

    Similarities are mapped from [-1, 1] to [0, 1]; the diagonal
    (self-similarity) is masked out before averaging, so the loss
    penalizes memory slots that point in similar directions.
    Requires a CUDA device (the identity mask is created on the GPU).
    """
    m, d = memory.size()
    similarity = torch.matmul(memory, torch.t(memory)) / 2 + 1 / 2  # 30X30, mapped to [0, 1]
    masked = torch.abs(similarity - torch.eye(m).cuda())
    return torch.sum(masked) / (m * (m - 1))
class Memory(nn.Module):
    """Memory of prototype items with query-driven read and write.

    Queries (L2-normalized encoder features) are scored against the
    memory keys by dot product; softmaxes over the two axes of the score
    matrix drive reading (feature augmentation), writing (key updates)
    and the compactness/separateness training losses.
    """
    def __init__(self, memory_size, feature_dim, key_dim, temp_update, temp_gather):
        super(Memory, self).__init__()
        # Constants
        self.memory_size = memory_size  # number of memory slots m
        self.feature_dim = feature_dim  # channel dim of encoder features
        self.key_dim = key_dim          # channel dim of memory keys
        self.temp_update = temp_update  # temperature; not referenced elsewhere in this class's visible code
        self.temp_gather = temp_gather  # temperature; not referenced elsewhere in this class's visible code
    def hard_neg_mem(self, mem, i):
        """Hardest negative: the key most similar to `mem`, excluding slot i.

        NOTE(review): reads self.keys_var, which is never assigned in this
        class -- presumably provided externally; confirm before calling.
        """
        similarity = torch.matmul(mem,torch.t(self.keys_var))
        similarity[:,i] = -1  # mask slot i so it can never be selected
        _, max_idx = torch.topk(similarity, 1, dim=1)
        return self.keys_var[max_idx]
    def random_pick_memory(self, mem, max_indices):
        """For every memory slot, pick one random query index assigned to
        that slot (-1 for slots no query was assigned to)."""
        m, d = mem.size()
        output = []
        for i in range(m):
            flattened_indices = (max_indices==i).nonzero()
            a, _ = flattened_indices.size()
            if a != 0:
                number = np.random.choice(a, 1)
                output.append(flattened_indices[number, 0])
            else:
                output.append(-1)
        return torch.tensor(output)
    def get_update_query(self, mem, max_indices, update_indices, score, query, train):
        """Build the per-slot aggregated query used to update each key.

        Slot i sums the queries whose top-1 slot is i, each weighted by
        score[q, i] / max(score[:, i]).
        NOTE(review): the train and test branches below are identical,
        and `update_indices` is accepted but never used.
        """
        m, d = mem.size()
        if train:
            query_update = torch.zeros((m,d)).cuda()
            # random_update = torch.zeros((m,d)).cuda()
            for i in range(m):
                # queries whose nearest memory item is slot i
                idx = torch.nonzero(max_indices.squeeze(1)==i)
                a, _ = idx.size()
                if a != 0:
                    query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0)
                else:
                    query_update[i] = 0  # no query selected this slot
            return query_update
        else:
            query_update = torch.zeros((m,d)).cuda()
            for i in range(m):
                idx = torch.nonzero(max_indices.squeeze(1)==i)
                a, _ = idx.size()
                if a != 0:
                    query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0)
                else:
                    query_update[i] = 0
            return query_update
    def get_score(self, mem, query):
        """Dot-product scores between every query position and every key.

        Returns the (b*h*w) x m score matrix softmaxed along the query
        axis (dim=0) and along the memory axis (dim=1), respectively.
        """
        bs, h,w,d = query.size()
        m, d = mem.size()
        score = torch.matmul(query, torch.t(mem))# b X h X w X m
        score = score.view(bs*h*w, m)# (b X h X w) X m
        score_query = F.softmax(score, dim=0)   # normalized over queries, per slot
        score_memory = F.softmax(score,dim=1)   # normalized over slots, per query
        return score_query, score_memory
    def forward(self, query, keys, train=True):
        """Train: returns (updated_query, updated_memory, score_q, score_m,
        separateness_loss, compactness_loss).
        Test: returns (updated_query, keys, score_q, score_m, query_re,
        top1_keys, keys_ind, compactness_loss); memory is NOT updated.
        """
        batch_size, dims,h,w = query.size() # b X d X h X w
        query = F.normalize(query, dim=1)  # L2-normalize along channels
        query = query.permute(0,2,3,1) # b X h X w X d
        #train
        if train:
            #losses
            separateness_loss, compactness_loss = self.gather_loss(query,keys, train)
            # read
            updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys)
            #update
            updated_memory = self.update(query, keys, train)
            return updated_query, updated_memory, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss
        #test
        else:
            # loss
            compactness_loss, query_re, top1_keys, keys_ind = self.gather_loss(query,keys, train)
            # read
            updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys)
            #update
            updated_memory = keys  # keys are left untouched at test time
            return updated_query, updated_memory, softmax_score_query, softmax_score_memory, query_re, top1_keys,keys_ind, compactness_loss
    def update(self, query, keys,train):
        """Fold the aggregated queries into the keys and renormalize.

        NOTE(review): both branches perform the same computation; the
        result is detached, so no gradient flows through the update.
        """
        batch_size, h,w,dims = query.size() # b X h X w X d
        softmax_score_query, softmax_score_memory = self.get_score(keys, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1)
        _, updating_indices = torch.topk(softmax_score_query, 1, dim=0)
        if train:
            query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape,train)
            updated_memory = F.normalize(query_update + keys, dim=1)
        else:
            query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape, train)
            updated_memory = F.normalize(query_update + keys, dim=1)
        return updated_memory.detach()
    def pointwise_gather_loss(self, query_reshape, keys, gathering_indices, train):
        """Elementwise (unreduced) MSE between each query and its top-1 key."""
        n,dims = query_reshape.size() # (b X h X w) X d
        loss_mse = torch.nn.MSELoss(reduction='none')
        pointwise_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach())
        return pointwise_loss
    def gather_loss(self,query, keys, train):
        """Train: (separateness triplet loss, compactness MSE to top-1 key).
        Test: (compactness MSE, reshaped queries, top-1 keys, top-1 indices).
        """
        batch_size, h,w,dims = query.size() # b X h X w X d
        if train:
            loss = torch.nn.TripletMarginLoss(margin=1.0)
            loss_mse = torch.nn.MSELoss()
            softmax_score_query, softmax_score_memory = self.get_score(keys, query)
            query_reshape = query.contiguous().view(batch_size*h*w, dims)
            _, gathering_indices = torch.topk(softmax_score_memory, 2, dim=1)
            #1st, 2nd closest memories
            pos = keys[gathering_indices[:,0]]
            neg = keys[gathering_indices[:,1]]
            top1_loss = loss_mse(query_reshape, pos.detach())
            gathering_loss = loss(query_reshape,pos.detach(), neg.detach())
            return gathering_loss, top1_loss
        else:
            loss_mse = torch.nn.MSELoss()
            softmax_score_query, softmax_score_memory = self.get_score(keys, query)
            query_reshape = query.contiguous().view(batch_size*h*w, dims)
            _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1)
            gathering_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach())
            return gathering_loss, query_reshape, keys[gathering_indices].squeeze(1).detach(), gathering_indices[:,0]
    def read(self, query, updated_memory):
        """Augment each query with its softmax-weighted memory readout,
        doubling the channel dim: (b, h, w, d) in -> (b, 2d, h, w) out."""
        batch_size, h,w,dims = query.size() # b X h X w X d
        softmax_score_query, softmax_score_memory = self.get_score(updated_memory, query)
        query_reshape = query.contiguous().view(batch_size*h*w, dims)
        concat_memory = torch.matmul(softmax_score_memory.detach(), updated_memory) # (b X h X w) X d
        updated_query = torch.cat((query_reshape, concat_memory), dim = 1) # (b X h X w) X 2d
        updated_query = updated_query.view(batch_size, h, w, 2*dims)
        updated_query = updated_query.permute(0,3,1,2)
        return updated_query, softmax_score_query, softmax_score_memory
================================================
FILE: model/utils.py
================================================
import numpy as np
from collections import OrderedDict
import os
import glob
import cv2
import torch.utils.data as data
# Module-level RNG with a fixed, hard-coded seed (2020) for reproducibility.
# NOTE(review): not referenced anywhere in the code visible here.
rng = np.random.RandomState(2020)
def np_load_frame(filename, resize_height, resize_width):
    """
    Load image path and convert it to numpy.ndarray. Notes that the color channels are BGR and the color space
    is normalized from [0, 255] to [-1, 1].
    :param filename: the full path of image
    :param resize_height: resized height
    :param resize_width: resized width
    :return: numpy.ndarray of shape (resize_height, resize_width, 3), dtype float32
    :raises FileNotFoundError: if the image cannot be read
    """
    image_decoded = cv2.imread(filename)
    if image_decoded is None:
        # cv2.imread signals failure by returning None (no exception);
        # fail loudly here instead of crashing later inside cv2.resize.
        raise FileNotFoundError("Unable to read image file: {}".format(filename))
    image_resized = cv2.resize(image_decoded, (resize_width, resize_height))
    image_resized = image_resized.astype(dtype=np.float32)
    image_resized = (image_resized / 127.5) - 1.0
    return image_resized
class DataLoader(data.Dataset):
    """Dataset of sliding windows of consecutive video frames.

    Each sample is (time_step + num_pred) consecutive frames of one video,
    loaded, transformed, and concatenated along the channel axis.
    Path components are parsed with os.path instead of the former
    '/'-splitting so the loader also works with Windows separators.
    """

    def __init__(self, video_folder, transform, resize_height, resize_width, time_step=4, num_pred=1):
        self.dir = video_folder          # root folder: one sub-directory per video
        self.transform = transform       # applied to every loaded frame
        self.videos = OrderedDict()      # video name -> {'path', 'frame', 'length'}
        self._resize_height = resize_height
        self._resize_width = resize_width
        self._time_step = time_step      # number of input frames per window
        self._num_pred = num_pred        # number of frames to predict
        self.setup()
        self.samples = self.get_all_samples()

    def setup(self):
        """Index every video directory and its sorted .jpg frame paths."""
        videos = glob.glob(os.path.join(self.dir, '*'))
        for video in sorted(videos):
            # os.path.basename instead of split('/') for portability.
            video_name = os.path.basename(video)
            self.videos[video_name] = {}
            self.videos[video_name]['path'] = video
            self.videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
            self.videos[video_name]['frame'].sort()
            self.videos[video_name]['length'] = len(self.videos[video_name]['frame'])

    def get_all_samples(self):
        """Return the first-frame path of every valid window across all videos."""
        frames = []
        videos = glob.glob(os.path.join(self.dir, '*'))
        for video in sorted(videos):
            video_name = os.path.basename(video)
            # Each window must leave room for time_step subsequent frames.
            for i in range(len(self.videos[video_name]['frame'])-self._time_step):
                frames.append(self.videos[video_name]['frame'][i])
        return frames

    def __getitem__(self, index):
        sample_path = self.samples[index]
        # Portable replacements for the former '/'-based splitting.
        video_name = os.path.basename(os.path.dirname(sample_path))
        frame_name = int(os.path.splitext(os.path.basename(sample_path))[0])
        # NOTE(review): the numeric file name is used directly as an index
        # into the frame list, which assumes frames are named as 0-based
        # consecutive integers -- confirm against the dataset layout.
        batch = []
        for i in range(self._time_step+self._num_pred):
            image = np_load_frame(self.videos[video_name]['frame'][frame_name+i], self._resize_height, self._resize_width)
            if self.transform is not None:
                batch.append(self.transform(image))
        # NOTE(review): if transform is None the batch stays empty and
        # np.concatenate raises; callers always pass a transform here.
        return np.concatenate(batch, axis=0)

    def __len__(self):
        return len(self.samples)
================================================
FILE: utils.py
================================================
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.utils as v_utils
import matplotlib.pyplot as plt
import cv2
import math
from collections import OrderedDict
import copy
import time
from sklearn.metrics import roc_auc_score
def rmse(predictions, targets):
    """Root-mean-square error between two arrays."""
    squared_diff = (predictions - targets) ** 2
    return np.sqrt(squared_diff.mean())
def psnr(mse):
    """Peak signal-to-noise ratio (dB) for a unit-range signal with the given MSE."""
    ratio = 1 / mse
    return 10 * math.log10(ratio)
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group (None if there are none)."""
    return next((group['lr'] for group in optimizer.param_groups), None)
def normalize_img(img):
    """Min-max normalize img to [0, 1]; works on a shallow copy of the input."""
    img_re = copy.copy(img)
    lo = np.min(img_re)
    hi = np.max(img_re)
    return (img_re - lo) / (hi - lo)
def point_score(outputs, imgs):
    """Weighted reconstruction error of the first item in the batch.

    Both tensors are mapped from [-1, 1] to [0, 1]; each pixel's squared
    error is weighted by 1 - exp(-error), so badly reconstructed regions
    dominate the score. Returns a Python float.
    """
    mse_fn = nn.MSELoss(reduction='none')
    pred = (outputs[0] + 1) / 2
    target = (imgs[0] + 1) / 2
    error = mse_fn(pred, target)
    weight = 1 - torch.exp(-error)
    score = torch.sum(weight * error) / torch.sum(weight)
    return score.item()
def anomaly_score(psnr, max_psnr, min_psnr):
    """Linearly rescale psnr into [0, 1] given the observed min/max."""
    span = max_psnr - min_psnr
    return (psnr - min_psnr) / span
def anomaly_score_inv(psnr, max_psnr, min_psnr):
    """Complement of the rescaled psnr: high for low-PSNR (anomalous) frames."""
    span = max_psnr - min_psnr
    return 1.0 - (psnr - min_psnr) / span
def anomaly_score_list(psnr_list):
    """Min-max normalize a list of PSNR values into [0, 1].

    The min and max are computed once up front; the original recomputed
    np.max/np.min over the whole list on every iteration (O(n^2)).

    :param psnr_list: sequence of PSNR values
    :return: list of normalized scores, same order as the input
    """
    max_psnr = np.max(psnr_list)
    min_psnr = np.min(psnr_list)
    span = max_psnr - min_psnr
    return [(p - min_psnr) / span for p in psnr_list]
def anomaly_score_list_inv(psnr_list):
    """Inverted min-max normalization: 1 for the lowest PSNR, 0 for the highest.

    The min and max are computed once up front; the original recomputed
    np.max/np.min over the whole list on every iteration (O(n^2)).

    :param psnr_list: sequence of PSNR values
    :return: list of inverted normalized scores, same order as the input
    """
    max_psnr = np.max(psnr_list)
    min_psnr = np.min(psnr_list)
    span = max_psnr - min_psnr
    return [1.0 - (p - min_psnr) / span for p in psnr_list]
def AUC(anomal_scores, labels):
    """Frame-level ROC AUC.

    `labels` carries a leading singleton axis (shape (1, n)) that is
    squeezed away before scoring.
    """
    y_true = np.squeeze(labels, axis=0)
    y_score = np.squeeze(anomal_scores)
    return roc_auc_score(y_true=y_true, y_score=y_score)
def score_sum(list1, list2, alpha):
    """Elementwise convex combination: alpha*list1[i] + (1-alpha)*list2[i]."""
    return [alpha * list1[i] + (1 - alpha) * list2[i] for i in range(len(list1))]