Repository: cvlab-yonsei/MNAD Branch: master Commit: 4e108a898605 Files: 14 Total size: 58.8 KB Directory structure: gitextract_9fesjy9r/ ├── Evaluate.py ├── MNAD_files/ │ └── style.css ├── README.md ├── Train.py ├── data/ │ ├── data_seqkey_all.py │ ├── frame_labels_avenue.npy │ ├── frame_labels_ped2.npy │ └── frame_labels_shanghai.npy ├── model/ │ ├── Memory.py │ ├── Reconstruction.py │ ├── final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1.py │ ├── memory_final_spatial_sumonly_weight_ranking_top1.py │ └── utils.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: Evaluate.py ================================================ import numpy as np import os import sys import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchvision import torch.nn.init as init import torch.utils.data as data import torch.utils.data.dataset as dataset import torchvision.datasets as dset import torchvision.transforms as transforms from torch.autograd import Variable import torchvision.utils as v_utils import matplotlib.pyplot as plt import cv2 import math from collections import OrderedDict import copy import time from model.utils import DataLoader from model.final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1 import * from model.Reconstruction import * from sklearn.metrics import roc_auc_score from utils import * import random import glob import argparse parser = argparse.ArgumentParser(description="MNAD") parser.add_argument('--gpus', nargs='+', type=str, help='gpus') parser.add_argument('--batch_size', type=int, default=4, help='batch size for training') parser.add_argument('--test_batch_size', type=int, default=1, help='batch size for test') parser.add_argument('--h', type=int, default=256, help='height of input images') parser.add_argument('--w', type=int, default=256, help='width of input images') parser.add_argument('--c', type=int, default=3, help='channel of input images') parser.add_argument('--method', type=str, default='pred', help='The target task for anoamly detection') parser.add_argument('--t_length', type=int, default=5, help='length of the frame sequences') parser.add_argument('--fdim', type=int, default=512, help='channel dimension of the features') parser.add_argument('--mdim', type=int, default=512, help='channel dimension of the memory items') parser.add_argument('--msize', type=int, default=10, help='number of the memory items') parser.add_argument('--alpha', type=float, default=0.6, help='weight for the anomality score') parser.add_argument('--th', type=float, default=0.01, help='threshold for test updating') parser.add_argument('--num_workers', type=int, default=2, help='number of workers for the train loader') parser.add_argument('--num_workers_test', type=int, default=1, help='number of workers for the test loader') parser.add_argument('--dataset_type', type=str, default='ped2', help='type of dataset: ped2, avenue, shanghai') parser.add_argument('--dataset_path', type=str, default='./dataset', help='directory of data') parser.add_argument('--model_dir', type=str, help='directory of model') parser.add_argument('--m_items_dir', type=str, help='directory of model') args = parser.parse_args() os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" if args.gpus is None: gpus = "0" os.environ["CUDA_VISIBLE_DEVICES"]= gpus else: gpus = "" for i in range(len(args.gpus)): gpus = gpus + args.gpus[i] + "," os.environ["CUDA_VISIBLE_DEVICES"]= gpus[:-1] torch.backends.cudnn.enabled = True # make sure to use cudnn for computational performance test_folder = args.dataset_path+"/"+args.dataset_type+"/testing/frames" # Loading dataset test_dataset = DataLoader(test_folder, transforms.Compose([ transforms.ToTensor(), ]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1) test_size = len(test_dataset) test_batch = data.DataLoader(test_dataset, batch_size = args.test_batch_size, shuffle=False, num_workers=args.num_workers_test, drop_last=False) loss_func_mse = nn.MSELoss(reduction='none') # Loading the trained model model = torch.load(args.model_dir) model.cuda() m_items = torch.load(args.m_items_dir) labels = np.load('./data/frame_labels_'+args.dataset_type+'.npy') videos = OrderedDict() videos_list = sorted(glob.glob(os.path.join(test_folder, '*'))) for video in videos_list: video_name = video.split('/')[-1] videos[video_name] = {} videos[video_name]['path'] = video videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg')) videos[video_name]['frame'].sort() videos[video_name]['length'] = len(videos[video_name]['frame']) labels_list = [] label_length = 0 psnr_list = {} feature_distance_list = {} print('Evaluation of', args.dataset_type) # Setting for video anomaly detection for video in sorted(videos_list): video_name = video.split('/')[-1] if args.method == 'pred': labels_list = np.append(labels_list, labels[0][4+label_length:videos[video_name]['length']+label_length]) else: labels_list = np.append(labels_list, labels[0][label_length:videos[video_name]['length']+label_length]) label_length += videos[video_name]['length'] psnr_list[video_name] = [] feature_distance_list[video_name] = [] label_length = 0 video_num = 0 label_length += videos[videos_list[video_num].split('/')[-1]]['length'] m_items_test = m_items.clone() model.eval() for k,(imgs) in enumerate(test_batch): if args.method == 'pred': if k == label_length-4*(video_num+1): video_num += 1 label_length += videos[videos_list[video_num].split('/')[-1]]['length'] else: if k == label_length: video_num += 1 label_length += videos[videos_list[video_num].split('/')[-1]]['length'] imgs = Variable(imgs).cuda() if args.method == 'pred': outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, _, _, _, compactness_loss = model.forward(imgs[:,0:3*4], m_items_test, False) mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0,3*4:]+1)/2)).item() mse_feas = compactness_loss.item() # Calculating the threshold for updating at the test time point_sc = point_score(outputs, imgs[:,3*4:]) else: outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, compactness_loss = model.forward(imgs, m_items_test, False) mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0]+1)/2)).item() mse_feas = compactness_loss.item() # Calculating the threshold for updating at the test time point_sc = point_score(outputs, imgs) if point_sc < args.th: query = F.normalize(feas, dim=1) query = query.permute(0,2,3,1) # b X h X w X d m_items_test = model.memory.update(query, m_items_test, False) psnr_list[videos_list[video_num].split('/')[-1]].append(psnr(mse_imgs)) feature_distance_list[videos_list[video_num].split('/')[-1]].append(mse_feas) # Measuring the abnormality score and the AUC anomaly_score_total_list = [] for video in sorted(videos_list): video_name = video.split('/')[-1] anomaly_score_total_list += score_sum(anomaly_score_list(psnr_list[video_name]), anomaly_score_list_inv(feature_distance_list[video_name]), args.alpha) anomaly_score_total_list = np.asarray(anomaly_score_total_list) accuracy = AUC(anomaly_score_total_list, np.expand_dims(1-labels_list, 0)) print('The result of ', args.dataset_type) print('AUC: ', accuracy*100, '%') ================================================ FILE: MNAD_files/style.css ================================================ /* Space out content a bit */ @import url('https://fonts.googleapis.com/css?family=Baloo|Bungee+Inline|Lato|Righteous|Shojumaru'); body { padding-top: 20px; padding-bottom: 20px; font-family: 'Lato', cursive; font-size: 14px; } /* Everything but the jumbotron gets side spacing for mobile first views */ .header, .row, .footer { padding-left: 15px; padding-right: 15px; } /* Custom page header */ .header { border-bottom: 1px solid #e5e5e5; } /* Make the masthead heading the same height as the navigation */ .header h1 { margin-top: 0; margin-bottom: 0; line-height: 40px; padding-bottom: 19px; font-size: 30px; font-weight: bold; } .header h3 { margin-top: 0; margin-bottom: 0; line-height: 40px; padding-bottom: 19px; font-size: 20px; } .header h4 { font-family: 'Baloo', cursive; } /* Custom page footer */ .footer { padding-top: 19px; color: #777; border-top: 1px solid #e5e5e5; } /* Customize container */ @media (min-width: 938px) { .container { max-width: 900px; } } .container-narrow > hr { margin: 20px 0; } /* Main marketing message and sign up button */ .container .jumbotron { text-align: center; border-bottom: 1px solid #e5e5e5; padding-left: 20px; padding: 30px; } .jumbotron .btn { font-size: 21px; padding: 14px 24px; } .row p + h3 { margin-top: 28px; } div.row h3 { padding-bottom: 5px; border-bottom: 1px solid #ccc; } /* Responsive: Portrait tablets and up */ @media screen and (min-width: 938px) { /* Remove the padding we set earlier */ .header, .marketing, .footer { padding-left: 0; padding-right: 0; } /* Space out the masthead */ .header { margin-bottom: 30px; } /* Remove the bottom border on the jumbotron for visual effect */ .jumbotron { border-bottom: 0; } } .readme h1 { display: none; } .left_column{ float:middle; } .right_column{ float:middle; } ================================================ FILE: README.md ================================================ # PyTorch implementation of "Learning Memory-guided Normality for Anomaly Detection"

no_imageno_image

This is the implementation of the paper "Learning Memory-guided Normality for Anomaly Detection (CVPR 2020)". For more information, checkout the project site [[website](https://cvlab.yonsei.ac.kr/projects/MNAD/)] and the paper [[PDF](http://openaccess.thecvf.com/content_CVPR_2020/papers/Park_Learning_Memory-Guided_Normality_for_Anomaly_Detection_CVPR_2020_paper.pdf)]. ## Dependencies * Python 3.6 * PyTorch 1.1.0 * Numpy * Sklearn ## Datasets * USCD Ped2 [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)] * CUHK Avenue [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)] * ShanghaiTech [[dataset](https://github.com/StevenLiuWen/ano_pred_cvpr2018)] These datasets are from an official github of "Future Frame Prediction for Anomaly Detection - A New Baseline (CVPR 2018)". Download the datasets into ``dataset`` folder, like ``./dataset/ped2/`` ## Update * 02/04/21: We uploaded the codes based on reconstruction method, and pretrained wieghts for Ped2 reconstruction, Avenue prediction and Avenue reconstruction. ## Training * ~~The training and testing codes are based on prediction method~~ * Now you can implemnet the codes based on both prediction and reconstruction methods. * The codes are basically based on the prediction method, and you can easily implement this as ```bash git clone https://github.com/cvlab-yonsei/projects cd projects/MNAD/code python Train.py # for training ``` * You can freely define parameters with your own settings like ```bash python Train.py --gpus 1 --dataset_path 'your_dataset_directory' --dataset_type avenue --exp_dir 'your_log_directory' ``` * For the reconstruction task, you need to newly set the parameters, *e.g,*, the target task, the weights of the losses and the number of the time sequence. ```bash python Train.py --method recon --loss_compact 0.01 --loss_separate 0.01 --t_length 1 # for training ``` ## Evaluation * Test your own model * Check your dataset_type (ped2, avenue or shanghai) ```bash python Evaluate.py --dataset_type ped2 --model_dir your_model.pth --m_items_dir your_m_items.pt ``` * For the reconstruction task, you need to set the parameters as ```bash python Evaluate.py --method recon --t_length 1 --alpha 0.7 --th 0.015 --dataset_type ped2 --model_dir your_model.pth --m_items_dir your_m_items.pt ``` * Test the model with our pre-trained model and memory items ```bash python Evaluate.py --dataset_type ped2 --model_dir pretrained_model.pth --m_items_dir m_items.pt ``` ## Pre-trained model and memory items Will be released soon. ## Bibtex ``` @inproceedings{park2020learning, title={Learning Memory-guided Normality for Anomaly Detection}, author={Park, Hyunjong and Noh, Jongyoun and Ham, Bumsub}, booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, pages={14372--14381}, year={2020} } ``` ================================================ FILE: Train.py ================================================ import numpy as np import os import sys import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchvision import torch.nn.init as init import torch.utils.data as data import torch.utils.data.dataset as dataset import torchvision.datasets as dset import torchvision.transforms as transforms from torch.autograd import Variable import torchvision.utils as v_utils import matplotlib.pyplot as plt import cv2 import math from collections import OrderedDict import copy import time from model.utils import DataLoader from sklearn.metrics import roc_auc_score from utils import * import random import argparse parser = argparse.ArgumentParser(description="MNAD") parser.add_argument('--gpus', nargs='+', type=str, help='gpus') parser.add_argument('--batch_size', type=int, default=4, help='batch size for training') parser.add_argument('--test_batch_size', type=int, default=1, help='batch size for test') parser.add_argument('--epochs', type=int, default=60, help='number of epochs for training') parser.add_argument('--loss_compact', type=float, default=0.1, help='weight of the feature compactness loss') parser.add_argument('--loss_separate', type=float, default=0.1, help='weight of the feature separateness loss') parser.add_argument('--h', type=int, default=256, help='height of input images') parser.add_argument('--w', type=int, default=256, help='width of input images') parser.add_argument('--c', type=int, default=3, help='channel of input images') parser.add_argument('--lr', type=float, default=2e-4, help='initial learning rate') parser.add_argument('--method', type=str, default='pred', help='The target task for anoamly detection') parser.add_argument('--t_length', type=int, default=5, help='length of the frame sequences') parser.add_argument('--fdim', type=int, default=512, help='channel dimension of the features') parser.add_argument('--mdim', type=int, default=512, help='channel dimension of the memory items') parser.add_argument('--msize', type=int, default=10, help='number of the memory items') parser.add_argument('--num_workers', type=int, default=2, help='number of workers for the train loader') parser.add_argument('--num_workers_test', type=int, default=1, help='number of workers for the test loader') parser.add_argument('--dataset_type', type=str, default='ped2', help='type of dataset: ped2, avenue, shanghai') parser.add_argument('--dataset_path', type=str, default='./dataset', help='directory of data') parser.add_argument('--exp_dir', type=str, default='log', help='directory of log') args = parser.parse_args() os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" if args.gpus is None: gpus = "0" os.environ["CUDA_VISIBLE_DEVICES"]= gpus else: gpus = "" for i in range(len(args.gpus)): gpus = gpus + args.gpus[i] + "," os.environ["CUDA_VISIBLE_DEVICES"]= gpus[:-1] torch.backends.cudnn.enabled = True # make sure to use cudnn for computational performance train_folder = args.dataset_path+"/"+args.dataset_type+"/training/frames" test_folder = args.dataset_path+"/"+args.dataset_type+"/testing/frames" # Loading dataset train_dataset = DataLoader(train_folder, transforms.Compose([ transforms.ToTensor(), ]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1) test_dataset = DataLoader(test_folder, transforms.Compose([ transforms.ToTensor(), ]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1) train_size = len(train_dataset) test_size = len(test_dataset) train_batch = data.DataLoader(train_dataset, batch_size = args.batch_size, shuffle=True, num_workers=args.num_workers, drop_last=True) test_batch = data.DataLoader(test_dataset, batch_size = args.test_batch_size, shuffle=False, num_workers=args.num_workers_test, drop_last=False) # Model setting assert args.method == 'pred' or args.method == 'recon', 'Wrong task name' if args.method == 'pred': from model.final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1 import * model = convAE(args.c, args.t_length, args.msize, args.fdim, args.mdim) else: from model.Reconstruction import * model = convAE(args.c, memory_size = args.msize, feature_dim = args.fdim, key_dim = args.mdim) params_encoder = list(model.encoder.parameters()) params_decoder = list(model.decoder.parameters()) params = params_encoder + params_decoder optimizer = torch.optim.Adam(params, lr = args.lr) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max =args.epochs) model.cuda() # Report the training process log_dir = os.path.join('./exp', args.dataset_type, args.method, args.exp_dir) if not os.path.exists(log_dir): os.makedirs(log_dir) orig_stdout = sys.stdout f = open(os.path.join(log_dir, 'log.txt'),'w') sys.stdout= f loss_func_mse = nn.MSELoss(reduction='none') # Training m_items = F.normalize(torch.rand((args.msize, args.mdim), dtype=torch.float), dim=1).cuda() # Initialize the memory items for epoch in range(args.epochs): labels_list = [] model.train() start = time.time() for j,(imgs) in enumerate(train_batch): imgs = Variable(imgs).cuda() if args.method == 'pred': outputs, _, _, m_items, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = model.forward(imgs[:,0:12], m_items, True) else: outputs, _, _, m_items, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = model.forward(imgs, m_items, True) optimizer.zero_grad() if args.method == 'pred': loss_pixel = torch.mean(loss_func_mse(outputs, imgs[:,12:])) else: loss_pixel = torch.mean(loss_func_mse(outputs, imgs)) loss = loss_pixel + args.loss_compact * compactness_loss + args.loss_separate * separateness_loss loss.backward(retain_graph=True) optimizer.step() scheduler.step() print('----------------------------------------') print('Epoch:', epoch+1) if args.method == 'pred': print('Loss: Prediction {:.6f}/ Compactness {:.6f}/ Separateness {:.6f}'.format(loss_pixel.item(), compactness_loss.item(), separateness_loss.item())) else: print('Loss: Reconstruction {:.6f}/ Compactness {:.6f}/ Separateness {:.6f}'.format(loss_pixel.item(), compactness_loss.item(), separateness_loss.item())) print('Memory_items:') print(m_items) print('----------------------------------------') print('Training is finished') # Save the model and the memory items torch.save(model, os.path.join(log_dir, 'model.pth')) torch.save(m_items, os.path.join(log_dir, 'keys.pt')) sys.stdout = orig_stdout f.close() ================================================ FILE: data/data_seqkey_all.py ================================================ import numpy as np import os import torch import torch.utils.data as data import torchvision.transforms as transforms from torch.utils.data import DataLoader from PIL import Image import os.path import sys def pil_loader(path): # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) with open(path, 'rb') as f: img = Image.open(f) return img.convert('RGB') def accimage_loader(path): import accimage try: return accimage.Image(path) except IOError: # Potentially a decoding problem, fall back to PIL.Image return pil_loader(path) def default_loader(path): from torchvision import get_image_backend if get_image_backend() == 'accimage': return accimage_loader(path) else: return pil_loader(path) def make_dataset(dir, class_to_idx): frames = [] print(sorted(class_to_idx.keys())) dir = os.path.expanduser(dir) for target in sorted(class_to_idx.keys()): print(target) d = os.path.join(dir, target) if not os.path.isdir(d): continue # new_fnames = [] for root, _, fnames in sorted(os.walk(d)): for fname in sorted(fnames): # fname = fname.split('.')[0] # seq = fname.split('_')[0][1:] # fname = fname.split('_')[1] # fname = fname.zfill(4) # new_fnames.append('V'+seq+'_'+fname+'.png') path = os.path.join(root, fname) frames.append(path) return frames class DatasetFolder(data.Dataset): def __init__(self, root, loader=default_loader,transform=None, target_transform=None, length=5): classes, class_to_idx = self._find_classes(root) samples = make_dataset(root, class_to_idx) if len(samples) == 0: raise(RuntimeError("Found 0 files in subfolders of: " + root)) self.root = root self.loader = loader self.length = length # self.stride = np.random.choice(3,1) + 1 self.classes = classes self.class_to_idx = class_to_idx # self.samples_gt = samples[self.length:] self.samples = samples[:-(self.length-1)] self.samples_all = samples self.samples_pool = samples[1:] # self.targets = [s[1] for s in samples] self.transform = transform self.target_transform = target_transform def _find_classes(self, dir): """ Finds the class folders in a dataset. Args: dir (string): Root directory path. Returns: tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary. Ensures: No class is a subdirectory of another. """ if sys.version_info >= (3, 5): # Faster and available in Python 3.5 and above classes = [d.name for d in os.scandir(dir) if d.is_dir()] else: classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] classes.sort() class_to_idx = {classes[i]: i for i in range(len(classes))} return classes, class_to_idx def __getitem__(self, index): """ Args: index (int): Index Returns: tuple: (samples, gt(+length)) """ sample = [] path_start = self.samples[index] sample_start = self.loader(path_start) if self.transform is not None: sample_start = self.transform(sample_start) sample.append(sample_start) for i in range(self.length - 1): path = self.samples_all[index + (i+1)] sample_immediate = self.loader(path) if self.transform is not None: sample_immediate = self.transform(sample_immediate) sample.append(sample_immediate) # path_gt = self.samples_gt[index] # sample_gt = self.loader(path_gt) # if self.transform is not None: # sample_gt = self.transform(sample_gt) sample_input = sample[0] for i in range(self.length-1): sample_input = torch.cat((sample_input,sample[i+1]), dim=0) return sample_input def _stride(self): stride = int(np.random.choice(3,1) + 1) #if stride != 1: # self.samples_gt = self.samples_all[self.length*stride:] # self.samples = self.samples_all[:-(self.length*stride)] return stride def __len__(self): return len(self.samples) def __repr__(self): fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) fmt_str += ' Root Location: {}\n'.format(self.root) tmp = ' Transforms (if any): ' fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) tmp = ' Target Transforms (if any): ' fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) return fmt_str IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', 'webp'] class ImageFolder(DatasetFolder): def __init__(self, root, transform=None, target_transform=None, loader=default_loader, length=5): super(ImageFolder, self).__init__(root, loader, transform=transform, target_transform=target_transform) self.imgs = self.samples ================================================ FILE: model/Memory.py ================================================ import torch import torch.autograd as ag import torch.nn as nn import torch.nn.functional as F import numpy as np import math import functools import random from torch.nn import functional as F def random_uniform(shape, low, high, cuda): x = torch.rand(*shape) result_cpu = (high - low) * x + low if cuda: return result_cpu.cuda() else: return result_cpu def distance(a, b): return torch.sqrt(((a - b) ** 2).sum()).unsqueeze(0) def distance_batch(a, b): bs, _ = a.shape result = distance(a[0], b) for i in range(bs-1): result = torch.cat((result, distance(a[i], b)), 0) return result def multiply(x): #to flatten matrix into a vector return functools.reduce(lambda x,y: x*y, x, 1) def flatten(x): """ Flatten matrix into a vector """ count = multiply(x.size()) return x.resize_(count) def index(batch_size, x): idx = torch.arange(0, batch_size).long() idx = torch.unsqueeze(idx, -1) return torch.cat((idx, x), dim=1) def MemoryLoss(memory): m, d = memory.size() memory_t = torch.t(memory) similarity = (torch.matmul(memory, memory_t))/2 + 1/2 # 30X30 identity_mask = torch.eye(m).cuda() sim = torch.abs(similarity - identity_mask) return torch.sum(sim)/(m*(m-1)) class Memory(nn.Module): def __init__(self, memory_size, feature_dim, key_dim, temp_update, temp_gather): super(Memory, self).__init__() # Constants self.memory_size = memory_size self.feature_dim = feature_dim self.key_dim = key_dim self.temp_update = temp_update self.temp_gather = temp_gather def hard_neg_mem(self, mem, i): similarity = torch.matmul(mem,torch.t(self.keys_var)) similarity[:,i] = -1 _, max_idx = torch.topk(similarity, 1, dim=1) return self.keys_var[max_idx] def random_pick_memory(self, mem, max_indices): m, d = mem.size() output = [] for i in range(m): flattened_indices = (max_indices==i).nonzero() a, _ = flattened_indices.size() if a != 0: number = np.random.choice(a, 1) output.append(flattened_indices[number, 0]) else: output.append(-1) return torch.tensor(output) def get_update_query(self, mem, max_indices, update_indices, score, query, train): m, d = mem.size() if train: query_update = torch.zeros((m,d)).cuda() random_update = torch.zeros((m,d)).cuda() for i in range(m): idx = torch.nonzero(max_indices.squeeze(1)==i) a, _ = idx.size() #ex = update_indices[0][i] if a != 0: #random_idx = torch.randperm(a)[0] #idx = idx[idx != ex] # query_update[i] = torch.sum(query[idx].squeeze(1), dim=0) query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0) #random_update[i] = query[random_idx] * (score[random_idx,i] / torch.max(score[:,i])) else: query_update[i] = 0 #random_update[i] = 0 return query_update else: query_update = torch.zeros((m,d)).cuda() for i in range(m): idx = torch.nonzero(max_indices.squeeze(1)==i) a, _ = idx.size() #ex = update_indices[0][i] if a != 0: #idx = idx[idx != ex] query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0) # query_update[i] = torch.sum(query[idx].squeeze(1), dim=0) else: query_update[i] = 0 return query_update def get_score(self, mem, query): bs, h,w,d = query.size() m, d = mem.size() score = torch.matmul(query, torch.t(mem))# b X h X w X m score = score.view(bs*h*w, m)# (b X h X w) X m score_query = F.softmax(score, dim=0) score_memory = F.softmax(score,dim=1) return score_query, score_memory def forward(self, query, keys, train=True): batch_size, dims,h,w = query.size() # b X d X h X w query = F.normalize(query, dim=1) query = query.permute(0,2,3,1) # b X h X w X d #train if train: #gathering loss gathering_loss = self.gather_loss(query,keys, train) #spreading_loss spreading_loss = self.spread_loss(query, keys, train) # read updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys) #update updated_memory = self.update(query, keys, train) return updated_query, updated_memory, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss #test else: #gathering loss gathering_loss = self.gather_loss(query,keys, train) # read updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys) #update updated_memory = keys return updated_query, updated_memory, softmax_score_query, softmax_score_memory, gathering_loss def update(self, query, keys,train): batch_size, h,w,dims = query.size() # b X h X w X d softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1) _, updating_indices = torch.topk(softmax_score_query, 1, dim=0) if train: # top-1 queries (of each memory) update (weighted sum) & random pick query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape,train) updated_memory = F.normalize(query_update + keys, dim=1) else: # only weighted sum update when test query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape, train) updated_memory = F.normalize(query_update + keys, dim=1) # top-1 update #query_update = query_reshape[updating_indices][0] #updated_memory = F.normalize(query_update + keys, dim=1) return updated_memory.detach() def pointwise_gather_loss(self, query_reshape, keys, gathering_indices, train): n,dims = query_reshape.size() # (b X h X w) X d loss_mse = torch.nn.MSELoss(reduction='none') pointwise_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach()) return pointwise_loss def spread_loss(self,query, keys, train): batch_size, h,w,dims = query.size() # b X h X w X d loss = torch.nn.TripletMarginLoss(margin=1.0) softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 2, dim=1) #1st, 2nd closest memories pos = keys[gathering_indices[:,0]] neg = keys[gathering_indices[:,1]] spreading_loss = loss(query_reshape,pos.detach(), neg.detach()) return spreading_loss def gather_loss(self, query, keys, train): batch_size, h,w,dims = query.size() # b X h X w X d loss_mse = torch.nn.MSELoss() softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1) gathering_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach()) return gathering_loss def read(self, query, updated_memory): batch_size, h,w,dims = query.size() # b X h X w X d softmax_score_query, softmax_score_memory = self.get_score(updated_memory, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) concat_memory = torch.matmul(softmax_score_memory.detach(), updated_memory) # (b X h X w) X d updated_query = torch.cat((query_reshape, concat_memory), dim = 1) # (b X h X w) X 2d updated_query = updated_query.view(batch_size, h, w, 2*dims) updated_query = updated_query.permute(0,3,1,2) return updated_query, softmax_score_query, softmax_score_memory ================================================ FILE: model/Reconstruction.py ================================================ import numpy as np import os import sys import torch import torch.nn as nn import torch.nn.functional as F from .Memory import * class Encoder(torch.nn.Module): def __init__(self, t_length = 2, n_channel =3): super(Encoder, self).__init__() def Basic(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) def Basic_(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), ) self.moduleConv1 = Basic(n_channel*(t_length-1), 64) self.modulePool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv2 = Basic(64, 128) self.modulePool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv3 = Basic(128, 256) self.modulePool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv4 = Basic_(256, 512) self.moduleBatchNorm = torch.nn.BatchNorm2d(512) self.moduleReLU = torch.nn.ReLU(inplace=False) def forward(self, x): tensorConv1 = self.moduleConv1(x) tensorPool1 = self.modulePool1(tensorConv1) tensorConv2 = self.moduleConv2(tensorPool1) tensorPool2 = self.modulePool2(tensorConv2) tensorConv3 = self.moduleConv3(tensorPool2) tensorPool3 = self.modulePool3(tensorConv3) tensorConv4 = self.moduleConv4(tensorPool3) return tensorConv4 class Decoder(torch.nn.Module): def __init__(self, t_length = 2, n_channel =3): super(Decoder, self).__init__() def Basic(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) def Gen(intInput, intOutput, nc): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=nc, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(nc), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=nc, out_channels=nc, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(nc), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=nc, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.Tanh() ) def Upsample(nc, intOutput): return torch.nn.Sequential( torch.nn.ConvTranspose2d(in_channels = nc, out_channels=intOutput, kernel_size = 3, stride = 2, padding = 1, output_padding = 1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) self.moduleConv = Basic(1024, 512) self.moduleUpsample4 = Upsample(512, 512) self.moduleDeconv3 = Basic(512, 256) self.moduleUpsample3 = Upsample(256, 256) self.moduleDeconv2 = Basic(256, 128) self.moduleUpsample2 = Upsample(128, 128) self.moduleDeconv1 = Gen(128,n_channel,64) def forward(self, x): tensorConv = self.moduleConv(x) tensorUpsample4 = self.moduleUpsample4(tensorConv) tensorDeconv3 = self.moduleDeconv3(tensorUpsample4) tensorUpsample3 = self.moduleUpsample3(tensorDeconv3) tensorDeconv2 = self.moduleDeconv2(tensorUpsample3) tensorUpsample2 = self.moduleUpsample2(tensorDeconv2) output = self.moduleDeconv1(tensorUpsample2) return output class convAE(torch.nn.Module): def __init__(self, n_channel =3, t_length = 2, memory_size = 10, feature_dim = 512, key_dim = 512, temp_update = 0.1, temp_gather=0.1): super(convAE, self).__init__() self.encoder = Encoder(t_length, n_channel) self.decoder = Decoder(t_length, n_channel) self.memory = Memory(memory_size,feature_dim, key_dim, temp_update, temp_gather) def forward(self, x, keys,train=True): fea = self.encoder(x) if train: updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss = self.memory(fea, keys, train) output = self.decoder(updated_fea) return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss, spreading_loss #test else: updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss = self.memory(fea, keys, train) output = self.decoder(updated_fea) return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, gathering_loss ================================================ FILE: model/final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1.py ================================================ import numpy as np import os import sys import torch import torch.nn as nn import torch.nn.functional as F from .memory_final_spatial_sumonly_weight_ranking_top1 import * class Encoder(torch.nn.Module): def __init__(self, t_length = 5, n_channel =3): super(Encoder, self).__init__() def Basic(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) def Basic_(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), ) self.moduleConv1 = Basic(n_channel*(t_length-1), 64) self.modulePool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv2 = Basic(64, 128) self.modulePool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv3 = Basic(128, 256) self.modulePool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2) self.moduleConv4 = Basic_(256, 512) self.moduleBatchNorm = torch.nn.BatchNorm2d(512) self.moduleReLU = torch.nn.ReLU(inplace=False) def forward(self, x): tensorConv1 = self.moduleConv1(x) tensorPool1 = self.modulePool1(tensorConv1) tensorConv2 = self.moduleConv2(tensorPool1) tensorPool2 = self.modulePool2(tensorConv2) tensorConv3 = self.moduleConv3(tensorPool2) tensorPool3 = self.modulePool3(tensorConv3) tensorConv4 = self.moduleConv4(tensorPool3) return tensorConv4, tensorConv1, tensorConv2, tensorConv3 class Decoder(torch.nn.Module): def __init__(self, t_length = 5, n_channel =3): super(Decoder, self).__init__() def Basic(intInput, intOutput): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=intOutput, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) def Gen(intInput, intOutput, nc): return torch.nn.Sequential( torch.nn.Conv2d(in_channels=intInput, out_channels=nc, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(nc), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=nc, out_channels=nc, kernel_size=3, stride=1, padding=1), torch.nn.BatchNorm2d(nc), torch.nn.ReLU(inplace=False), torch.nn.Conv2d(in_channels=nc, out_channels=intOutput, kernel_size=3, stride=1, padding=1), torch.nn.Tanh() ) def Upsample(nc, intOutput): return torch.nn.Sequential( torch.nn.ConvTranspose2d(in_channels = nc, out_channels=intOutput, kernel_size = 3, stride = 2, padding = 1, output_padding = 1), torch.nn.BatchNorm2d(intOutput), torch.nn.ReLU(inplace=False) ) self.moduleConv = Basic(1024, 512) self.moduleUpsample4 = Upsample(512, 256) self.moduleDeconv3 = Basic(512, 256) self.moduleUpsample3 = Upsample(256, 128) self.moduleDeconv2 = Basic(256, 128) self.moduleUpsample2 = Upsample(128, 64) self.moduleDeconv1 = Gen(128,n_channel,64) def forward(self, x, skip1, skip2, skip3): tensorConv = self.moduleConv(x) tensorUpsample4 = self.moduleUpsample4(tensorConv) cat4 = torch.cat((skip3, tensorUpsample4), dim = 1) tensorDeconv3 = self.moduleDeconv3(cat4) tensorUpsample3 = self.moduleUpsample3(tensorDeconv3) cat3 = torch.cat((skip2, tensorUpsample3), dim = 1) tensorDeconv2 = self.moduleDeconv2(cat3) tensorUpsample2 = self.moduleUpsample2(tensorDeconv2) cat2 = torch.cat((skip1, tensorUpsample2), dim = 1) output = self.moduleDeconv1(cat2) return output class convAE(torch.nn.Module): def __init__(self, n_channel =3, t_length = 5, memory_size = 10, feature_dim = 512, key_dim = 512, temp_update = 0.1, temp_gather=0.1): super(convAE, self).__init__() self.encoder = Encoder(t_length, n_channel) self.decoder = Decoder(t_length, n_channel) self.memory = Memory(memory_size,feature_dim, key_dim, temp_update, temp_gather) def forward(self, x, keys,train=True): fea, skip1, skip2, skip3 = self.encoder(x) if train: updated_fea, keys, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss = self.memory(fea, keys, train) output = self.decoder(updated_fea, skip1, skip2, skip3) return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss #test else: updated_fea, keys, softmax_score_query, softmax_score_memory,query, top1_keys, keys_ind, compactness_loss = self.memory(fea, keys, train) output = self.decoder(updated_fea, skip1, skip2, skip3) return output, fea, updated_fea, keys, softmax_score_query, softmax_score_memory, query, top1_keys, keys_ind, compactness_loss ================================================ FILE: model/memory_final_spatial_sumonly_weight_ranking_top1.py ================================================ import torch import torch.autograd as ag import torch.nn as nn import torch.nn.functional as F import numpy as np import math import functools import random from torch.nn import functional as F def random_uniform(shape, low, high, cuda): x = torch.rand(*shape) result_cpu = (high - low) * x + low if cuda: return result_cpu.cuda() else: return result_cpu def distance(a, b): return torch.sqrt(((a - b) ** 2).sum()).unsqueeze(0) def distance_batch(a, b): bs, _ = a.shape result = distance(a[0], b) for i in range(bs-1): result = torch.cat((result, distance(a[i], b)), 0) return result def multiply(x): #to flatten matrix into a vector return functools.reduce(lambda x,y: x*y, x, 1) def flatten(x): """ Flatten matrix into a vector """ count = multiply(x.size()) return x.resize_(count) def index(batch_size, x): idx = torch.arange(0, batch_size).long() idx = torch.unsqueeze(idx, -1) return torch.cat((idx, x), dim=1) def MemoryLoss(memory): m, d = memory.size() memory_t = torch.t(memory) similarity = (torch.matmul(memory, memory_t))/2 + 1/2 # 30X30 identity_mask = torch.eye(m).cuda() sim = torch.abs(similarity - identity_mask) return torch.sum(sim)/(m*(m-1)) class Memory(nn.Module): def __init__(self, memory_size, feature_dim, key_dim, temp_update, temp_gather): super(Memory, self).__init__() # Constants self.memory_size = memory_size self.feature_dim = feature_dim self.key_dim = key_dim self.temp_update = temp_update self.temp_gather = temp_gather def hard_neg_mem(self, mem, i): similarity = torch.matmul(mem,torch.t(self.keys_var)) similarity[:,i] = -1 _, max_idx = torch.topk(similarity, 1, dim=1) return self.keys_var[max_idx] def random_pick_memory(self, mem, max_indices): m, d = mem.size() output = [] for i in range(m): flattened_indices = (max_indices==i).nonzero() a, _ = flattened_indices.size() if a != 0: number = np.random.choice(a, 1) output.append(flattened_indices[number, 0]) else: output.append(-1) return torch.tensor(output) def get_update_query(self, mem, max_indices, update_indices, score, query, train): m, d = mem.size() if train: query_update = torch.zeros((m,d)).cuda() # random_update = torch.zeros((m,d)).cuda() for i in range(m): idx = torch.nonzero(max_indices.squeeze(1)==i) a, _ = idx.size() if a != 0: query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0) else: query_update[i] = 0 return query_update else: query_update = torch.zeros((m,d)).cuda() for i in range(m): idx = torch.nonzero(max_indices.squeeze(1)==i) a, _ = idx.size() if a != 0: query_update[i] = torch.sum(((score[idx,i] / torch.max(score[:,i])) *query[idx].squeeze(1)), dim=0) else: query_update[i] = 0 return query_update def get_score(self, mem, query): bs, h,w,d = query.size() m, d = mem.size() score = torch.matmul(query, torch.t(mem))# b X h X w X m score = score.view(bs*h*w, m)# (b X h X w) X m score_query = F.softmax(score, dim=0) score_memory = F.softmax(score,dim=1) return score_query, score_memory def forward(self, query, keys, train=True): batch_size, dims,h,w = query.size() # b X d X h X w query = F.normalize(query, dim=1) query = query.permute(0,2,3,1) # b X h X w X d #train if train: #losses separateness_loss, compactness_loss = self.gather_loss(query,keys, train) # read updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys) #update updated_memory = self.update(query, keys, train) return updated_query, updated_memory, softmax_score_query, softmax_score_memory, separateness_loss, compactness_loss #test else: # loss compactness_loss, query_re, top1_keys, keys_ind = self.gather_loss(query,keys, train) # read updated_query, softmax_score_query,softmax_score_memory = self.read(query, keys) #update updated_memory = keys return updated_query, updated_memory, softmax_score_query, softmax_score_memory, query_re, top1_keys,keys_ind, compactness_loss def update(self, query, keys,train): batch_size, h,w,dims = query.size() # b X h X w X d softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1) _, updating_indices = torch.topk(softmax_score_query, 1, dim=0) if train: query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape,train) updated_memory = F.normalize(query_update + keys, dim=1) else: query_update = self.get_update_query(keys, gathering_indices, updating_indices, softmax_score_query, query_reshape, train) updated_memory = F.normalize(query_update + keys, dim=1) return updated_memory.detach() def pointwise_gather_loss(self, query_reshape, keys, gathering_indices, train): n,dims = query_reshape.size() # (b X h X w) X d loss_mse = torch.nn.MSELoss(reduction='none') pointwise_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach()) return pointwise_loss def gather_loss(self,query, keys, train): batch_size, h,w,dims = query.size() # b X h X w X d if train: loss = torch.nn.TripletMarginLoss(margin=1.0) loss_mse = torch.nn.MSELoss() softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 2, dim=1) #1st, 2nd closest memories pos = keys[gathering_indices[:,0]] neg = keys[gathering_indices[:,1]] top1_loss = loss_mse(query_reshape, pos.detach()) gathering_loss = loss(query_reshape,pos.detach(), neg.detach()) return gathering_loss, top1_loss else: loss_mse = torch.nn.MSELoss() softmax_score_query, softmax_score_memory = self.get_score(keys, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) _, gathering_indices = torch.topk(softmax_score_memory, 1, dim=1) gathering_loss = loss_mse(query_reshape, keys[gathering_indices].squeeze(1).detach()) return gathering_loss, query_reshape, keys[gathering_indices].squeeze(1).detach(), gathering_indices[:,0] def read(self, query, updated_memory): batch_size, h,w,dims = query.size() # b X h X w X d softmax_score_query, softmax_score_memory = self.get_score(updated_memory, query) query_reshape = query.contiguous().view(batch_size*h*w, dims) concat_memory = torch.matmul(softmax_score_memory.detach(), updated_memory) # (b X h X w) X d updated_query = torch.cat((query_reshape, concat_memory), dim = 1) # (b X h X w) X 2d updated_query = updated_query.view(batch_size, h, w, 2*dims) updated_query = updated_query.permute(0,3,1,2) return updated_query, softmax_score_query, softmax_score_memory ================================================ FILE: model/utils.py ================================================ import numpy as np from collections import OrderedDict import os import glob import cv2 import torch.utils.data as data rng = np.random.RandomState(2020) def np_load_frame(filename, resize_height, resize_width): """ Load image path and convert it to numpy.ndarray. Notes that the color channels are BGR and the color space is normalized from [0, 255] to [-1, 1]. :param filename: the full path of image :param resize_height: resized height :param resize_width: resized width :return: numpy.ndarray """ image_decoded = cv2.imread(filename) image_resized = cv2.resize(image_decoded, (resize_width, resize_height)) image_resized = image_resized.astype(dtype=np.float32) image_resized = (image_resized / 127.5) - 1.0 return image_resized class DataLoader(data.Dataset): def __init__(self, video_folder, transform, resize_height, resize_width, time_step=4, num_pred=1): self.dir = video_folder self.transform = transform self.videos = OrderedDict() self._resize_height = resize_height self._resize_width = resize_width self._time_step = time_step self._num_pred = num_pred self.setup() self.samples = self.get_all_samples() def setup(self): videos = glob.glob(os.path.join(self.dir, '*')) for video in sorted(videos): video_name = video.split('/')[-1] self.videos[video_name] = {} self.videos[video_name]['path'] = video self.videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg')) self.videos[video_name]['frame'].sort() self.videos[video_name]['length'] = len(self.videos[video_name]['frame']) def get_all_samples(self): frames = [] videos = glob.glob(os.path.join(self.dir, '*')) for video in sorted(videos): video_name = video.split('/')[-1] for i in range(len(self.videos[video_name]['frame'])-self._time_step): frames.append(self.videos[video_name]['frame'][i]) return frames def __getitem__(self, index): video_name = self.samples[index].split('/')[-2] frame_name = int(self.samples[index].split('/')[-1].split('.')[-2]) batch = [] for i in range(self._time_step+self._num_pred): image = np_load_frame(self.videos[video_name]['frame'][frame_name+i], self._resize_height, self._resize_width) if self.transform is not None: batch.append(self.transform(image)) return np.concatenate(batch, axis=0) def __len__(self): return len(self.samples) ================================================ FILE: utils.py ================================================ import numpy as np import os import sys import torch import torch.nn as nn import torch.nn.functional as F import torchvision import torchvision.utils as v_utils import matplotlib.pyplot as plt import cv2 import math from collections import OrderedDict import copy import time from sklearn.metrics import roc_auc_score def rmse(predictions, targets): return np.sqrt(((predictions - targets) ** 2).mean()) def psnr(mse): return 10 * math.log10(1 / mse) def get_lr(optimizer): for param_group in optimizer.param_groups: return param_group['lr'] def normalize_img(img): img_re = copy.copy(img) img_re = (img_re - np.min(img_re)) / (np.max(img_re) - np.min(img_re)) return img_re def point_score(outputs, imgs): loss_func_mse = nn.MSELoss(reduction='none') error = loss_func_mse((outputs[0]+1)/2,(imgs[0]+1)/2) normal = (1-torch.exp(-error)) score = (torch.sum(normal*loss_func_mse((outputs[0]+1)/2,(imgs[0]+1)/2)) / torch.sum(normal)).item() return score def anomaly_score(psnr, max_psnr, min_psnr): return ((psnr - min_psnr) / (max_psnr-min_psnr)) def anomaly_score_inv(psnr, max_psnr, min_psnr): return (1.0 - ((psnr - min_psnr) / (max_psnr-min_psnr))) def anomaly_score_list(psnr_list): anomaly_score_list = list() for i in range(len(psnr_list)): anomaly_score_list.append(anomaly_score(psnr_list[i], np.max(psnr_list), np.min(psnr_list))) return anomaly_score_list def anomaly_score_list_inv(psnr_list): anomaly_score_list = list() for i in range(len(psnr_list)): anomaly_score_list.append(anomaly_score_inv(psnr_list[i], np.max(psnr_list), np.min(psnr_list))) return anomaly_score_list def AUC(anomal_scores, labels): frame_auc = roc_auc_score(y_true=np.squeeze(labels, axis=0), y_score=np.squeeze(anomal_scores)) return frame_auc def score_sum(list1, list2, alpha): list_result = [] for i in range(len(list1)): list_result.append((alpha*list1[i]+(1-alpha)*list2[i])) return list_result