Repository: uzh-rpg/rpg_event_representation_learning Branch: master Commit: 0c265e6e0858 Files: 12 Total size: 20.7 KB Directory structure: gitextract_wcztzcl5/ ├── .gitignore ├── LICENSE ├── README.md ├── log/ │ └── README.md ├── main.py ├── requirements.txt ├── testing.py └── utils/ ├── dataset.py ├── loader.py ├── loss.py ├── models.py └── quantization_layer_init/ └── trilinear_init.pth ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .idea __pycache__ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 Robotics and Perception Group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # Event Representation Learning [![Event Representation Learning](resources/youtube_preview.png)](https://youtu.be/bQtSx59GXRY) This repository contains learning code that implements event representation learning as described in Gehrig et al. ICCV'19. The paper can be found [here](http://rpg.ifi.uzh.ch/docs/ICCV19_Gehrig.pdf) If you use this code in an academic context, please cite the following work: [Daniel Gehrig](https://danielgehrig18.github.io/), [Antonio Loquercio](https://antonilo.github.io/), Konstantinos G. Derpanis, Davide Scaramuzza, "End-to-End Learning of Representations for Asynchronous Event-Based Data", The International Conference on Computer Vision (ICCV), 2019 ```bibtex @InProceedings{Gehrig_2019_ICCV, author = {Daniel Gehrig and Antonio Loquercio and Konstantinos G. Derpanis and Davide Scaramuzza}, title = {End-to-End Learning of Representations for Asynchronous Event-Based Data}, booktitle = {Int. Conf. Comput. Vis. (ICCV)}, month = {October}, year = {2019} } ``` ## Requirements * Python 3.7 * virtualenv * cuda 10 ## Dependencies Create a virtual environment with `python3.7` and activate it virtualenv venv -p /usr/local/bin/python3.7 source venv/bin/activate Install all dependencies by calling pip install -r requirements.txt ## Training Before training, download the `N-Caltech101` dataset and unzip it wget http://rpg.ifi.uzh.ch/datasets/gehrig_et_al_iccv19/N-Caltech101.zip unzip N-Caltech101.zip Then start training by calling python main.py --validation_dataset N-Caltech101/validation/ --training_dataset N-Caltech101/training/ --log_dir log/temp --device cuda:0 Here, `validation_dataset` and `training_dataset` should point to the folders where the training and validation set are stored. `log_dir` controls logging and `device` controls on which device you want to train. 
Checkpoints and models with lowest validation loss will be saved in the root folder of `log_dir`. The N-Cars dataset can be downloaded [here](http://rpg.ifi.uzh.ch/datasets/gehrig_et_al_iccv19/N-Cars.zip). ### Additional parameters * `--num_workers` how many workers to use to load data * `--pin_memory` whether to pin memory or not * `--num_epochs` number of epochs to train * `--save_every_n_epochs` save a checkpoint every n epochs. * `--batch_size` batch size for training ### Visualization Training can be visualized by calling tensorboard: tensorboard --logdir log/temp Training and validation losses as well as classification accuracies are plotted. In addition, the learnt representations are visualized. The training and validation curves should look something like this: ![alt_text](resources/tb.png) ## Testing Once trained, the models can be tested by calling the following script: python testing.py --test_dataset N-Caltech101/testing/ --checkpoint log/model_best.pth --device cuda:0 This will print the test loss and accuracy after iterating through the whole dataset. ================================================ FILE: log/README.md ================================================ logs will be written here. 
================================================ FILE: main.py ================================================ import argparse from os.path import dirname import torch import torchvision import os import numpy as np import tqdm from utils.models import Classifier from torch.utils.tensorboard import SummaryWriter from utils.loader import Loader from utils.loss import cross_entropy_loss_and_accuracy from utils.dataset import NCaltech101 torch.manual_seed(1) np.random.seed(1) def FLAGS(): parser = argparse.ArgumentParser("""Train classifier using a learnt quantization layer.""") # training / validation dataset parser.add_argument("--validation_dataset", default="", required=True) parser.add_argument("--training_dataset", default="", required=True) # logging options parser.add_argument("--log_dir", default="", required=True) # loader and device options parser.add_argument("--device", default="cuda:0") parser.add_argument("--num_workers", type=int, default=4) parser.add_argument("--pin_memory", type=bool, default=True) parser.add_argument("--batch_size", type=int, default=4) parser.add_argument("--num_epochs", type=int, default=30) parser.add_argument("--save_every_n_epochs", type=int, default=5) flags = parser.parse_args() assert os.path.isdir(dirname(flags.log_dir)), f"Log directory root {dirname(flags.log_dir)} not found." assert os.path.isdir(flags.validation_dataset), f"Validation dataset directory {flags.validation_dataset} not found." assert os.path.isdir(flags.training_dataset), f"Training dataset directory {flags.training_dataset} not found." 
print(f"----------------------------\n" f"Starting training with \n" f"num_epochs: {flags.num_epochs}\n" f"batch_size: {flags.batch_size}\n" f"device: {flags.device}\n" f"log_dir: {flags.log_dir}\n" f"training_dataset: {flags.training_dataset}\n" f"validation_dataset: {flags.validation_dataset}\n" f"----------------------------") return flags def percentile(t, q): B, C, H, W = t.shape k = 1 + round(.01 * float(q) * (C * H * W - 1)) result = t.view(B, -1).kthvalue(k).values return result[:,None,None,None] def create_image(representation): B, C, H, W = representation.shape representation = representation.view(B, 3, C // 3, H, W).sum(2) # do robust min max norm representation = representation.detach().cpu() robust_max_vals = percentile(representation, 99) robust_min_vals = percentile(representation, 1) representation = (representation - robust_min_vals)/(robust_max_vals - robust_min_vals) representation = torch.clamp(255*representation, 0, 255).byte() representation = torchvision.utils.make_grid(representation) return representation if __name__ == '__main__': flags = FLAGS() # datasets, add augmentation to training set training_dataset = NCaltech101(flags.training_dataset, augmentation=True) validation_dataset = NCaltech101(flags.validation_dataset) # construct loader, handles data streaming to gpu training_loader = Loader(training_dataset, flags, device=flags.device) validation_loader = Loader(validation_dataset, flags, device=flags.device) # model, and put to device model = Classifier() model = model.to(flags.device) # optimizer and lr scheduler optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5) writer = SummaryWriter(flags.log_dir) iteration = 0 min_validation_loss = 1000 for i in range(flags.num_epochs): sum_accuracy = 0 sum_loss = 0 model = model.eval() print(f"Validation step [{i:3d}/{flags.num_epochs:3d}]") for events, labels in tqdm.tqdm(validation_loader): with torch.no_grad(): 
pred_labels, representation = model(events) loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels) sum_accuracy += accuracy sum_loss += loss validation_loss = sum_loss.item() / len(validation_loader) validation_accuracy = sum_accuracy.item() / len(validation_loader) writer.add_scalar("validation/accuracy", validation_accuracy, iteration) writer.add_scalar("validation/loss", validation_loss, iteration) # visualize representation representation_vizualization = create_image(representation) writer.add_image("validation/representation", representation_vizualization, iteration) print(f"Validation Loss {validation_loss:.4f} Accuracy {validation_accuracy:.4f}") if validation_loss < min_validation_loss: min_validation_loss = validation_loss state_dict = model.state_dict() torch.save({ "state_dict": state_dict, "min_val_loss": min_validation_loss, "iteration": iteration }, "log/model_best.pth") print("New best at ", validation_loss) if i % flags.save_every_n_epochs == 0: state_dict = model.state_dict() torch.save({ "state_dict": state_dict, "min_val_loss": min_validation_loss, "iteration": iteration }, "log/checkpoint_%05d_%.4f.pth" % (iteration, min_validation_loss)) sum_accuracy = 0 sum_loss = 0 model = model.train() print(f"Training step [{i:3d}/{flags.num_epochs:3d}]") for events, labels in tqdm.tqdm(training_loader): optimizer.zero_grad() pred_labels, representation = model(events) loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels) loss.backward() optimizer.step() sum_accuracy += accuracy sum_loss += loss iteration += 1 if i % 10 == 9: lr_scheduler.step() training_loss = sum_loss.item() / len(training_loader) training_accuracy = sum_accuracy.item() / len(training_loader) print(f"Training Iteration {iteration:5d} Loss {training_loss:.4f} Accuracy {training_accuracy:.4f}") writer.add_scalar("training/accuracy", training_accuracy, iteration) writer.add_scalar("training/loss", training_loss, iteration) representation_vizualization = 
create_image(representation) writer.add_image("training/representation", representation_vizualization, iteration) ================================================ FILE: requirements.txt ================================================ https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl tqdm tb-nightly future ================================================ FILE: testing.py ================================================ from os.path import dirname import argparse import torch import tqdm import os from loader import Loader from loss import cross_entropy_loss_and_accuracy from models import Classifier from dataset import NCaltech101 def FLAGS(): parser = argparse.ArgumentParser( """Deep Learning for Events. Supply a config file.""") # can be set in config parser.add_argument("--checkpoint", default="", required=True) parser.add_argument("--test_dataset", default="", required=True) parser.add_argument("--device", default="cuda:0") parser.add_argument("--num_workers", type=int, default=4) parser.add_argument("--batch_size", type=int, default=4) parser.add_argument("--pin_memory", type=bool, default=True) flags = parser.parse_args() assert os.path.isdir(dirname(flags.checkpoint)), f"Checkpoint{flags.checkpoint} not found." assert os.path.isdir(flags.test_dataset), f"Test dataset directory {flags.test_dataset} not found." 
print(f"----------------------------\n" f"Starting testing with \n" f"checkpoint: {flags.checkpoint}\n" f"test_dataset: {flags.test_dataset}\n" f"batch_size: {flags.batch_size}\n" f"device: {flags.device}\n" f"----------------------------") return flags if __name__ == '__main__': flags = FLAGS() test_dataset = NCaltech101(flags.test_dataset) # construct loader, responsible for streaming data to gpu test_loader = Loader(test_dataset, flags, flags.device) # model, load and put to device model = Classifier() ckpt = torch.load(flags.checkpoint) model.load_state_dict(ckpt["state_dict"]) model = model.to(flags.device) model = model.eval() sum_accuracy = 0 sum_loss = 0 print("Test step") for events, labels in tqdm.tqdm(test_loader): with torch.no_grad(): pred_labels, _ = model(events) loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels) sum_accuracy += accuracy sum_loss += loss test_loss = sum_loss.item() / len(test_loader) test_accuracy = sum_accuracy.item() / len(test_loader) print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}") ================================================ FILE: utils/dataset.py ================================================ import numpy as np from os import listdir from os.path import join def random_shift_events(events, max_shift=20, resolution=(180, 240)): H, W = resolution x_shift, y_shift = np.random.randint(-max_shift, max_shift+1, size=(2,)) events[:,0] += x_shift events[:,1] += y_shift valid_events = (events[:,0] >= 0) & (events[:,0] < W) & (events[:,1] >= 0) & (events[:,1] < H) events = events[valid_events] return events def random_flip_events_along_x(events, resolution=(180, 240), p=0.5): H, W = resolution if np.random.random() < p: events[:,0] = W - 1 - events[:,0] return events class NCaltech101: def __init__(self, root, augmentation=False): self.classes = listdir(root) self.files = [] self.labels = [] self.augmentation = augmentation for i, c in enumerate(self.classes): new_files = [join(root, c, f) for f 
in listdir(join(root, c))] self.files += new_files self.labels += [i] * len(new_files) def __len__(self): return len(self.files) def __getitem__(self, idx): """ returns events and label, loading events from aedat :param idx: :return: x,y,t,p, label """ label = self.labels[idx] f = self.files[idx] events = np.load(f).astype(np.float32) if self.augmentation: events = random_shift_events(events) events = random_flip_events_along_x(events) return events, label ================================================ FILE: utils/loader.py ================================================ import torch import numpy as np from torch.utils.data.dataloader import default_collate class Loader: def __init__(self, dataset, flags, device): self.device = device split_indices = list(range(len(dataset))) sampler = torch.utils.data.sampler.SubsetRandomSampler(split_indices) self.loader = torch.utils.data.DataLoader(dataset, batch_size=flags.batch_size, sampler=sampler, num_workers=flags.num_workers, pin_memory=flags.pin_memory, collate_fn=collate_events) def __iter__(self): for data in self.loader: data = [d.to(self.device) for d in data] yield data def __len__(self): return len(self.loader) def collate_events(data): labels = [] events = [] for i, d in enumerate(data): labels.append(d[1]) ev = np.concatenate([d[0], i*np.ones((len(d[0]),1), dtype=np.float32)],1) events.append(ev) events = torch.from_numpy(np.concatenate(events,0)) labels = default_collate(labels) return events, labels ================================================ FILE: utils/loss.py ================================================ import torch def cross_entropy_loss_and_accuracy(prediction, target): cross_entropy_loss = torch.nn.CrossEntropyLoss() loss = cross_entropy_loss(prediction, target) accuracy = (prediction.argmax(1) == target).float().mean() return loss, accuracy ================================================ FILE: utils/models.py ================================================ import torch.nn as nn from 
os.path import join, dirname, isfile import torch import torch.nn.functional as F import numpy as np from torchvision.models.resnet import resnet34 import tqdm class ValueLayer(nn.Module): def __init__(self, mlp_layers, activation=nn.ReLU(), num_channels=9): assert mlp_layers[-1] == 1, "Last layer of the mlp must have 1 input channel." assert mlp_layers[0] == 1, "First layer of the mlp must have 1 output channel" nn.Module.__init__(self) self.mlp = nn.ModuleList() self.activation = activation # create mlp in_channels = 1 for out_channels in mlp_layers[1:]: self.mlp.append(nn.Linear(in_channels, out_channels)) in_channels = out_channels # init with trilinear kernel path = join(dirname(__file__), "quantization_layer_init", "trilinear_init.pth") if isfile(path): state_dict = torch.load(path) self.load_state_dict(state_dict) else: self.init_kernel(num_channels) def forward(self, x): # create sample of batchsize 1 and input channels 1 x = x[None,...,None] # apply mlp convolution for i in range(len(self.mlp[:-1])): x = self.activation(self.mlp[i](x)) x = self.mlp[-1](x) x = x.squeeze() return x def init_kernel(self, num_channels): ts = torch.zeros((1, 2000)) optim = torch.optim.Adam(self.parameters(), lr=1e-2) torch.manual_seed(1) for _ in tqdm.tqdm(range(1000)): # converges in a reasonable time optim.zero_grad() ts.uniform_(-1, 1) # gt gt_values = self.trilinear_kernel(ts, num_channels) # pred values = self.forward(ts) # optimize loss = (values - gt_values).pow(2).sum() loss.backward() optim.step() def trilinear_kernel(self, ts, num_channels): gt_values = torch.zeros_like(ts) gt_values[ts > 0] = (1 - (num_channels-1) * ts)[ts > 0] gt_values[ts < 0] = ((num_channels-1) * ts + 1)[ts < 0] gt_values[ts < -1.0 / (num_channels-1)] = 0 gt_values[ts > 1.0 / (num_channels-1)] = 0 return gt_values class QuantizationLayer(nn.Module): def __init__(self, dim, mlp_layers=[1, 100, 100, 1], activation=nn.LeakyReLU(negative_slope=0.1)): nn.Module.__init__(self) self.value_layer = 
ValueLayer(mlp_layers, activation=activation, num_channels=dim[0]) self.dim = dim def forward(self, events): # points is a list, since events can have any size B = int((1+events[-1,-1]).item()) num_voxels = int(2 * np.prod(self.dim) * B) vox = events[0].new_full([num_voxels,], fill_value=0) C, H, W = self.dim # get values for each channel x, y, t, p, b = events.t() # normalizing timestamps for bi in range(B): t[events[:,-1] == bi] /= t[events[:,-1] == bi].max() p = (p+1)/2 # maps polarity to 0, 1 idx_before_bins = x \ + W * y \ + 0 \ + W * H * C * p \ + W * H * C * 2 * b for i_bin in range(C): values = t * self.value_layer.forward(t-i_bin/(C-1)) # draw in voxel grid idx = idx_before_bins + W * H * i_bin vox.put_(idx.long(), values, accumulate=True) vox = vox.view(-1, 2, C, H, W) vox = torch.cat([vox[:, 0, ...], vox[:, 1, ...]], 1) return vox class Classifier(nn.Module): def __init__(self, voxel_dimension=(9,180,240), # dimension of voxel will be C x 2 x H x W crop_dimension=(224, 224), # dimension of crop before it goes into classifier num_classes=101, mlp_layers=[1, 30, 30, 1], activation=nn.LeakyReLU(negative_slope=0.1), pretrained=True): nn.Module.__init__(self) self.quantization_layer = QuantizationLayer(voxel_dimension, mlp_layers, activation) self.classifier = resnet34(pretrained=pretrained) self.crop_dimension = crop_dimension # replace fc layer and first convolutional layer input_channels = 2*voxel_dimension[0] self.classifier.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) self.classifier.fc = nn.Linear(self.classifier.fc.in_features, num_classes) def crop_and_resize_to_resolution(self, x, output_resolution=(224, 224)): B, C, H, W = x.shape if H > W: h = H // 2 x = x[:, :, h - W // 2:h + W // 2, :] else: h = W // 2 x = x[:, :, :, h - H // 2:h + H // 2] x = F.interpolate(x, size=output_resolution) return x def forward(self, x): vox = self.quantization_layer.forward(x) vox_cropped = 
self.crop_and_resize_to_resolution(vox, self.crop_dimension) pred = self.classifier.forward(vox_cropped) return pred, vox