Repository: yuxiangsun/RTFNet Branch: master Commit: 2457cc68b142 Files: 13 Total size: 44.9 KB Directory structure: gitextract_23w8bcfk/ ├── .devcontainer/ │ ├── devcontainer.json │ └── docker-compose.yml ├── Dockerfile ├── LICENSE ├── README.md ├── model/ │ ├── RTFNet.py │ └── __init__.py ├── run_demo.py ├── train.py └── util/ ├── MF_dataset.py ├── __init__.py ├── augmentation.py └── util.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/devcontainer.json ================================================ // For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at: // https://github.com/microsoft/vscode-dev-containers/tree/v0.122.1/containers/docker-from-docker-compose // If you want to run as a non-root user in the container, see .devcontainer/docker-compose.yml. { "name": "RTFNet", // You can freely choose a name. "dockerComposeFile": "docker-compose.yml", "service": "RTFNet", // The name of the docker-compose service. "workspaceFolder": "/workspace", // Set *default* container specific settings.json values on container create. // "settings": { // "terminal.integrated.shell.linux": "/bin/bash" // }, // Add the IDs of extensions you want installed when the container is created. // "extensions": [ // "ms-azuretools.vscode-docker" // ] // Uncomment the next line if you want start specific services in your Docker Compose config. // "runServices": [], // Uncomment the next line if you want to keep your containers running after VS Code shuts down. // "shutdownAction": "none", // Use 'postCreateCommand' to run commands after the container is created. // "postCreateCommand": "docker --version", // Uncomment to connect as a non-root user. See https://aka.ms/vscode-remote/containers/non-root. 
// "remoteUser": "vscode" } ================================================ FILE: .devcontainer/docker-compose.yml ================================================ #------------------------------------------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. #------------------------------------------------------------------------------------------------------------- version: '2.3' services: RTFNet: # Uncomment the next line to use a non-root user for all processes. You can also # simply use the "remoteUser" property in devcontainer.json if you just want VS Code # and its sub-processes (terminals, tasks, debugging) to execute as the user. On Linux, # you may need to update USER_UID and USER_GID in .devcontainer/Dockerfile to match your # user if not 1000. See https://aka.ms/vscode-remote/containers/non-root for details. # user: vscode runtime: nvidia image: docker_image_rtfnet # The name of the docker image ports: - '1234:6006' volumes: # Update this to wherever you want VS Code to mount the folder of your project - ..:/workspace:cached # Do not change! # - /home/sun/somefolder/:/somefolder # folder_in_local_computer:folder_in_docker_container # Forwards the local Docker socket to the container. - /var/run/docker.sock:/var/run/docker-host.sock shm_size: 32g devices: - /dev/nvidia0 - /dev/nvidia1 # Please add or delete according to the number of your GPU cards # Uncomment the next four lines if you will use a ptrace-based debuggers like C++, Go, and Rust. # cap_add: # - SYS_PTRACE # security_opt: # - seccomp:unconfined # Overrides default command so things don't shut down after the process ends. 
#entrypoint: /usr/local/share/docker-init.sh command: sleep infinity ================================================ FILE: Dockerfile ================================================ FROM nvidia/cuda:12.5.0-devel-ubuntu22.04 ENV DEBIAN_FRONTEND=noninteractive ENV DEBCONF_NOWARNINGS=yes RUN apt-get update && apt-get install -y vim python3 python3-pip RUN pip3 install -U scipy scikit-learn RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 RUN pip3 install tensorboard torchsummary==1.5.1 numpy==1.23.0 ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # RTFNet-pytorch This is the official pytorch implementation of [RTFNet: RGB-Thermal Fusion Network for Semantic Segmentation of Urban Scenes](https://github.com/yuxiangsun/RTFNet/blob/master/doc/RAL2019_RTFNet.pdf) (IEEE RA-L). Some of the codes are borrowed from [MFNet](https://github.com/haqishen/MFNet-pytorch). Note that our implementations of the evaluation metrics (Acc and IoU) are different from those in MFNet. In addition, we consider the unlabelled class when computing the metrics. The current version supports Python>=3.10.12, CUDA>=12.5.0 and PyTorch>=2.3.1, but it should work fine with lower versions of CUDA and PyTorch. Please modify the `Dockerfile` as you want. If you do not use docker, please manually install the dependencies listed in the `Dockerfile`. ## Introduction RTFNet is a data-fusion network for semantic segmentation using RGB and thermal images. It consists of two encoders and one decoder. ## Dataset The original dataset can be downloaded from the MFNet project [page](https://www.mi.t.u-tokyo.ac.jp/static/projects/mil_multispectral/), but you are encouraged to download our preprocessed dataset from [here](http://gofile.me/4jm56/CfukComo1). ## Pretrained weights The weights used in the paper: RTFNet 50: http://gofile.me/4jm56/9VygmBgPR RTFNet 152: http://gofile.me/4jm56/ODE2fxJKG ## Usage * Assume you have [docker](https://docs.docker.com/install/linux/docker-ce/ubuntu/) and [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed. First, you need to build a docker image. Then, download the dataset: ``` $ cd ~ $ git clone https://github.com/yuxiangsun/RTFNet.git $ cd ~/RTFNet $ docker build -t docker_image_rtfnet . $ mkdir ~/RTFNet/dataset $ cd ~/RTFNet/dataset $ (download our preprocessed dataset.zip in this folder) $ unzip -d .. 
dataset.zip
```

* To reproduce our results (for different RTFNet variants, please manually change `num_resnet_layers` in `RTFNet.py` and `weight_name` in `run_demo.py`):

```
$ cd ~/RTFNet
$ mkdir -p ~/RTFNet/weights_backup/RTFNet_50
$ cd ~/RTFNet/weights_backup/RTFNet_50
$ (download the RTFNet_50 weight in this folder)
$ mkdir -p ~/RTFNet/weights_backup/RTFNet_152
$ cd ~/RTFNet/weights_backup/RTFNet_152
$ (download the RTFNet_152 weight in this folder)
$ docker run -it --shm-size 8G -p 1234:6006 --name docker_container_rtfnet --gpus all -v ~/RTFNet:/workspace docker_image_rtfnet
$ (currently, you should be in the docker)
$ cd /workspace
$ python3 run_demo.py
```

The results will be saved in the `./runs` folder.

* To train RTFNet (for different RTFNet variants, please manually change `num_resnet_layers` in `RTFNet.py`):

```
$ docker run -it --shm-size 8G -p 1234:6006 --name docker_container_rtfnet --gpus all -v ~/RTFNet:/workspace docker_image_rtfnet
$ (currently, you should be in the docker)
$ cd /workspace
$ python3 train.py
$ (fire up another terminal)
$ docker exec -it docker_container_rtfnet bash
$ cd /workspace
$ tensorboard --bind_all --logdir=./runs/tensorboard_log/
$ (fire up your favorite browser with http://localhost:1234, you will see the tensorboard)
```

The results will be saved in the `./runs` folder.

Note: Please change the smoothing factor in the Tensorboard webpage to `0.999`, otherwise, you may not find the patterns from the noisy plots. If you have the error `docker: Error response from daemon: could not select device driver`, please first install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) on your computer!
## Citation If you use RTFNet in an academic work, please cite: ``` @ARTICLE{sun2019rtfnet, author={Yuxiang Sun and Weixun Zuo and Ming Liu}, journal={{IEEE Robotics and Automation Letters}}, title={{RTFNet: RGB-Thermal Fusion Network for Semantic Segmentation of Urban Scenes}}, year={2019}, volume={4}, number={3}, pages={2576-2583}, doi={10.1109/LRA.2019.2904733}, ISSN={2377-3766}, month={July},} ``` ## Demos ## About VSCode and Docker We suggest use VSCode and Docker for deep learning research. Note that this repo already contains the `.devcontainer` folder, which is needed by VSCode. For more details, please refer to this [tutorial](https://github.com/yuxiangsun/VSCode_Docker_Tutorial). ## Contact sun.yuxiang@outlook.com ================================================ FILE: model/RTFNet.py ================================================ # coding:utf-8 # By Yuxiang Sun, Aug. 2, 2019 # Email: sun.yuxiang@outlook.com import torch import torch.nn as nn import torchvision.models as models class RTFNet(nn.Module): def __init__(self, n_class): super(RTFNet, self).__init__() self.num_resnet_layers = 152 if self.num_resnet_layers == 18: resnet_raw_model1 = models.resnet18(pretrained=True) resnet_raw_model2 = models.resnet18(pretrained=True) self.inplanes = 512 elif self.num_resnet_layers == 34: resnet_raw_model1 = models.resnet34(pretrained=True) resnet_raw_model2 = models.resnet34(pretrained=True) self.inplanes = 512 elif self.num_resnet_layers == 50: resnet_raw_model1 = models.resnet50(pretrained=True) resnet_raw_model2 = models.resnet50(pretrained=True) self.inplanes = 2048 elif self.num_resnet_layers == 101: resnet_raw_model1 = models.resnet101(pretrained=True) resnet_raw_model2 = models.resnet101(pretrained=True) self.inplanes = 2048 elif self.num_resnet_layers == 152: resnet_raw_model1 = models.resnet152(pretrained=True) resnet_raw_model2 = models.resnet152(pretrained=True) self.inplanes = 2048 ######## Thermal ENCODER ######## self.encoder_thermal_conv1 = 
nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) self.encoder_thermal_conv1.weight.data = torch.unsqueeze(torch.mean(resnet_raw_model1.conv1.weight.data, dim=1), dim=1) self.encoder_thermal_bn1 = resnet_raw_model1.bn1 self.encoder_thermal_relu = resnet_raw_model1.relu self.encoder_thermal_maxpool = resnet_raw_model1.maxpool self.encoder_thermal_layer1 = resnet_raw_model1.layer1 self.encoder_thermal_layer2 = resnet_raw_model1.layer2 self.encoder_thermal_layer3 = resnet_raw_model1.layer3 self.encoder_thermal_layer4 = resnet_raw_model1.layer4 ######## RGB ENCODER ######## self.encoder_rgb_conv1 = resnet_raw_model2.conv1 self.encoder_rgb_bn1 = resnet_raw_model2.bn1 self.encoder_rgb_relu = resnet_raw_model2.relu self.encoder_rgb_maxpool = resnet_raw_model2.maxpool self.encoder_rgb_layer1 = resnet_raw_model2.layer1 self.encoder_rgb_layer2 = resnet_raw_model2.layer2 self.encoder_rgb_layer3 = resnet_raw_model2.layer3 self.encoder_rgb_layer4 = resnet_raw_model2.layer4 ######## DECODER ######## self.deconv1 = self._make_transpose_layer(TransBottleneck, self.inplanes//2, 2, stride=2) # using // for python 3.6 self.deconv2 = self._make_transpose_layer(TransBottleneck, self.inplanes//2, 2, stride=2) # using // for python 3.6 self.deconv3 = self._make_transpose_layer(TransBottleneck, self.inplanes//2, 2, stride=2) # using // for python 3.6 self.deconv4 = self._make_transpose_layer(TransBottleneck, self.inplanes//2, 2, stride=2) # using // for python 3.6 self.deconv5 = self._make_transpose_layer(TransBottleneck, n_class, 2, stride=2) def _make_transpose_layer(self, block, planes, blocks, stride=1): upsample = None if stride != 1: upsample = nn.Sequential( nn.ConvTranspose2d(self.inplanes, planes, kernel_size=2, stride=stride, padding=0, bias=False), nn.BatchNorm2d(planes), ) elif self.inplanes != planes: upsample = nn.Sequential( nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride, padding=0, bias=False), nn.BatchNorm2d(planes), ) for m in 
upsample.modules(): if isinstance(m, nn.ConvTranspose2d): nn.init.xavier_uniform_(m.weight.data) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() layers = [] for i in range(1, blocks): layers.append(block(self.inplanes, self.inplanes)) layers.append(block(self.inplanes, planes, stride, upsample)) self.inplanes = planes return nn.Sequential(*layers) def forward(self, input): rgb = input[:,:3] thermal = input[:,3:] verbose = False # encoder ###################################################################### if verbose: print("rgb.size() original: ", rgb.size()) # (480, 640) if verbose: print("thermal.size() original: ", thermal.size()) # (480, 640) ###################################################################### rgb = self.encoder_rgb_conv1(rgb) if verbose: print("rgb.size() after conv1: ", rgb.size()) # (240, 320) rgb = self.encoder_rgb_bn1(rgb) if verbose: print("rgb.size() after bn1: ", rgb.size()) # (240, 320) rgb = self.encoder_rgb_relu(rgb) if verbose: print("rgb.size() after relu: ", rgb.size()) # (240, 320) thermal = self.encoder_thermal_conv1(thermal) if verbose: print("thermal.size() after conv1: ", thermal.size()) # (240, 320) thermal = self.encoder_thermal_bn1(thermal) if verbose: print("thermal.size() after bn1: ", thermal.size()) # (240, 320) thermal = self.encoder_thermal_relu(thermal) if verbose: print("thermal.size() after relu: ", thermal.size()) # (240, 320) rgb = rgb + thermal rgb = self.encoder_rgb_maxpool(rgb) if verbose: print("rgb.size() after maxpool: ", rgb.size()) # (120, 160) thermal = self.encoder_thermal_maxpool(thermal) if verbose: print("thermal.size() after maxpool: ", thermal.size()) # (120, 160) ###################################################################### rgb = self.encoder_rgb_layer1(rgb) if verbose: print("rgb.size() after layer1: ", rgb.size()) # (120, 160) thermal = self.encoder_thermal_layer1(thermal) if verbose: print("thermal.size() after layer1: ", thermal.size()) # (120, 160) 
rgb = rgb + thermal ###################################################################### rgb = self.encoder_rgb_layer2(rgb) if verbose: print("rgb.size() after layer2: ", rgb.size()) # (60, 80) thermal = self.encoder_thermal_layer2(thermal) if verbose: print("thermal.size() after layer2: ", thermal.size()) # (60, 80) rgb = rgb + thermal ###################################################################### rgb = self.encoder_rgb_layer3(rgb) if verbose: print("rgb.size() after layer3: ", rgb.size()) # (30, 40) thermal = self.encoder_thermal_layer3(thermal) if verbose: print("thermal.size() after layer3: ", thermal.size()) # (30, 40) rgb = rgb + thermal ###################################################################### rgb = self.encoder_rgb_layer4(rgb) if verbose: print("rgb.size() after layer4: ", rgb.size()) # (15, 20) thermal = self.encoder_thermal_layer4(thermal) if verbose: print("thermal.size() after layer4: ", thermal.size()) # (15, 20) fuse = rgb + thermal ###################################################################### # decoder fuse = self.deconv1(fuse) if verbose: print("fuse after deconv1: ", fuse.size()) # (30, 40) fuse = self.deconv2(fuse) if verbose: print("fuse after deconv2: ", fuse.size()) # (60, 80) fuse = self.deconv3(fuse) if verbose: print("fuse after deconv3: ", fuse.size()) # (120, 160) fuse = self.deconv4(fuse) if verbose: print("fuse after deconv4: ", fuse.size()) # (240, 320) fuse = self.deconv5(fuse) if verbose: print("fuse after deconv5: ", fuse.size()) # (480, 640) return fuse class TransBottleneck(nn.Module): def __init__(self, inplanes, planes, stride=1, upsample=None): super(TransBottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) if upsample is not None and stride != 1: self.conv3 = nn.ConvTranspose2d(planes, planes, 
kernel_size=2, stride=stride, padding=0, bias=False) else: self.conv3 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.relu = nn.ReLU(inplace=True) self.upsample = upsample self.stride = stride for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.xavier_uniform_(m.weight.data) elif isinstance(m, nn.ConvTranspose2d): nn.init.xavier_uniform_(m.weight.data) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.upsample is not None: residual = self.upsample(x) out += residual out = self.relu(out) return out def unit_test(): num_minibatch = 2 rgb = torch.randn(num_minibatch, 3, 480, 640).cuda(0) thermal = torch.randn(num_minibatch, 1, 480, 640).cuda(0) rtf_net = RTFNet(9).cuda(0) input = torch.cat((rgb, thermal), dim=1) rtf_net(input) #print('The model: ', rtf_net.modules) if __name__ == '__main__': unit_test() ================================================ FILE: model/__init__.py ================================================ from .RTFNet import RTFNet ================================================ FILE: run_demo.py ================================================ # By Yuxiang Sun, Dec. 
14, 2020 # Email: sun.yuxiang@outlook.com import os, argparse, time, datetime, sys, shutil, stat, torch import numpy as np from torch.autograd import Variable from torch.utils.data import DataLoader from util.MF_dataset import MF_dataset from util.util import compute_results, visualize from sklearn.metrics import confusion_matrix from scipy.io import savemat from model import RTFNet ############################################################################################# parser = argparse.ArgumentParser(description='Test with pytorch') ############################################################################################# parser.add_argument('--model_name', '-m', type=str, default='RTFNet') parser.add_argument('--weight_name', '-w', type=str, default='RTFNet_152') # RTFNet_152, RTFNet_50, please change the number of layers in the network file parser.add_argument('--file_name', '-f', type=str, default='final.pth') parser.add_argument('--dataset_split', '-d', type=str, default='test') # test, test_day, test_night parser.add_argument('--gpu', '-g', type=int, default=0) ############################################################################################# parser.add_argument('--img_height', '-ih', type=int, default=480) parser.add_argument('--img_width', '-iw', type=int, default=640) parser.add_argument('--num_workers', '-j', type=int, default=16) parser.add_argument('--n_class', '-nc', type=int, default=9) parser.add_argument('--data_dir', '-dr', type=str, default='./dataset/') parser.add_argument('--model_dir', '-wd', type=str, default='./weights_backup/') args = parser.parse_args() ############################################################################################# if __name__ == '__main__': torch.cuda.set_device(args.gpu) print("\nthe pytorch version:", torch.__version__) print("the gpu count:", torch.cuda.device_count()) print("the current used gpu:", torch.cuda.current_device(), '\n') # prepare save direcotry if os.path.exists("./runs"): 
print("previous \"./runs\" folder exist, will delete this folder") shutil.rmtree("./runs") os.makedirs("./runs") os.chmod("./runs", stat.S_IRWXO) # allow the folder created by docker read, written, and execuated by local machine model_dir = os.path.join(args.model_dir, args.weight_name) if os.path.exists(model_dir) is False: sys.exit("the %s does not exit." %(model_dir)) model_file = os.path.join(model_dir, args.file_name) if os.path.exists(model_file) is True: print('use the final model file.') else: sys.exit('no model file found.') print('testing %s: %s on GPU #%d with pytorch' % (args.model_name, args.weight_name, args.gpu)) conf_total = np.zeros((args.n_class, args.n_class)) model = eval(args.model_name)(n_class=args.n_class) if args.gpu >= 0: model.cuda(args.gpu) print('loading model file %s... ' % model_file) pretrained_weight = torch.load(model_file, map_location = lambda storage, loc: storage.cuda(args.gpu)) own_state = model.state_dict() for name, param in pretrained_weight.items(): if name not in own_state: continue own_state[name].copy_(param) print('done!') batch_size = 1 test_dataset = MF_dataset(data_dir=args.data_dir, split=args.dataset_split, input_h=args.img_height, input_w=args.img_width) test_loader = DataLoader( dataset = test_dataset, batch_size = batch_size, shuffle = False, num_workers = args.num_workers, pin_memory = True, drop_last = False ) ave_time_cost = 0.0 model.eval() with torch.no_grad(): for it, (images, labels, names) in enumerate(test_loader): images = Variable(images).cuda(args.gpu) labels = Variable(labels).cuda(args.gpu) start_time = time.time() logits = model(images) # logits.size(): mini_batch*num_class*480*640 end_time = time.time() if it>=5: # # ignore the first 5 frames ave_time_cost += (end_time-start_time) # convert tensor to numpy 1d array label = labels.cpu().numpy().squeeze().flatten() prediction = logits.argmax(1).cpu().numpy().squeeze().flatten() # prediction and label are both 1-d array, size: minibatch*640*480 # 
generate confusion matrix frame-by-frame conf = confusion_matrix(y_true=label, y_pred=prediction, labels=[0,1,2,3,4,5,6,7,8]) # conf is an n_class*n_class matrix, vertical axis: groundtruth, horizontal axis: prediction conf_total += conf # save demo images visualize(image_name=names, predictions=logits.argmax(1), weight_name=args.weight_name) print("%s, %s, frame %d/%d, %s, time cost: %.2f ms, demo result saved." %(args.model_name, args.weight_name, it+1, len(test_loader), names, (end_time-start_time)*1000)) precision_per_class, recall_per_class, iou_per_class = compute_results(conf_total) conf_total_matfile = os.path.join("./runs", 'conf_'+args.weight_name+'.mat') savemat(conf_total_matfile, {'conf': conf_total}) # 'conf' is the variable name when loaded in Matlab print('\n###########################################################################') print('\n%s: %s test results (with batch size %d) on %s using %s:' %(args.model_name, args.weight_name, batch_size, datetime.date.today(), torch.cuda.get_device_name(args.gpu))) print('\n* the tested dataset name: %s' % args.dataset_split) print('* the tested image count: %d' % len(test_loader)) print('* the tested image size: %d*%d' %(args.img_height, args.img_width)) print('* the weight name: %s' %args.weight_name) print('* the file name: %s' %args.file_name) print("* recall per class: \n unlabeled: %.6f, car: %.6f, person: %.6f, bike: %.6f, curve: %.6f, car_stop: %.6f, guardrail: %.6f, color_cone: %.6f, bump: %.6f" \ %(recall_per_class[0], recall_per_class[1], recall_per_class[2], recall_per_class[3], recall_per_class[4], recall_per_class[5], recall_per_class[6], recall_per_class[7], recall_per_class[8])) print("* iou per class: \n unlabeled: %.6f, car: %.6f, person: %.6f, bike: %.6f, curve: %.6f, car_stop: %.6f, guardrail: %.6f, color_cone: %.6f, bump: %.6f" \ %(iou_per_class[0], iou_per_class[1], iou_per_class[2], iou_per_class[3], iou_per_class[4], iou_per_class[5], iou_per_class[6], iou_per_class[7], 
iou_per_class[8])) print("\n* average values (np.mean(x)): \n recall: %.6f, iou: %.6f" \ %(recall_per_class.mean(), iou_per_class.mean())) print("* average values (np.mean(np.nan_to_num(x))): \n recall: %.6f, iou: %.6f" \ %(np.mean(np.nan_to_num(recall_per_class)), np.mean(np.nan_to_num(iou_per_class)))) print('\n* the average time cost per frame (with batch size %d): %.2f ms, namely, the inference speed is %.2f fps' %(batch_size, ave_time_cost*1000/(len(test_loader)-5), 1.0/(ave_time_cost/(len(test_loader)-5)))) # ignore the first 10 frames #print('\n* the total confusion matrix: ') #np.set_printoptions(precision=8, threshold=np.inf, linewidth=np.inf, suppress=True) #print(conf_total) print('\n###########################################################################') ================================================ FILE: train.py ================================================ # By Yuxiang Sun, Dec. 4, 2019 # Email: sun.yuxiang@outlook.com import os, argparse, time, datetime, stat, shutil import numpy as np import torch import torch.nn.functional as F from torch.autograd import Variable from torch.utils.data import DataLoader import torchvision.utils as vutils from util.MF_dataset import MF_dataset from util.augmentation import RandomFlip, RandomCrop, RandomCropOut, RandomBrightness, RandomNoise from util.util import compute_results from sklearn.metrics import confusion_matrix from torch.utils.tensorboard import SummaryWriter from model import RTFNet ############################################################################################# parser = argparse.ArgumentParser(description='Train with pytorch') ############################################################################################# parser.add_argument('--model_name', '-m', type=str, default='RTFNet') #batch_size: RTFNet-152: 2; RTFNet-101: 2; RTFNet-50: 3; RTFNet-34: 10; RTFNet-18: 15; parser.add_argument('--batch_size', '-b', type=int, default=2) parser.add_argument('--lr_start', '-ls', 
type=float, default=0.01) parser.add_argument('--gpu', '-g', type=int, default=0) ############################################################################################# parser.add_argument('--lr_decay', '-ld', type=float, default=0.95) parser.add_argument('--epoch_max', '-em', type=int, default=10000) # please stop training mannully parser.add_argument('--epoch_from', '-ef', type=int, default=0) parser.add_argument('--num_workers', '-j', type=int, default=8) parser.add_argument('--n_class', '-nc', type=int, default=9) parser.add_argument('--data_dir', '-dr', type=str, default='./dataset/') args = parser.parse_args() ############################################################################################# augmentation_methods = [ RandomFlip(prob=0.5), RandomCrop(crop_rate=0.1, prob=1.0), # RandomCropOut(crop_rate=0.2, prob=1.0), # RandomBrightness(bright_range=0.15, prob=0.9), # RandomNoise(noise_range=5, prob=0.9), ] def train(epo, model, train_loader, optimizer): model.train() for it, (images, labels, names) in enumerate(train_loader): images = Variable(images).cuda(args.gpu) labels = Variable(labels).cuda(args.gpu) start_t = time.time() # time.time() returns the current time optimizer.zero_grad() logits = model(images) loss = F.cross_entropy(logits, labels) # Note that the cross_entropy function has already include the softmax function loss.backward() optimizer.step() lr_this_epo=0 for param_group in optimizer.param_groups: lr_this_epo = param_group['lr'] print('Train: %s, epo %s/%s, iter %s/%s, lr %.8f, %.2f img/sec, loss %.4f, time %s' \ % (args.model_name, epo, args.epoch_max, it+1, len(train_loader), lr_this_epo, len(names)/(time.time()-start_t), float(loss), datetime.datetime.now().replace(microsecond=0)-start_datetime)) if accIter['train'] % 1 == 0: writer.add_scalar('Train/loss', loss, accIter['train']) view_figure = True # note that I have not colorized the GT and predictions here if accIter['train'] % 500 == 0: if view_figure: input_rgb_images 
= vutils.make_grid(images[:,:3], nrow=8, padding=10) # can only display 3-channel images, so images[:,:3] writer.add_image('Train/input_rgb_images', input_rgb_images, accIter['train']) scale = max(1, 255//args.n_class) # label (0,1,2..) is invisable, multiply a constant for visualization groundtruth_tensor = labels.unsqueeze(1) * scale # mini_batch*480*640 -> mini_batch*1*480*640 groundtruth_tensor = torch.cat((groundtruth_tensor, groundtruth_tensor, groundtruth_tensor), 1) # change to 3-channel for visualization groudtruth_images = vutils.make_grid(groundtruth_tensor, nrow=8, padding=10) writer.add_image('Train/groudtruth_images', groudtruth_images, accIter['train']) predicted_tensor = logits.argmax(1).unsqueeze(1) * scale # mini_batch*args.n_class*480*640 -> mini_batch*480*640 -> mini_batch*1*480*640 predicted_tensor = torch.cat((predicted_tensor, predicted_tensor, predicted_tensor),1) # change to 3-channel for visualization, mini_batch*1*480*640 predicted_images = vutils.make_grid(predicted_tensor, nrow=8, padding=10) writer.add_image('Train/predicted_images', predicted_images, accIter['train']) accIter['train'] = accIter['train'] + 1 def validation(epo, model, val_loader): model.eval() with torch.no_grad(): for it, (images, labels, names) in enumerate(val_loader): images = Variable(images).cuda(args.gpu) labels = Variable(labels).cuda(args.gpu) start_t = time.time() # time.time() returns the current time logits = model(images) loss = F.cross_entropy(logits, labels) # Note that the cross_entropy function has already include the softmax function print('Val: %s, epo %s/%s, iter %s/%s, %.2f img/sec, loss %.4f, time %s' \ % (args.model_name, epo, args.epoch_max, it + 1, len(val_loader), len(names)/(time.time()-start_t), float(loss), datetime.datetime.now().replace(microsecond=0)-start_datetime)) if accIter['val'] % 1 == 0: writer.add_scalar('Validation/loss', loss, accIter['val']) view_figure = False # note that I have not colorized the GT and predictions here if 
accIter['val'] % 100 == 0: if view_figure: input_rgb_images = vutils.make_grid(images[:, :3], nrow=8, padding=10) # can only display 3-channel images, so images[:,:3] writer.add_image('Validation/input_rgb_images', input_rgb_images, accIter['val']) scale = max(1, 255 // args.n_class) # label (0,1,2..) is invisable, multiply a constant for visualization groundtruth_tensor = labels.unsqueeze(1) * scale # mini_batch*480*640 -> mini_batch*1*480*640 groundtruth_tensor = torch.cat((groundtruth_tensor, groundtruth_tensor, groundtruth_tensor), 1) # change to 3-channel for visualization groudtruth_images = vutils.make_grid(groundtruth_tensor, nrow=8, padding=10) writer.add_image('Validation/groudtruth_images', groudtruth_images, accIter['val']) predicted_tensor = logits.argmax(1).unsqueeze(1)*scale # mini_batch*args.n_class*480*640 -> mini_batch*480*640 -> mini_batch*1*480*640 predicted_tensor = torch.cat((predicted_tensor, predicted_tensor, predicted_tensor), 1) # change to 3-channel for visualization, mini_batch*1*480*640 predicted_images = vutils.make_grid(predicted_tensor, nrow=8, padding=10) writer.add_image('Validation/predicted_images', predicted_images, accIter['val']) accIter['val'] += 1 def testing(epo, model, test_loader): model.eval() conf_total = np.zeros((args.n_class, args.n_class)) label_list = ["unlabeled", "car", "person", "bike", "curve", "car_stop", "guardrail", "color_cone", "bump"] testing_results_file = os.path.join(weight_dir, 'testing_results_file.txt') with torch.no_grad(): for it, (images, labels, names) in enumerate(test_loader): images = Variable(images).cuda(args.gpu) labels = Variable(labels).cuda(args.gpu) logits = model(images) label = labels.cpu().numpy().squeeze().flatten() prediction = logits.argmax(1).cpu().numpy().squeeze().flatten() # prediction and label are both 1-d array, size: minibatch*640*480 conf = confusion_matrix(y_true=label, y_pred=prediction, labels=[0,1,2,3,4,5,6,7,8]) # conf is args.n_class*args.n_class matrix, vertical 
axis: groundtruth, horizontal axis: prediction conf_total += conf print('Test: %s, epo %s/%s, iter %s/%s, time %s' % (args.model_name, epo, args.epoch_max, it+1, len(test_loader), datetime.datetime.now().replace(microsecond=0)-start_datetime)) precision, recall, IoU = compute_results(conf_total) writer.add_scalar('Test/average_precision',precision.mean(), epo) writer.add_scalar('Test/average_recall', recall.mean(), epo) writer.add_scalar('Test/average_IoU', IoU.mean(), epo) for i in range(len(precision)): writer.add_scalar("Test(class)/precision_class_%s" % label_list[i], precision[i], epo) writer.add_scalar("Test(class)/recall_class_%s"% label_list[i], recall[i],epo) writer.add_scalar('Test(class)/Iou_%s'% label_list[i], IoU[i], epo) if epo==0: with open(testing_results_file, 'w') as f: f.write("# %s, initial lr: %s, batch size: %s, date: %s \n" %(args.model_name, args.lr_start, args.batch_size, datetime.date.today())) f.write("# epoch: unlabeled, car, person, bike, curve, car_stop, guardrail, color_cone, bump, average(nan_to_num). 
(Acc %, IoU %)\n") with open(testing_results_file, 'a') as f: f.write(str(epo)+': ') for i in range(len(precision)): f.write('%0.4f, %0.4f, ' % (100*recall[i], 100*IoU[i])) f.write('%0.4f, %0.4f\n' % (100*np.mean(np.nan_to_num(recall)), 100*np.mean(np.nan_to_num(IoU)))) print('saving testing results.') with open(testing_results_file, "r") as file: writer.add_text('testing_results', file.read().replace('\n', ' \n'), epo) if __name__ == '__main__': torch.cuda.set_device(args.gpu) print("\nthe pytorch version:", torch.__version__) print("the gpu count:", torch.cuda.device_count()) print("the current used gpu:", torch.cuda.current_device(), '\n') model = eval(args.model_name)(n_class=args.n_class) if args.gpu >= 0: model.cuda(args.gpu) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr_start, momentum=0.9, weight_decay=0.0005) scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_decay, last_epoch=-1) # preparing folders if os.path.exists("./runs"): shutil.rmtree("./runs") weight_dir = os.path.join("./runs", args.model_name) os.makedirs(weight_dir) os.chmod(weight_dir, stat.S_IRWXO) # allow the folder created by docker read, written, and execuated by local machine writer = SummaryWriter("./runs/tensorboard_log") os.chmod("./runs/tensorboard_log", stat.S_IRWXO) # allow the folder created by docker read, written, and execuated by local machine os.chmod("./runs", stat.S_IRWXO) print('training %s on GPU #%d with pytorch' % (args.model_name, args.gpu)) print('from epoch %d / %s' % (args.epoch_from, args.epoch_max)) print('weight will be saved in: %s' % weight_dir) train_dataset = MF_dataset(data_dir=args.data_dir, split='train', transform=augmentation_methods) val_dataset = MF_dataset(data_dir=args.data_dir, split='val') test_dataset = MF_dataset(data_dir=args.data_dir, split='test') train_loader = DataLoader( dataset = train_dataset, batch_size = args.batch_size, shuffle = True, num_workers = args.num_workers, pin_memory = True, drop_last = 
False ) val_loader = DataLoader( dataset = val_dataset, batch_size = args.batch_size, shuffle = False, num_workers = args.num_workers, pin_memory = True, drop_last = False ) test_loader = DataLoader( dataset = test_dataset, batch_size = args.batch_size, shuffle = False, num_workers = args.num_workers, pin_memory = True, drop_last = False ) start_datetime = datetime.datetime.now().replace(microsecond=0) accIter = {'train': 0, 'val': 0} for epo in range(args.epoch_from, args.epoch_max): print('\ntrain %s, epo #%s begin...' % (args.model_name, epo)) #scheduler.step() # if using pytorch 0.4.1, please put this statement here train(epo, model, train_loader, optimizer) validation(epo, model, val_loader) checkpoint_model_file = os.path.join(weight_dir, str(epo) + '.pth') print('saving check point %s: ' % checkpoint_model_file) torch.save(model.state_dict(), checkpoint_model_file) testing(epo, model, test_loader) # testing is just for your reference, you can comment this line during training scheduler.step() # if using pytorch 1.1 or above, please put this statement here ================================================ FILE: util/MF_dataset.py ================================================ # By Yuxiang Sun, Jul. 
3, 2021 # Email: sun.yuxiang@outlook.com import os, torch from torch.utils.data.dataset import Dataset import numpy as np import PIL class MF_dataset(Dataset): def __init__(self, data_dir, split, input_h=480, input_w=640 ,transform=[]): super(MF_dataset, self).__init__() assert split in ['train', 'val', 'test', 'test_day', 'test_night', 'val_test', 'most_wanted'], \ 'split must be "train"|"val"|"test"|"test_day"|"test_night"|"val_test"|"most_wanted"' # test_day, test_night with open(os.path.join(data_dir, split+'.txt'), 'r') as f: self.names = [name.strip() for name in f.readlines()] self.data_dir = data_dir self.split = split self.input_h = input_h self.input_w = input_w self.transform = transform self.n_data = len(self.names) def read_image(self, name, folder): file_path = os.path.join(self.data_dir, '%s/%s.png' % (folder, name)) image = np.asarray(PIL.Image.open(file_path)) return image def __getitem__(self, index): name = self.names[index] image = self.read_image(name, 'images') label = self.read_image(name, 'labels') for func in self.transform: image, label = func(image, label) image = np.asarray(PIL.Image.fromarray(image).resize((self.input_w, self.input_h))) image = image.astype('float32') image = np.transpose(image, (2,0,1))/255.0 label = np.asarray(PIL.Image.fromarray(label).resize((self.input_w, self.input_h), resample=PIL.Image.NEAREST)) label = label.astype('int64') return torch.tensor(image), torch.tensor(label), name def __len__(self): return self.n_data ================================================ FILE: util/__init__.py ================================================ ================================================ FILE: util/augmentation.py ================================================ import numpy as np from PIL import Image #from ipdb import set_trace as st class RandomFlip(): def __init__(self, prob=0.5): #super(RandomFlip, self).__init__() self.prob = prob def __call__(self, image, label): if np.random.rand() < self.prob: image = 
image[:,::-1] label = label[:,::-1] return image, label class RandomCrop(): def __init__(self, crop_rate=0.1, prob=1.0): #super(RandomCrop, self).__init__() self.crop_rate = crop_rate self.prob = prob def __call__(self, image, label): if np.random.rand() < self.prob: w, h, c = image.shape h1 = np.random.randint(0, h*self.crop_rate) w1 = np.random.randint(0, w*self.crop_rate) h2 = np.random.randint(h-h*self.crop_rate, h+1) w2 = np.random.randint(w-w*self.crop_rate, w+1) image = image[w1:w2, h1:h2] label = label[w1:w2, h1:h2] return image, label class RandomCropOut(): def __init__(self, crop_rate=0.2, prob=1.0): #super(RandomCropOut, self).__init__() self.crop_rate = crop_rate self.prob = prob def __call__(self, image, label): if np.random.rand() < self.prob: w, h, c = image.shape h1 = np.random.randint(0, h*self.crop_rate) w1 = np.random.randint(0, w*self.crop_rate) h2 = int(h1 + h*self.crop_rate) w2 = int(w1 + w*self.crop_rate) image[w1:w2, h1:h2] = 0 label[w1:w2, h1:h2] = 0 return image, label class RandomBrightness(): def __init__(self, bright_range=0.15, prob=0.9): #super(RandomBrightness, self).__init__() self.bright_range = bright_range self.prob = prob def __call__(self, image, label): if np.random.rand() < self.prob: bright_factor = np.random.uniform(1-self.bright_range, 1+self.bright_range) image = (image * bright_factor).astype(image.dtype) return image, label class RandomNoise(): def __init__(self, noise_range=5, prob=0.9): #super(RandomNoise, self).__init__() self.noise_range = noise_range self.prob = prob def __call__(self, image, label): if np.random.rand() < self.prob: w, h, c = image.shape noise = np.random.randint( -self.noise_range, self.noise_range, (w,h,c) ) image = (image + noise).clip(0,255).astype(image.dtype) return image, label ================================================ FILE: util/util.py ================================================ # By Yuxiang Sun, Dec. 
# 4, 2020
# Email: sun.yuxiang@outlook.com

import numpy as np
from PIL import Image


# 0:unlabeled, 1:car, 2:person, 3:bike, 4:curve, 5:car_stop, 6:guardrail, 7:color_cone, 8:bump
def get_palette():
    """Return the 9x3 RGB color palette, indexed by class id (see mapping above)."""
    unlabelled = [0,0,0]
    car        = [64,0,128]
    person     = [64,64,0]
    bike       = [0,128,192]
    curve      = [0,0,192]
    car_stop   = [128,128,0]
    guardrail  = [64,64,128]
    color_cone = [192,128,128]
    bump       = [192,64,0]
    palette = np.array([unlabelled, car, person, bike, curve, car_stop, guardrail, color_cone, bump])
    return palette


def visualize(image_name, predictions, weight_name):
    """Colorize each predicted class-id map and save it as a PNG under runs/.

    image_name: sequence of base names, parallel to ``predictions``.
    predictions: iterable of per-image class-id tensors (moved to CPU here).
    weight_name: tag embedded in the output file name.
    """
    palette = get_palette()
    for (i, pred) in enumerate(predictions):
        pred = pred.cpu().numpy()  # was predictions[i] — identical value, pred is already the i-th element
        img = np.zeros((pred.shape[0], pred.shape[1], 3), dtype=np.uint8)
        for cid in range(0, len(palette)): # fix the mistake from the MFNet code on Dec.27, 2019
            img[pred == cid] = palette[cid]
        img = Image.fromarray(np.uint8(img))
        img.save('runs/Pred_' + weight_name + '_' + image_name[i] + '.png')


def compute_results(conf_total):
    """Derive per-class precision, recall and IoU from a confusion matrix.

    conf_total: n_class x n_class array, rows = ground truth, columns = prediction.
    Returns three length-n_class float arrays; entries for classes that never
    occur are np.nan.
    """
    n_class = conf_total.shape[0]
    consider_unlabeled = True  # must consider the unlabeled, please set it to True
    if consider_unlabeled is True:
        start_index = 0
    else:
        start_index = 1
    precision_per_class = np.zeros(n_class)
    recall_per_class = np.zeros(n_class)
    iou_per_class = np.zeros(n_class)
    for cid in range(start_index, n_class): # cid: class id
        if conf_total[start_index:, cid].sum() == 0:
            precision_per_class[cid] = np.nan
        else:
            precision_per_class[cid] = float(conf_total[cid, cid]) / float(conf_total[start_index:, cid].sum()) # precision = TP/TP+FP
        if conf_total[cid, start_index:].sum() == 0:
            recall_per_class[cid] = np.nan
        else:
            recall_per_class[cid] = float(conf_total[cid, cid]) / float(conf_total[cid, start_index:].sum()) # recall = TP/TP+FN
        if (conf_total[cid, start_index:].sum() + conf_total[start_index:, cid].sum() - conf_total[cid, cid]) == 0:
            iou_per_class[cid] = np.nan
        else:
            iou_per_class[cid] = float(conf_total[cid, cid]) / float((conf_total[cid, start_index:].sum() + conf_total[start_index:, cid].sum() - conf_total[cid, cid])) # IoU = TP/TP+FP+FN
    return precision_per_class, recall_per_class, iou_per_class