Repository: kakaxi314/GuideNet Branch: master Commit: 9f53b4086d70 Files: 17 Total size: 40.6 KB Directory structure: gitextract_zwcztk8n/ ├── LICENSE ├── README.md ├── augs.py ├── checkpoints/ │ └── .gitignore ├── configs/ │ ├── GN.yaml │ └── GNS.yaml ├── criteria.py ├── datas/ │ └── .gitignore ├── datasets.py ├── exts/ │ ├── guideconv.cpp │ ├── guideconv_kernel.cu │ └── setup.py ├── models.py ├── optimizers.py ├── test.py ├── train.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 Jie Tang Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # [Learning Guided Convolutional Network for Depth Completion](https://arxiv.org/pdf/1908.01238). ## Introduction This is the pytorch implementation of our paper. ## Dependency ``` PyTorch 1.4 PyTorch-Encoding v1.4.0 ``` ## Setup Compile the C++ and CUDA code: ``` cd exts python setup.py install ``` ## Dataset Please download KITTI [depth completion](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_completion) dataset. The structure of data directory: ``` └── datas └── kitti ├── data_depth_annotated │   ├── train │   └── val ├── data_depth_velodyne │   ├── train │   └── val ├── raw │   ├── 2011_09_26 │   ├── 2011_09_28 │   ├── 2011_09_29 │   ├── 2011_09_30 │   └── 2011_10_03 ├── test_depth_completion_anonymous │   ├── image │   ├── intrinsics │   └── velodyne_raw └── val_selection_cropped ├── groundtruth_depth ├── image ├── intrinsics └── velodyne_raw ``` ## Configs The config of different settings: - GN.yaml - GNS.yaml *Compared to **GN**, **GNS** uses fewer parameters to generate the guided kernels, but achieves slightly better results.* ## Trained Models You can directly download the trained model and put it in *checkpoints*: - [GN](https://drive.google.com/file/d/1-sa2pnMMjSv2dV2bRwuyLxPr1onmVykj/view?usp=sharing) - [GNS](https://drive.google.com/file/d/16tVrZQEDBucgjZmTjZl4iFkklkjfeDcs/view?usp=sharing) ## Train You can also train by yourself: ``` python train.py ``` *Pay attention to the settings in the config file (e.g. gpu id).* ## Test With the trained model, you can test and save depth images. ``` python test.py ``` ## Citation If you find this work useful in your research, please consider citing: ``` @article{guidenet, title={Learning guided convolutional network for depth completion}, author={Tang, Jie and Tian, Fei-Peng and Feng, Wei and Li, Jian and Tan, Ping}, journal={IEEE Transactions on Image Processing}, volume={30}, pages={1116--1129}, year={2020}, publisher={IEEE} } ``` ================================================ FILE: augs.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: augs.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/14 8:27 PM import numpy as np __all__ = [ 'Compose', 'Norm', 'Jitter', 'Flip', ] class Compose(object): """ Sequential operations on input images, (i.e. rgb, lidar and depth). """ def __init__(self, transforms): self.transforms = transforms def __call__(self, rgb, lidar, depth): for t in self.transforms: rgb, lidar, depth = t(rgb, lidar, depth) return rgb, lidar, depth class Norm(object): """ normalize rgb image. """ def __init__(self, mean, std): self.mean = np.array(mean) self.std = np.array(std) def __call__(self, rgb, lidar, depth): rgb = (rgb - self.mean) / self.std return rgb, lidar, depth class Jitter(object): """ borrow from https://github.com/kujason/avod/blob/master/avod/datasets/kitti/kitti_aug.py """ def __call__(self, rgb, lidar, depth): pca = compute_pca(rgb) rgb = add_pca_jitter(rgb, pca) return rgb, lidar, depth class Flip(object): """ random horizontal flip of images. """ def __call__(self, rgb, lidar, depth): flip = bool(np.random.randint(2)) if flip: rgb = rgb[:, ::-1, :] lidar = lidar[:, ::-1, :] depth = depth[:, ::-1, :] return rgb, lidar, depth def compute_pca(image): """ calculate PCA of image """ reshaped_data = image.reshape(-1, 3) reshaped_data = (reshaped_data / 255.0).astype(np.float32) covariance = np.cov(reshaped_data.T) e_vals, e_vecs = np.linalg.eigh(covariance) pca = np.sqrt(e_vals) * e_vecs return pca def add_pca_jitter(img_data, pca): """ add a multiple of principle components with Gaussian noise """ new_img_data = np.copy(img_data).astype(np.float32) / 255.0 magnitude = np.random.randn(3) * 0.1 noise = (pca * magnitude).sum(axis=1) new_img_data = new_img_data + noise np.clip(new_img_data, 0.0, 1.0, out=new_img_data) new_img_data = (new_img_data * 255).astype(np.uint8) return new_img_data ================================================ FILE: checkpoints/.gitignore ================================================ # Ignore everything in this directory * # Except this file !.gitignore ================================================ FILE: configs/GN.yaml ================================================ batch_size: 8 data_config: kitti: path: datas/kitti gpu_ids: - 4 - 5 loss: MSE lr_config: MultiStepLR: gamma: 0.5 last_epoch: -1 milestones: - 5 - 10 - 15 manual_seed: 0 metric: RMSE model: GN name: GN nepoch: 20 num_workers: 4 optim_config: AdamW: lr: 0.001 weight_decay: 0.05 resume_seed: 6288 start_epoch: 0 test_aug_configs: - Norm: mean: - 90.995 - 96.2278 - 94.3213 std: - 79.2382 - 80.5267 - 82.1483 test_epoch: 15 test_iters: 500 train_aug_configs: - Jitter - Flip - Norm: mean: - 90.995 - 96.2278 - 94.3213 std: - 79.2382 - 80.5267 - 82.1483 tta: true vis: true vis_iters: 100 ================================================ FILE: configs/GNS.yaml ================================================ batch_size: 8 data_config: kitti: path: datas/kitti gpu_ids: - 6 - 7 loss: MSE lr_config: MultiStepLR: gamma: 0.5 last_epoch: -1 milestones: - 5 - 10 - 15 manual_seed: 0 metric: RMSE model: GNS name: GNS nepoch: 20 num_workers: 4 optim_config: AdamW: lr: 0.001 weight_decay: 0.05 resume_seed: 1600 start_epoch: 0 test_aug_configs: - Norm: mean: - 90.995 - 96.2278 - 94.3213 std: - 79.2382 - 80.5267 - 82.1483 test_epoch: 15 test_iters: 500 train_aug_configs: - Jitter - Flip - Norm: mean: - 90.995 - 96.2278 - 94.3213 std: - 79.2382 - 80.5267 - 82.1483 tta: true vis: true vis_iters: 100 ================================================ FILE: criteria.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: criteria.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/14 7:51 PM import torch import torch.nn as nn __all__ = [ 'RMSE', 'MSE', ] class RMSE(nn.Module): def __init__(self): super().__init__() def forward(self, outputs, target, *args): val_pixels = (target > 1e-3).float().cuda() err = (target * val_pixels - outputs * val_pixels) ** 2 loss = torch.sum(err.view(err.size(0), 1, -1), -1, keepdim=True) cnt = torch.sum(val_pixels.view(val_pixels.size(0), 1, -1), -1, keepdim=True) return torch.sqrt(loss / cnt) class MSE(nn.Module): def __init__(self): super().__init__() def forward(self, outputs, target, *args): val_pixels = (target > 1e-3).float().cuda() loss = target * val_pixels - outputs * val_pixels return loss ** 2 ================================================ FILE: datas/.gitignore ================================================ # Ignore everything in this directory * # Except this file !.gitignore ================================================ FILE: datasets.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: datasets.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/14 8:08 PM import os import numpy as np import glob from PIL import Image import torch.utils.data as data __all__ = [ 'kitti', ] class kitti(data.Dataset): """ kitti depth completion dataset: http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_completion """ def __init__(self, path='../datas/kitti', mode='train', height=256, width=1216, return_idx=False, return_size=False, transform=None): self.base_dir = path self.height = height self.width = width self.mode = mode self.return_idx = return_idx self.return_size = return_size self.transform = transform if mode in ['train', 'val']: self.depth_path = os.path.join(self.base_dir, 'data_depth_annotated', mode) self.lidar_path = os.path.join(self.base_dir, 'data_depth_velodyne', mode) self.depths = list(sorted(glob.iglob(self.depth_path + "/**/*.png", recursive=True))) self.lidars = list(sorted(glob.iglob(self.lidar_path + "/**/*.png", recursive=True))) elif mode == 'selval': self.depth_path = os.path.join(self.base_dir, 'val_selection_cropped', 'groundtruth_depth') self.lidar_path = os.path.join(self.base_dir, 'val_selection_cropped', 'velodyne_raw') self.image_path = os.path.join(self.base_dir, 'val_selection_cropped', 'image') self.depths = list(sorted(glob.iglob(self.depth_path + "/*.png", recursive=True))) self.lidars = list(sorted(glob.iglob(self.lidar_path + "/*.png", recursive=True))) self.images = list(sorted(glob.iglob(self.image_path + "/*.png", recursive=True))) elif mode == 'test': self.lidar_path = os.path.join(self.base_dir, 'test_depth_completion_anonymous', 'velodyne_raw') self.image_path = os.path.join(self.base_dir, 'test_depth_completion_anonymous', 'image') self.lidars = list(sorted(glob.iglob(self.lidar_path + "/*.png", recursive=True))) self.images = list(sorted(glob.iglob(self.image_path + "/*.png", recursive=True))) self.depths = self.lidars else: raise ValueError("Unknown mode: {}".format(mode)) assert (len(self.depths) == len(self.lidars)) self.names = [os.path.split(path)[-1] for path in self.depths] def __len__(self): return len(self.depths) def __getitem__(self, index): depth = self.pull_DEPTH(self.depths[index]) depth = np.expand_dims(depth, axis=2) lidar = self.pull_DEPTH(self.lidars[index]) lidar = np.expand_dims(lidar, axis=2) file_names = self.depths[index].split('/') if self.mode in ['train', 'val']: rgb_path = os.path.join(*file_names[:-7], 'raw', file_names[-5].split('_drive')[0], file_names[-5], file_names[-2], 'data', file_names[-1]) elif self.mode in ['selval', 'test']: rgb_path = self.images[index] else: ValueError("Unknown mode: {}".format(self.mode)) rgb = self.pull_RGB(rgb_path) rgb = rgb.astype(np.float32) lidar = lidar.astype(np.float32) depth = depth.astype(np.float32) shape = lidar.shape if self.transform: rgb, lidar, depth = self.transform(rgb, lidar, depth) rgb = rgb.transpose(2, 0, 1).astype(np.float32) lidar = lidar.transpose(2, 0, 1).astype(np.float32) depth = depth.transpose(2, 0, 1).astype(np.float32) lp = (rgb.shape[2] - self.width) // 2 rgb = rgb[:, -self.height:, lp:lp + self.width] lidar = lidar[:, -self.height:, lp:lp + self.width] depth = depth[:, -self.height:, lp:lp + self.width] output = [rgb, lidar, depth] if self.return_idx: output.append(np.array([index], dtype=int)) if self.return_size: output.append(np.array(shape[:2], dtype=int)) return output def pull_RGB(self, path): img = np.array(Image.open(path).convert('RGB'), dtype=np.uint8) return img def pull_DEPTH(self, path): depth_png = np.array(Image.open(path), dtype=int) assert (np.max(depth_png) > 255) depth_image = (depth_png / 256.).astype(np.float32) return depth_image ================================================ FILE: exts/guideconv.cpp ================================================ // // Created by jie on 09/02/19. // #include #include #include void Conv2d_LF_Cuda(at::Tensor x, at::Tensor y, at::Tensor z, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K); void Conv2d_LB_Cuda(at::Tensor x, at::Tensor y, at::Tensor gx, at::Tensor gy, at::Tensor gz, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K); at::Tensor Conv2dLocal_F( at::Tensor a, // BCHW at::Tensor b // BCKKHW ) { int N1, N2, Ci, Co, K, B; B = a.size(0); Ci = a.size(1); N1 = a.size(2); N2 = a.size(3); Co = Ci; K = sqrt(b.size(1) / Co); auto c = at::zeros_like(a); Conv2d_LF_Cuda(a, b, c, N1, N2, Ci, Co, B, K); return c; } std::tuple Conv2dLocal_B( at::Tensor a, at::Tensor b, at::Tensor gc ) { int N1, N2, Ci, Co, K, B; B = a.size(0); Ci = a.size(1); N1 = a.size(2); N2 = a.size(3); Co = Ci; K = sqrt(b.size(1) / Co); auto ga = at::zeros_like(a); auto gb = at::zeros_like(b); Conv2d_LB_Cuda(a, b, ga, gb, gc, N1, N2, Ci, Co, B, K); return std::make_tuple(ga, gb); } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m ) { m.def("Conv2dLocal_F", &Conv2dLocal_F, "Conv2dLocal Forward (CUDA)"); m.def("Conv2dLocal_B", &Conv2dLocal_B, "Conv2dLocal Backward (CUDA)"); } ================================================ FILE: exts/guideconv_kernel.cu ================================================ // // Created by jie on 09/02/19. // #include #include #include namespace { template __global__ void conv2d_kernel_lf(scalar_t *__restrict__ x, scalar_t *__restrict__ y, scalar_t *__restrict__ z, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K) { int col_index = threadIdx.x + blockIdx.x * blockDim.x; int row_index = threadIdx.y + blockIdx.y * blockDim.y; int cha_index = threadIdx.z + blockIdx.z * blockDim.z; if ((row_index < N1) && (col_index < N2) && (cha_index < Co)) { for (int b = 0; b < B; b++) { scalar_t result = 0; for (int i = -int((K - 1) / 2.); i < (K + 1) / 2.; i++) { for (int j = -int((K - 1) / 2.); j < (K + 1) / 2.; j++) { if ((row_index + i < 0) || (row_index + i >= N1) || (col_index + j < 0) || (col_index + j >= N2)) { continue; } result += x[b * N1 * N2 * Ci + cha_index * N1 * N2 + (row_index + i) * N2 + col_index + j] * y[b * N1 * N2 * Ci * K * K + cha_index * N1 * N2 * K * K + (i + (K - 1) / 2) * K * N1 * N2 + (j + (K - 1) / 2) * N1 * N2 + row_index * N2 + col_index]; } } z[b * N1 * N2 * Co + cha_index * N1 * N2 + row_index * N2 + col_index] = result; } } } template __global__ void conv2d_kernel_lb(scalar_t *__restrict__ x, scalar_t *__restrict__ y, scalar_t *__restrict__ gx, scalar_t *__restrict__ gy, scalar_t *__restrict__ gz, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K) { int col_index = threadIdx.x + blockIdx.x * blockDim.x; int row_index = threadIdx.y + blockIdx.y * blockDim.y; int cha_index = threadIdx.z + blockIdx.z * blockDim.z; if ((row_index < N1) && (col_index < N2) && (cha_index < Co)) { for (int b = 0; b < B; b++) { scalar_t result = 0; for (int i = -int((K - 1) / 2.); i < (K + 1) / 2.; i++) { for (int j = -int((K - 1) / 2.); j < (K + 1) / 2.; j++) { if ((row_index - i < 0) || (row_index - i >= N1) || (col_index - j < 0) || (col_index - j >= N2)) { continue; } result += gz[b * N1 * N2 * Ci + cha_index * N1 * N2 + (row_index - i) * N2 + col_index - j ] * y[b * N1 * N2 * Ci * K * K + cha_index * N1 * N2 * K * K + (i + (K - 1) / 2) * K * N1 * N2 + (j + (K - 1) / 2) * N1 * N2 + (row_index - i) * N2 + col_index - j]; gy[b * N1 * N2 * Ci * K * K + cha_index * N1 * N2 * K * K + (i + (K - 1) / 2) * K * N1 * N2 + (j + (K - 1) / 2) * N1 * N2 + (row_index - i) * N2 + col_index - j] = gz[b * N1 * N2 * Ci + cha_index * N1 * N2 + (row_index - i) * N2 + col_index - j ] * x[b * N1 * N2 * Ci + cha_index * N1 * N2 + row_index * N2 + col_index]; } } gx[b * N1 * N2 * Co + cha_index * N1 * N2 + row_index * N2 + col_index] = result; } } } } void Conv2d_LF_Cuda(at::Tensor x, at::Tensor y, at::Tensor z, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K) { dim3 blockSize(32, 32, 1); dim3 gridSize((N2 + blockSize.x - 1) / blockSize.x, (N1 + blockSize.y - 1) / blockSize.y, (Co + blockSize.z - 1) / blockSize.z); AT_DISPATCH_FLOATING_TYPES(x.type(), "Conv2d_LF", ([&] { conv2d_kernel_lf << < gridSize, blockSize >> > ( x.data(), y.data(), z.data(), N1, N2, Ci, Co, B, K); })); } void Conv2d_LB_Cuda(at::Tensor x, at::Tensor y, at::Tensor gx, at::Tensor gy, at::Tensor gz, size_t N1, size_t N2, size_t Ci, size_t Co, size_t B, size_t K) { dim3 blockSize(32, 32, 1); dim3 gridSize((N2 + blockSize.x - 1) / blockSize.x, (N1 + blockSize.y - 1) / blockSize.y, (Co + blockSize.z - 1) / blockSize.z); AT_DISPATCH_FLOATING_TYPES(x.type(), "Conv2d_LB", ([&] { conv2d_kernel_lb << < gridSize, blockSize >> > ( x.data(), y.data(), gx.data(), gy.data(), gz.data(), N1, N2, Ci, Co, B, K); })); } ================================================ FILE: exts/setup.py ================================================ from setuptools import setup from torch.utils.cpp_extension import BuildExtension, CUDAExtension setup( name='GuideConv', ext_modules=[ CUDAExtension('GuideConv', [ 'guideconv.cpp', 'guideconv_kernel.cu', ]), ], cmdclass={ 'build_ext': BuildExtension }) ================================================ FILE: models.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: model.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/14 7:50 PM import torch import torch.nn as nn from scipy.stats import truncnorm import math from torch.autograd import Function import encoding import GuideConv __all__ = [ 'GN', 'GNS', ] def Conv1x1(in_planes, out_planes, stride=1): """1x1 convolution""" return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) def Conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): """3x3 convolution with padding""" return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, groups=groups, bias=False, dilation=dilation) class Conv2dLocal_F(Function): @staticmethod def forward(ctx, input, weight): ctx.save_for_backward(input, weight) output = GuideConv.Conv2dLocal_F(input, weight) return output @staticmethod def backward(ctx, grad_output): input, weight = ctx.saved_tensors grad_output = grad_output.contiguous() grad_input, grad_weight = GuideConv.Conv2dLocal_B(input, weight, grad_output) return grad_input, grad_weight class Conv2dLocal(nn.Module): def __init__(self, ): super().__init__() def forward(self, input, weight): output = Conv2dLocal_F.apply(input, weight) return output class Basic2d(nn.Module): def __init__(self, in_channels, out_channels, norm_layer=None, kernel_size=3, padding=1): super().__init__() if norm_layer: conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=padding, bias=False) else: conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=padding, bias=True) self.conv = nn.Sequential(conv, ) if norm_layer: self.conv.add_module('bn', norm_layer(out_channels)) self.conv.add_module('relu', nn.ReLU(inplace=True)) def forward(self, x): out = self.conv(x) return out class Basic2dTrans(nn.Module): def __init__(self, in_channels, out_channels, norm_layer=None): super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self.conv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False) self.bn = norm_layer(out_channels) self.relu = nn.ReLU(inplace=True) def forward(self, x): out = self.conv(x) out = self.bn(out) out = self.relu(out) return out class Basic2dLocal(nn.Module): def __init__(self, out_channels, norm_layer=None): super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self.conv = Conv2dLocal() self.bn = norm_layer(out_channels) self.relu = nn.ReLU(inplace=True) def forward(self, input, weight): out = self.conv(input, weight) out = self.bn(out) out = self.relu(out) return out class Guide(nn.Module): def __init__(self, input_planes, weight_planes, norm_layer=None, weight_ks=3): super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self.local = Basic2dLocal(input_planes, norm_layer) self.pool = nn.AdaptiveAvgPool2d((1, 1)) self.conv11 = Basic2d(input_planes + weight_planes, input_planes, None) self.conv12 = nn.Conv2d(input_planes, input_planes * 9, kernel_size=weight_ks, padding=weight_ks // 2) self.conv21 = Basic2d(input_planes + weight_planes, input_planes, None) self.conv22 = nn.Conv2d(input_planes, input_planes * input_planes, kernel_size=1, padding=0) self.br = nn.Sequential( norm_layer(num_features=input_planes), nn.ReLU(inplace=True), ) self.conv3 = Basic2d(input_planes, input_planes, norm_layer) def forward(self, input, weight): B, Ci, H, W = input.shape weight = torch.cat([input, weight], 1) weight11 = self.conv11(weight) weight12 = self.conv12(weight11) weight21 = self.conv21(weight) weight21 = self.pool(weight21) weight22 = self.conv22(weight21).view(B, -1, Ci) out = self.local(input, weight12).view(B, Ci, -1) out = torch.bmm(weight22, out).view(B, Ci, H, W) out = self.br(out) out = self.conv3(out) return out class BasicBlock(nn.Module): expansion = 1 __constants__ = ['downsample'] def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None, act=True): super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self.conv1 = Conv3x3(inplanes, planes, stride) self.bn1 = norm_layer(planes) self.relu = nn.ReLU(inplace=True) self.conv2 = Conv3x3(planes, planes) self.bn2 = norm_layer(planes) self.downsample = downsample self.stride = stride self.act = act def forward(self, x): identity = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample is not None: identity = self.downsample(x) out += identity if self.act: out = self.relu(out) return out class GuideNet(nn.Module): """ Not activate at the ref Init change to trunctated norm """ def __init__(self, block=BasicBlock, bc=16, img_layers=[2, 2, 2, 2, 2], depth_layers=[2, 2, 2, 2, 2], norm_layer=nn.BatchNorm2d, guide=Guide, weight_ks=3): super().__init__() self._norm_layer = norm_layer self.conv_img = Basic2d(3, bc * 2, norm_layer=norm_layer, kernel_size=5, padding=2) in_channels = bc * 2 self.inplanes = in_channels self.layer1_img = self._make_layer(block, in_channels * 2, img_layers[0], stride=2) self.guide1 = guide(in_channels * 2, in_channels * 2, norm_layer, weight_ks) self.inplanes = in_channels * 2 * block.expansion self.layer2_img = self._make_layer(block, in_channels * 4, img_layers[1], stride=2) self.guide2 = guide(in_channels * 4, in_channels * 4, norm_layer, weight_ks) self.inplanes = in_channels * 4 * block.expansion self.layer3_img = self._make_layer(block, in_channels * 8, img_layers[2], stride=2) self.guide3 = guide(in_channels * 8, in_channels * 8, norm_layer, weight_ks) self.inplanes = in_channels * 8 * block.expansion self.layer4_img = self._make_layer(block, in_channels * 8, img_layers[3], stride=2) self.guide4 = guide(in_channels * 8, in_channels * 8, norm_layer, weight_ks) self.inplanes = in_channels * 8 * block.expansion self.layer5_img = self._make_layer(block, in_channels * 8, img_layers[4], stride=2) self.layer2d_img = Basic2dTrans(in_channels * 4, in_channels * 2, norm_layer) self.layer3d_img = Basic2dTrans(in_channels * 8, in_channels * 4, norm_layer) self.layer4d_img = Basic2dTrans(in_channels * 8, in_channels * 8, norm_layer) self.layer5d_img = Basic2dTrans(in_channels * 8, in_channels * 8, norm_layer) self.conv_lidar = Basic2d(1, bc * 2, norm_layer=None, kernel_size=5, padding=2) self.inplanes = in_channels self.layer1_lidar = self._make_layer(block, in_channels * 2, depth_layers[0], stride=2) self.inplanes = in_channels * 2 * block.expansion self.layer2_lidar = self._make_layer(block, in_channels * 4, depth_layers[1], stride=2) self.inplanes = in_channels * 4 * block.expansion self.layer3_lidar = self._make_layer(block, in_channels * 8, depth_layers[2], stride=2) self.inplanes = in_channels * 8 * block.expansion self.layer4_lidar = self._make_layer(block, in_channels * 8, depth_layers[3], stride=2) self.inplanes = in_channels * 8 * block.expansion self.layer5_lidar = self._make_layer(block, in_channels * 8, depth_layers[4], stride=2) self.layer1d = Basic2dTrans(in_channels * 2, in_channels, norm_layer) self.layer2d = Basic2dTrans(in_channels * 4, in_channels * 2, norm_layer) self.layer3d = Basic2dTrans(in_channels * 8, in_channels * 4, norm_layer) self.layer4d = Basic2dTrans(in_channels * 8, in_channels * 8, norm_layer) self.layer5d = Basic2dTrans(in_channels * 8, in_channels * 8, norm_layer) self.conv = nn.Conv2d(bc * 2, 1, kernel_size=3, stride=1, padding=1) self.ref = block(bc * 2, bc * 2, norm_layer=norm_layer, act=False) self._initialize_weights() def forward(self, img, lidar): c0_img = self.conv_img(img) c1_img = self.layer1_img(c0_img) c2_img = self.layer2_img(c1_img) c3_img = self.layer3_img(c2_img) c4_img = self.layer4_img(c3_img) c5_img = self.layer5_img(c4_img) dc5_img = self.layer5d_img(c5_img) c4_mix = dc5_img + c4_img dc4_img = self.layer4d_img(c4_mix) c3_mix = dc4_img + c3_img dc3_img = self.layer3d_img(c3_mix) c2_mix = dc3_img + c2_img dc2_img = self.layer2d_img(c2_mix) c1_mix = dc2_img + c1_img c0_lidar = self.conv_lidar(lidar) c1_lidar = self.layer1_lidar(c0_lidar) c1_lidar_dyn = self.guide1(c1_lidar, c1_mix) c2_lidar = self.layer2_lidar(c1_lidar_dyn) c2_lidar_dyn = self.guide2(c2_lidar, c2_mix) c3_lidar = self.layer3_lidar(c2_lidar_dyn) c3_lidar_dyn = self.guide3(c3_lidar, c3_mix) c4_lidar = self.layer4_lidar(c3_lidar_dyn) c4_lidar_dyn = self.guide4(c4_lidar, c4_mix) c5_lidar = self.layer5_lidar(c4_lidar_dyn) c5 = c5_img + c5_lidar dc5 = self.layer5d(c5) c4 = dc5 + c4_lidar_dyn dc4 = self.layer4d(c4) c3 = dc4 + c3_lidar_dyn dc3 = self.layer3d(c3) c2 = dc3 + c2_lidar_dyn dc2 = self.layer2d(c2) c1 = dc2 + c1_lidar_dyn dc1 = self.layer1d(c1) c0 = dc1 + c0_lidar output = self.ref(c0) output = self.conv(output) return (output,) def _make_layer(self, block, planes, blocks, stride=1): norm_layer = self._norm_layer downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( Conv1x1(self.inplanes, planes * block.expansion, stride), norm_layer(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample, norm_layer)) self.inplanes = planes * block.expansion for _ in range(1, blocks): layers.append(block(self.inplanes, planes, norm_layer=norm_layer)) return nn.Sequential(*layers) def _initialize_weights(self): def truncated_normal_(num, mean=0., std=1.): lower = -2 * std upper = 2 * std X = truncnorm((lower - mean) / std, (upper - mean) / std, loc=mean, scale=std) samples = X.rvs(num) output = torch.from_numpy(samples) return output for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels data = truncated_normal_(m.weight.nelement(), mean=0, std=math.sqrt(1.3 * 2. / n)) data = data.type_as(m.weight.data) m.weight.data = data.view_as(m.weight.data) if m.bias is not None: nn.init.zeros_(m.bias) def GN(): return GuideNet(norm_layer=encoding.nn.SyncBatchNorm, guide=Guide) def GNS(): return GuideNet(norm_layer=encoding.nn.SyncBatchNorm, guide=Guide, weight_ks=1) ================================================ FILE: optimizers.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: optimizers.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/15 4:59 PM """ This is a fixup as pytorch 1.4.0 can not import AdamW directly from torch.optim """ from torch.optim import * from torch.optim.adamw import AdamW ================================================ FILE: test.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: test.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/16 4:47 PM import os os.environ["CUDA_VISIBLE_DEVICES"] = '0' import torch import yaml from easydict import EasyDict as edict import datasets import encoding def test(): net.eval() for batch_idx, (rgb, lidar, _, idx, ori_size) in enumerate(testloader): with torch.no_grad(): if config.tta: rgbf = torch.flip(rgb, [-1]) lidarf = torch.flip(lidar, [-1]) rgbs = torch.cat([rgb, rgbf], 0) lidars = torch.cat([lidar, lidarf], 0) rgbs, lidars = rgbs.cuda(), lidars.cuda() depth_preds, = net(rgbs, lidars) depth_pred, depth_predf = depth_preds.split(depth_preds.shape[0] // 2) depth_predf = torch.flip(depth_predf, [-1]) depth_pred = (depth_pred + depth_predf) / 2. else: rgb, lidar = rgb.cuda(), lidar.cuda() depth_pred, = net(rgb, lidar) depth_pred[depth_pred < 0] = 0 depth_pred = depth_pred.cpu().squeeze(1).numpy() idx = idx.cpu().squeeze(1).numpy() ori_size = ori_size.cpu().numpy() name = [testset.names[i] for i in idx] save_result(config, depth_pred, name, ori_size) if __name__ == '__main__': # config_name = 'GN.yaml' config_name = 'GNS.yaml' with open(os.path.join('configs', config_name), 'r') as file: config_data = yaml.load(file, Loader=yaml.FullLoader) config = edict(config_data) from utils import * transform = init_aug(config.test_aug_configs) key, params = config.data_config.popitem() dataset = getattr(datasets, key) testset = dataset(**params, mode='test', transform=transform, return_idx=True, return_size=True) testloader = torch.utils.data.DataLoader(testset, batch_size=config.batch_size, num_workers=config.num_workers, shuffle=False, pin_memory=True) print('num_test = {}'.format(len(testset))) net = init_net(config) torch.cuda.empty_cache() torch.backends.cudnn.benchmark = True net.cuda() net = encoding.parallel.DataParallelModel(net) net = resume_state(config, net) test() ================================================ FILE: train.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: train.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/14 7:50 PM import os import torch import yaml from easydict import EasyDict as edict def train(epoch): global iters Avg = AverageMeter() for batch_idx, (rgb, lidar, depth) in enumerate(trainloader): if epoch >= config.test_epoch and iters % config.test_iters == 0: test() net.train() rgb, lidar, depth = rgb.cuda(), lidar.cuda(), depth.cuda() optimizer.zero_grad() output = net(rgb, lidar) loss = criterion(output, depth).mean() loss.backward() optimizer.step() Avg.update(loss.item()) iters += 1 if config.vis and batch_idx % config.vis_iters == 0: print('Epoch {} Idx {} Loss {:.4f}'.format(epoch, batch_idx, Avg.avg)) def test(): global best_metric Avg = AverageMeter() net.eval() for batch_idx, (rgb, lidar, depth) in enumerate(testloader): rgb, lidar, depth = rgb.cuda(), lidar.cuda(), depth.cuda() with torch.no_grad(): output = net(rgb, lidar) prec = metric(output, depth).mean() Avg.update(prec.item(), rgb.size(0)) if Avg.avg < best_metric: best_metric = Avg.avg save_state(config, net) print('Best Result: {:.4f}\n'.format(best_metric)) if __name__ == '__main__': # config_name = 'GN.yaml' config_name = 'GNS.yaml' with open(os.path.join('configs', config_name), 'r') as file: config_data = yaml.load(file, Loader=yaml.FullLoader) config = edict(config_data) print(config.name) os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(gpu_id) for gpu_id in config.gpu_ids]) from utils import * init_seed(config) trainloader, testloader = init_dataset(config) net = init_net(config) criterion = init_loss(config) metric = init_metric(config) net, criterion, metric = init_cuda(net, criterion, metric) optimizer = init_optim(config, net) lr_scheduler = init_lr_scheduler(config, optimizer) iters = 0 best_metric = 100 for epoch in range(config.start_epoch, config.nepoch): train(epoch) lr_scheduler.step() print('Best Results: {:.4f}\n'.format(best_metric)) ================================================ FILE: utils.py ================================================ #!/usr/bin/env python # -*- coding:utf-8 -*- # @Filename: utils.py # @Project: GuideNet # @Author: jie # @Time: 2021/3/15 5:25 PM import os import torch import random import numpy as np import augs import models import datasets import optimizers import encoding import criteria from PIL import Image __all__ = [ 'AverageMeter', 'init_seed', 'init_aug', 'init_dataset', 'init_cuda', 'init_net', 'init_loss', 'init_metric', 'init_optim', 'init_lr_scheduler', 'save_state', 'resume_state', 'save_result', ] class AverageMeter(object): def __init__(self): self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count def config_param(model): param_groups = [] other_params = [] for name, param in model.named_parameters(): if len(param.shape) == 1: g = {'params': [param], 'weight_decay': 0.0} param_groups.append(g) else: other_params.append(param) param_groups.append({'params': other_params}) return param_groups def save_state(config, model): print('==> Saving model ...') env_name = config.name + '_' + str(config.manual_seed) save_path = os.path.join('checkpoints', env_name) os.makedirs(save_path, exist_ok=True) model_state_dict = model.state_dict() state_dict = { 'net': model_state_dict, } torch.save(state_dict, os.path.join(save_path, 'result.pth')) def resume_state(config, model): env_name = config.name + '_' + str(config.resume_seed) cp_path = os.path.join('checkpoints', env_name, 'result.pth') resume_model = torch.load(cp_path)['net'] model.load_state_dict(resume_model, strict=True) return model def pad_rep(image, ori_size): h, w = image.shape oh, ow = ori_size pl = (ow - w) // 2 pr = ow - w - pl pt = oh - h image_pad = np.pad(image, pad_width=((pt, 0), (pl, pr)), mode='edge') return image_pad def save_result(config, depths, names, ori_sizes=None): env_name = config.name + '_' + str(config.resume_seed) save_path = os.path.join('results', env_name) os.makedirs(save_path, exist_ok=True) for i in range(depths.shape[0]): depth, name = depths[i], names[i] if ori_sizes is not None: depth = pad_rep(depth, ori_sizes[i]) filename = os.path.join(save_path, name) img = (depth * 256.0).astype('uint16') Img = Image.fromarray(img) Img.save(filename) def init_seed(config): if config.manual_seed == 0: config.manual_seed = random.randint(1, 10000) print("Random Seed: ", config.manual_seed) torch.initial_seed() random.seed(config.manual_seed) np.random.seed(config.manual_seed) torch.manual_seed(config.manual_seed) torch.cuda.manual_seed_all(config.manual_seed) def init_net(config): return getattr(models, config.model)() def init_loss(config): return getattr(criteria, config.loss)() def init_metric(config): return getattr(criteria, config.metric)() def init_aug(aug_config): transform = [] for x in aug_config: print(x) if type(x) == str: transform.append(getattr(augs, x)()) else: key, params = x.popitem() transform.append(getattr(augs, key)(**params)) return augs.Compose(transform) def init_dataset(config): train_transform = init_aug(config.train_aug_configs) test_transform = init_aug(config.test_aug_configs) key, params = config.data_config.popitem() dataset = getattr(datasets, key) trainset = dataset(**params, mode='train', transform=train_transform) testset = dataset(**params, mode='selval', transform=test_transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=config.batch_size, num_workers=config.num_workers, shuffle=True, drop_last=True, pin_memory=True) testloader = torch.utils.data.DataLoader(testset, batch_size=config.batch_size, num_workers=config.num_workers, shuffle=True, drop_last=True, pin_memory=True) print('num_train = {}, num_test = {}'.format(len(trainset), len(testset))) return trainloader, testloader def init_cuda(net, criterion, metric): torch.cuda.empty_cache() net.cuda() criterion.cuda() metric.cuda() net = encoding.parallel.DataParallelModel(net) criterion = encoding.parallel.DataParallelCriterion(criterion) metric = encoding.parallel.DataParallelCriterion(metric) torch.backends.cudnn.benchmark = True return net, criterion, metric def init_optim(config, net): key, params = config.optim_config.popitem() return getattr(optimizers, key)(config_param(net), **params) def init_lr_scheduler(config, optimizer): key, params = config.lr_config.popitem() return getattr(torch.optim.lr_scheduler, key)(optimizer, **params)