Showing preview only (485K chars total). Download the full file or copy to clipboard to get everything.
Repository: castacks/DytanVO
Branch: main
Commit: ca2fe20029e9
Files: 61
Total size: 462.0 KB
Directory structure:
gitextract_hty7qque/
├── .gitignore
├── Datasets/
│ ├── __init__.py
│ ├── cowmask.py
│ ├── flowlib.py
│ ├── segmask_gt.py
│ ├── tartanTrajFlowDataset.py
│ ├── util_flow.py
│ └── utils.py
├── DytanVO.py
├── LICENSE
├── Network/
│ ├── PWC/
│ │ ├── PWCNet.py
│ │ ├── __init__.py
│ │ └── correlation.py
│ ├── VOFlowNet.py
│ ├── VONet.py
│ ├── __init__.py
│ └── rigidmask/
│ ├── .gitignore
│ ├── VCNplus.py
│ ├── __init__.py
│ ├── conv4d.py
│ ├── det.py
│ ├── det_losses.py
│ ├── det_utils.py
│ ├── networks/
│ │ ├── DCNv2/
│ │ │ ├── .gitignore
│ │ │ ├── DCN/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dcn_v2.py
│ │ │ │ ├── src/
│ │ │ │ │ ├── cpu/
│ │ │ │ │ │ ├── dcn_v2_cpu.cpp
│ │ │ │ │ │ ├── dcn_v2_im2col_cpu.cpp
│ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp
│ │ │ │ │ │ └── vision.h
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── dcn_v2_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu
│ │ │ │ │ │ └── vision.h
│ │ │ │ │ ├── dcn_v2.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── testcpu.py
│ │ │ │ └── testcuda.py
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── make.sh
│ │ │ └── setup.py
│ │ ├── dlav0.py
│ │ ├── large_hourglass.py
│ │ ├── msra_resnet.py
│ │ ├── pose_dla_dcn.py
│ │ └── resnet_dcn.py
│ └── submodule.py
├── README.md
├── environment.yml
├── evaluator/
│ ├── __init__.py
│ ├── evaluate_ate_scale.py
│ ├── evaluate_kitti.py
│ ├── evaluate_rpe.py
│ ├── evaluator_base.py
│ ├── tartanair_evaluator.py
│ ├── trajectory_transform.py
│ └── transformation.py
└── vo_trajectory_from_folder.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
models/
data/
__pycache__/
.DS_Store
================================================
FILE: Datasets/__init__.py
================================================
================================================
FILE: Datasets/cowmask.py
================================================
# pylint: disable=bad-indentation
# coding=utf-8
# Copyright 2022 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Cow mask generation.
https://github.com/google-research/google-research/blob/master/milking_cowmask/
Adapted from LAX implementation to NumPy due to PyTorch dataloader
being incompatible with JAX
Author: Shihao Shen
Date: 29th Aug 2022
"""
import math
import numpy as np
from scipy import special
from scipy.signal import convolve
_ROOT_2 = math.sqrt(2.0)
_ROOT_2_PI = math.sqrt(2.0 * math.pi)
def gaussian_kernels(sigma, max_sigma):
"""Make Gaussian kernels for Gaussian blur.
Args:
sigma: kernel sigma
max_sigma: sigma upper limit as a float (this is used to determine
the size of kernel required to fit all kernels)
Returns:
a (1, kernel_width) numpy array
"""
size = round(max_sigma * 3) * 2 + 1
x = np.arange(-size, size + 1)[None, :].astype(np.float32)
y = np.exp(-0.5 * x ** 2 / sigma ** 2)
return y / (sigma * _ROOT_2_PI)
def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range):
"""Generate Cow Mask.
Args:
n_masks: number of masks to generate as an int
mask_size: image size as a `(height, width)` tuple
log_sigma_range: the range of the sigma (smoothing kernel)
parameter in log-space`(log(sigma_min), log(sigma_max))`
max_sigma: smoothing sigma upper limit
prop_range: range from which to draw the proportion `p` that
controls the proportion of pixel in a mask that are 1 vs 0
Returns:
Cow Masks as a [v, height, width, 1] numpy array
"""
# Draw the per-mask proportion p
p = np.random.uniform(prop_range[0], prop_range[1])
# Compute threshold factors
threshold_factor = special.erfinv(2 * p - 1) * _ROOT_2
sigma = np.exp(np.random.uniform(log_sigma_range[0], log_sigma_range[1]))
noise = np.random.normal(size=mask_size)
# Generate a kernel for each sigma
kernel = gaussian_kernels(sigma, max_sigma)
kernel = kernel.squeeze()
# kernels in y and x
krn_y = kernel[None, :]
krn_x = kernel[:, None]
# Apply kernels in y and x separately
smooth_noise = convolve(noise, krn_y, mode='same')
smooth_noise = convolve(smooth_noise, krn_x, mode='same')
# Compute mean and std-dev
noise_mu = smooth_noise.mean(axis=(0,1))
noise_sigma = smooth_noise.std(axis=(0,1))
# Compute thresholds
threshold = threshold_factor * noise_sigma + noise_mu
# Apply threshold
mask = (smooth_noise <= threshold).astype(bool)
return mask
if __name__=="__main__":
import time
import matplotlib.pyplot as plt
cow_sigma_range = (20, 60)
log_sigma_range = (math.log(cow_sigma_range[0]), math.log(cow_sigma_range[1]))
cow_prop_range = (0.1, 0.5)
s = time.time()
max_iou = 0
# for _ in range(1000):
# mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)
# max_iou = max(max_iou, np.sum(mask) / (240*360))
# print(time.time() - s)
# print(max_iou)
mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)
print(np.sum(mask) / (240*360))
plt.imshow(mask * 255)
plt.savefig('mask.png')
================================================
FILE: Datasets/flowlib.py
================================================
"""
# ==============================
# flowlib.py
# library for optical flow processing
# Author: Ruoteng Li
# Date: 6th Aug 2016
# ==============================
"""
import png
from util_flow import readPFM
import numpy as np
import matplotlib.colors as cl
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import pdb
UNKNOWN_FLOW_THRESH = 1e7
SMALLFLOW = 0.0
LARGEFLOW = 1e8
"""
=============
Flow Section
=============
"""
def show_flow(filename):
"""
visualize optical flow map using matplotlib
:param filename: optical flow file
:return: None
"""
flow = read_flow(filename)
img = flow_to_image(flow)
plt.imshow(img)
plt.show()
def point_vec(img,flow,skip=10):
skip=20
maxsize=1000.
extendfac=2.
resize_factor = max(1,int(max(maxsize/img.shape[0], maxsize/img.shape[1])))
meshgrid = np.meshgrid(range(img.shape[1]),range(img.shape[0]))
dispimg = cv2.resize(img[:,:,::-1].copy(), None,fx=resize_factor,fy=resize_factor)
colorflow = flow_to_image(flow).astype(int)
for i in range(img.shape[1]): # x
for j in range(img.shape[0]): # y
if flow[j,i,2] != 1: continue
if j%skip!=0 or i%skip!=0: continue
xend = int((meshgrid[0][j,i]+extendfac*flow[j,i,0])*resize_factor)
yend = int((meshgrid[1][j,i]+extendfac*flow[j,i,1])*resize_factor)
leng = np.linalg.norm(flow[j,i,:2]*extendfac)
if leng<1:continue
dispimg = cv2.arrowedLine(dispimg, (meshgrid[0][j,i]*resize_factor,meshgrid[1][j,i]*resize_factor),\
(xend,yend),
(int(colorflow[j,i,2]),int(colorflow[j,i,1]),int(colorflow[j,i,0])),4,tipLength=2/leng,line_type=cv2.LINE_AA)
return dispimg
def visualize_flow(flow, mode='Y'):
"""
this function visualize the input flow
:param flow: input flow in array
:param mode: choose which color mode to visualize the flow (Y: Ccbcr, RGB: RGB color)
:return: None
"""
if mode == 'Y':
# Ccbcr color wheel
img = flow_to_image(flow)
elif mode == 'RGB':
(h, w) = flow.shape[0:2]
du = flow[:, :, 0]
dv = flow[:, :, 1]
valid = flow[:, :, 2]
max_flow = np.sqrt(du**2+dv**2).max()
img = np.zeros((h, w, 3), dtype=np.float64)
# angle layer
img[:, :, 0] = np.fmod(np.arctan2(dv, du) / (2 * np.pi)+1.,1.)
# magnitude layer, normalized to 1
img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow
# phase layer
img[:, :, 2] = 8 - img[:, :, 1]
# clip to [0,1]
small_idx = img[:, :, 0:3] < 0
large_idx = img[:, :, 0:3] > 1
img[small_idx] = 0
img[large_idx] = 1
# convert to rgb
img = cl.hsv_to_rgb(img)
# remove invalid point
img[:, :, 0] = img[:, :, 0] * valid
img[:, :, 1] = img[:, :, 1] * valid
img[:, :, 2] = img[:, :, 2] * valid
return img
def read_flow(filename):
"""
read optical flow data from flow file
:param filename: name of the flow file
:return: optical flow data in numpy array
"""
if filename.endswith('.flo'):
flow = read_flo_file(filename)
elif filename.endswith('.png'):
flow = read_png_file(filename)
elif filename.endswith('.pfm'):
flow = read_pfm_file(filename)
else:
raise Exception('Invalid flow file format!')
return flow
import numpy as np
import os
def write_flo(flow, filename):
TAG_STRING = b'PIEH'
assert type(filename) is str, "file is not str %r" % str(filename)
assert filename[-4:] == '.flo', "file ending is not .flo %r" % file[-4:]
height, width, nBands = flow.shape
assert nBands == 2, "Number of bands = %r != 2" % nBands
u = flow[: , : , 0]
v = flow[: , : , 1]
assert u.shape == v.shape, "Invalid flow shape"
height, width = u.shape
f = open(filename,'wb')
f.write(TAG_STRING)
np.array(width).astype(np.int32).tofile(f)
np.array(height).astype(np.int32).tofile(f)
tmp = np.zeros((height, width*nBands))
tmp[:,np.arange(width)*2] = u
tmp[:,np.arange(width)*2 + 1] = v
tmp.astype(np.float32).tofile(f)
f.close()
def write_flow(flow, filename):
"""
write optical flow in Middlebury .flo format
:param flow: optical flow map
:param filename: optical flow file path to be saved
:return: None
"""
f = open(filename, 'wb')
magic = np.array([202021.25], dtype=np.float32)
(height, width) = flow.shape[0:2]
w = np.array([width], dtype=np.int32)
h = np.array([height], dtype=np.int32)
magic.tofile(f)
w.tofile(f)
h.tofile(f)
flow.tofile(f)
f.close()
def save_flow_image(flow, image_file):
"""
save flow visualization into image file
:param flow: optical flow data
:param flow_fil
:return: None
"""
flow_img = flow_to_image(flow)
img_out = Image.fromarray(flow_img)
img_out.save(image_file)
def flowfile_to_imagefile(flow_file, image_file):
"""
convert flowfile into image file
:param flow: optical flow data
:param flow_fil
:return: None
"""
flow = read_flow(flow_file)
save_flow_image(flow, image_file)
def segment_flow(flow):
h = flow.shape[0]
w = flow.shape[1]
u = flow[:, :, 0]
v = flow[:, :, 1]
idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW))
idx2 = (abs(u) == SMALLFLOW)
class0 = (v == 0) & (u == 0)
u[idx2] = 0.00001
tan_value = v / u
class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0)
class2 = (tan_value >= 1) & (u >= 0) & (v >= 0)
class3 = (tan_value < -1) & (u <= 0) & (v >= 0)
class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0)
class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0)
class7 = (tan_value < -1) & (u >= 0) & (v <= 0)
class6 = (tan_value >= 1) & (u <= 0) & (v <= 0)
class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0)
seg = np.zeros((h, w))
seg[class1] = 1
seg[class2] = 2
seg[class3] = 3
seg[class4] = 4
seg[class5] = 5
seg[class6] = 6
seg[class7] = 7
seg[class8] = 8
seg[class0] = 0
seg[idx] = 0
return seg
def flow_error(tu, tv, u, v):
"""
Calculate average end point error
:param tu: ground-truth horizontal flow map
:param tv: ground-truth vertical flow map
:param u: estimated horizontal flow map
:param v: estimated vertical flow map
:return: End point error of the estimated flow
"""
smallflow = 0.0
'''
stu = tu[bord+1:end-bord,bord+1:end-bord]
stv = tv[bord+1:end-bord,bord+1:end-bord]
su = u[bord+1:end-bord,bord+1:end-bord]
sv = v[bord+1:end-bord,bord+1:end-bord]
'''
stu = tu[:]
stv = tv[:]
su = u[:]
sv = v[:]
idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH)
stu[idxUnknow] = 0
stv[idxUnknow] = 0
su[idxUnknow] = 0
sv[idxUnknow] = 0
ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)]
index_su = su[ind2]
index_sv = sv[ind2]
an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1)
un = index_su * an
vn = index_sv * an
index_stu = stu[ind2]
index_stv = stv[ind2]
tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1)
tun = index_stu * tn
tvn = index_stv * tn
'''
angle = un * tun + vn * tvn + (an * tn)
index = [angle == 1.0]
angle[index] = 0.999
ang = np.arccos(angle)
mang = np.mean(ang)
mang = mang * 180 / np.pi
'''
epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)
epe = epe[ind2]
mepe = np.mean(epe)
return mepe
def flow_to_image(flow):
"""
Convert flow into middlebury color code image
:param flow: optical flow map
:return: optical flow image in middlebury color
"""
u = flow[:, :, 0]
v = flow[:, :, 1]
maxu = -999.
maxv = -999.
minu = 999.
minv = 999.
idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
u[idxUnknow] = 0
v[idxUnknow] = 0
maxu = max(maxu, np.max(u))
minu = min(minu, np.min(u))
maxv = max(maxv, np.max(v))
minv = min(minv, np.min(v))
rad = np.sqrt(u ** 2 + v ** 2)
maxrad = max(-1, np.max(rad))
u = u/(maxrad + np.finfo(float).eps)
v = v/(maxrad + np.finfo(float).eps)
img = compute_color(u, v)
idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
img[idx] = 0
return np.uint8(img)
def evaluate_flow_file(gt_file, pred_file):
"""
evaluate the estimated optical flow end point error according to ground truth provided
:param gt_file: ground truth file path
:param pred_file: estimated optical flow file path
:return: end point error, float32
"""
# Read flow files and calculate the errors
gt_flow = read_flow(gt_file) # ground truth flow
eva_flow = read_flow(pred_file) # predicted flow
# Calculate errors
average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])
return average_pe
def evaluate_flow(gt_flow, pred_flow):
"""
gt: ground-truth flow
pred: estimated flow
"""
average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1])
return average_pe
"""
==============
Disparity Section
==============
"""
def read_disp_png(file_name):
"""
Read optical flow from KITTI .png file
:param file_name: name of the flow file
:return: optical flow data in matrix
"""
image_object = png.Reader(filename=file_name)
image_direct = image_object.asDirect()
image_data = list(image_direct[2])
(w, h) = image_direct[3]['size']
channel = len(image_data[0]) / w
flow = np.zeros((h, w, channel), dtype=np.uint16)
for i in range(len(image_data)):
for j in range(channel):
flow[i, :, j] = image_data[i][j::channel]
return flow[:, :, 0] / 256
def disp_to_flowfile(disp, filename):
"""
Read KITTI disparity file in png format
:param disp: disparity matrix
:param filename: the flow file name to save
:return: None
"""
f = open(filename, 'wb')
magic = np.array([202021.25], dtype=np.float32)
(height, width) = disp.shape[0:2]
w = np.array([width], dtype=np.int32)
h = np.array([height], dtype=np.int32)
empty_map = np.zeros((height, width), dtype=np.float32)
data = np.dstack((disp, empty_map))
magic.tofile(f)
w.tofile(f)
h.tofile(f)
data.tofile(f)
f.close()
"""
==============
Image Section
==============
"""
def read_image(filename):
"""
Read normal image of any format
:param filename: name of the image file
:return: image data in matrix uint8 type
"""
img = Image.open(filename)
im = np.array(img)
return im
def warp_image(im, flow):
"""
Use optical flow to warp image to the next
:param im: image to warp
:param flow: optical flow
:return: warped image
"""
from scipy import interpolate
image_height = im.shape[0]
image_width = im.shape[1]
flow_height = flow.shape[0]
flow_width = flow.shape[1]
n = image_height * image_width
(iy, ix) = np.mgrid[0:image_height, 0:image_width]
(fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
fx = fx.astype(np.float64)
fy = fy.astype(np.float64)
fx += flow[:,:,0]
fy += flow[:,:,1]
mask = np.logical_or(fx <0 , fx > flow_width)
mask = np.logical_or(mask, fy < 0)
mask = np.logical_or(mask, fy > flow_height)
fx = np.minimum(np.maximum(fx, 0), flow_width)
fy = np.minimum(np.maximum(fy, 0), flow_height)
points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1)
xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1)
warp = np.zeros((image_height, image_width, im.shape[2]))
for i in range(im.shape[2]):
channel = im[:, :, i]
plt.imshow(channel, cmap='gray')
values = channel.reshape(n, 1)
new_channel = interpolate.griddata(points, values, xi, method='cubic')
new_channel = np.reshape(new_channel, [flow_height, flow_width])
new_channel[mask] = 1
warp[:, :, i] = new_channel.astype(np.uint8)
return warp.astype(np.uint8)
"""
==============
Others
==============
"""
def pfm_to_flo(pfm_file):
flow_filename = pfm_file[0:pfm_file.find('.pfm')] + '.flo'
(data, scale) = readPFM(pfm_file)
flow = data[:, :, 0:2]
write_flow(flow, flow_filename)
def scale_image(image, new_range):
"""
Linearly scale the image into desired range
:param image: input image
:param new_range: the new range to be aligned
:return: image normalized in new range
"""
min_val = np.min(image).astype(np.float32)
max_val = np.max(image).astype(np.float32)
min_val_new = np.array(min(new_range), dtype=np.float32)
max_val_new = np.array(max(new_range), dtype=np.float32)
scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new
return scaled_image.astype(np.uint8)
def compute_color(u, v):
"""
compute optical flow color map
:param u: optical flow horizontal map
:param v: optical flow vertical map
:return: optical flow in color code
"""
[h, w] = u.shape
img = np.zeros([h, w, 3])
nanIdx = np.isnan(u) | np.isnan(v)
u[nanIdx] = 0
v[nanIdx] = 0
colorwheel = make_color_wheel()
ncols = np.size(colorwheel, 0)
rad = np.sqrt(u**2+v**2)
a = np.arctan2(-v, -u) / np.pi
fk = (a+1) / 2 * (ncols - 1) + 1
k0 = np.floor(fk).astype(int)
k1 = k0 + 1
k1[k1 == ncols+1] = 1
f = fk - k0
for i in range(0, np.size(colorwheel,1)):
tmp = colorwheel[:, i]
col0 = tmp[k0-1] / 255
col1 = tmp[k1-1] / 255
col = (1-f) * col0 + f * col1
idx = rad <= 1
col[idx] = 1-rad[idx]*(1-col[idx])
notidx = np.logical_not(idx)
col[notidx] *= 0.75
img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))
return img
def make_color_wheel():
"""
Generate color wheel according Middlebury color code
:return: Color wheel
"""
RY = 15
YG = 6
GC = 4
CB = 11
BM = 13
MR = 6
ncols = RY + YG + GC + CB + BM + MR
colorwheel = np.zeros([ncols, 3])
col = 0
# RY
colorwheel[0:RY, 0] = 255
colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
col += RY
# YG
colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
colorwheel[col:col+YG, 1] = 255
col += YG
# GC
colorwheel[col:col+GC, 1] = 255
colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
col += GC
# CB
colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
colorwheel[col:col+CB, 2] = 255
col += CB
# BM
colorwheel[col:col+BM, 2] = 255
colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
col += + BM
# MR
colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
colorwheel[col:col+MR, 0] = 255
return colorwheel
def read_flo_file(filename):
"""
Read from Middlebury .flo file
:param flow_file: name of the flow file
:return: optical flow data in matrix
"""
f = open(filename, 'rb')
magic = np.fromfile(f, np.float32, count=1)
data2d = None
if 202021.25 != magic:
print('Magic number incorrect. Invalid .flo file')
else:
w = np.fromfile(f, np.int32, count=1)
h = np.fromfile(f, np.int32, count=1)
#print("Reading %d x %d flow file in .flo format" % (h, w))
flow = np.ones((h[0],w[0],3))
data2d = np.fromfile(f, np.float32, count=2 * w[0] * h[0])
# reshape data into 3D array (columns, rows, channels)
data2d = np.resize(data2d, (h[0], w[0], 2))
flow[:,:,:2] = data2d
f.close()
return flow
def read_png_file(flow_file):
"""
Read from KITTI .png file
:param flow_file: name of the flow file
:return: optical flow data in matrix
"""
flow = cv2.imread(flow_file,-1)[:,:,::-1].astype(np.float64)
# flow_object = png.Reader(filename=flow_file)
# flow_direct = flow_object.asDirect()
# flow_data = list(flow_direct[2])
# (w, h) = flow_direct[3]['size']
# #print("Reading %d x %d flow file in .png format" % (h, w))
# flow = np.zeros((h, w, 3), dtype=np.float64)
# for i in range(len(flow_data)):
# flow[i, :, 0] = flow_data[i][0::3]
# flow[i, :, 1] = flow_data[i][1::3]
# flow[i, :, 2] = flow_data[i][2::3]
invalid_idx = (flow[:, :, 2] == 0)
flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
flow[invalid_idx, 0] = 0
flow[invalid_idx, 1] = 0
return flow
def read_pfm_file(flow_file):
"""
Read from .pfm file
:param flow_file: name of the flow file
:return: optical flow data in matrix
"""
(data, scale) = readPFM(flow_file)
return data
# fast resample layer
def resample(img, sz):
"""
img: flow map to be resampled
sz: new flow map size. Must be [height,weight]
"""
original_image_size = img.shape
in_height = img.shape[0]
in_width = img.shape[1]
out_height = sz[0]
out_width = sz[1]
out_flow = np.zeros((out_height, out_width, 2))
# find scale
height_scale = float(in_height) / float(out_height)
width_scale = float(in_width) / float(out_width)
[x,y] = np.meshgrid(range(out_width), range(out_height))
xx = x * width_scale
yy = y * height_scale
x0 = np.floor(xx).astype(np.int32)
x1 = x0 + 1
y0 = np.floor(yy).astype(np.int32)
y1 = y0 + 1
x0 = np.clip(x0,0,in_width-1)
x1 = np.clip(x1,0,in_width-1)
y0 = np.clip(y0,0,in_height-1)
y1 = np.clip(y1,0,in_height-1)
Ia = img[y0,x0,:]
Ib = img[y1,x0,:]
Ic = img[y0,x1,:]
Id = img[y1,x1,:]
wa = (y1-yy) * (x1-xx)
wb = (yy-y0) * (x1-xx)
wc = (y1-yy) * (xx-x0)
wd = (yy-y0) * (xx-x0)
out_flow[:,:,0] = (Ia[:,:,0]*wa + Ib[:,:,0]*wb + Ic[:,:,0]*wc + Id[:,:,0]*wd) * out_width / in_width
out_flow[:,:,1] = (Ia[:,:,1]*wa + Ib[:,:,1]*wb + Ic[:,:,1]*wc + Id[:,:,1]*wd) * out_height / in_height
return out_flow
================================================
FILE: Datasets/segmask_gt.py
================================================
"""
# ==============================
# segmask_gt.py
# library to generate groundtruth
# segmentation mask given flow and
# disparity change
# (Adapted from code for rigidmask)
# Author: Shihao Shen
# Date: 14th Sep 2022
# ==============================
"""
import argparse
import os
import os.path
import glob
import numpy as np
import cv2
from PIL import Image
from flowlib import read_flow, readPFM, flow_to_image
def dataloader(filepath, fpass='frames_cleanpass', level=6):
iml0 = []
iml1 = []
flowl0 = []
disp0 = []
dispc = []
calib = []
level_stars = '/*'*level
candidate_pool = glob.glob('%s/optical_flow%s'%(filepath,level_stars))
for flow_path in sorted(candidate_pool):
# if 'TEST' in flow_path: continue
if 'flower_storm_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:
print('Skipping %s' % flow_path)
continue
if 'flower_storm_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:
print('Skipping %s' % flow_path)
continue
if 'flower_storm_augmented0_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:
print('Skipping %s' % flow_path)
continue
if 'flower_storm_augmented0_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:
print('Skipping %s' % flow_path)
continue
# if 'FlyingThings' in flow_path and '_0014_' in flow_path:
# print('Skipping %s' % flow_path)
# continue
# if 'FlyingThings' in flow_path and '_0015_' in flow_path:
# print('Skipping %s' % flow_path)
# continue
idd = flow_path.split('/')[-1].split('_')[-2]
if 'into_future' in flow_path:
idd_p1 = '%04d'%(int(idd)+1)
else:
idd_p1 = '%04d'%(int(idd)-1)
if os.path.exists(flow_path.replace(idd,idd_p1)):
d0_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow','disparity')
d0_path = '%s/%s.pfm'%(d0_path.rsplit('/',1)[0],idd)
dc_path = flow_path.replace('optical_flow','disparity_change')
dc_path = '%s/%s.pfm'%(dc_path.rsplit('/',1)[0],idd)
im_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow',fpass)
im0_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd)
im1_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd_p1)
# This will skip any sequence that contains less than 10 poses in camera_data.txt
with open('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]),'r') as f:
if 'FlyingThings' in flow_path and len(f.readlines())!=40:
print('Skipping %s' % flow_path)
continue
iml0.append(im0_path)
iml1.append(im1_path)
flowl0.append(flow_path)
disp0.append(d0_path)
dispc.append(dc_path)
calib.append('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]))
return iml0, iml1, flowl0, disp0, dispc, calib
def default_loader(path):
return Image.open(path).convert('RGB')
def flow_loader(path):
if '.pfm' in path:
data = readPFM(path)[0]
data[:,:,2] = 1
return data
else:
return read_flow(path)
def load_exts(cam_file):
with open(cam_file, 'r') as f:
lines = f.readlines()
l_exts = []
r_exts = []
for l in lines:
if 'L ' in l:
l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
if 'R ' in l:
r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
return l_exts,r_exts
def disparity_loader(path):
if '.png' in path:
data = Image.open(path)
data = np.ascontiguousarray(data,dtype=np.float32)/256
return data
else:
return readPFM(path)[0]
# triangulation
def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5):
depth = bl*fl / disp # 450px->15mm focal length
X = (xcoord - cx) * depth / fl
Y = (ycoord - cy) * depth / fl
Z = depth
P = np.concatenate((X[np.newaxis],Y[np.newaxis],Z[np.newaxis]),0).reshape(3,-1)
P = np.concatenate((P,np.ones((1,P.shape[-1]))),0)
return P
def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, calibs=None):
'''
index: index of the frame in the file lists below
iml0s: a file list of the first frames
iml1s: a file list of the second frames
flowl0s: a file list of the optical w.r.t. iml0s
disp0s: a file list of the disparity w.r.t. iml0s
dispcs: a file list of the disparity change w.r.t. disp0s
calibs: a file list of the camera extrinsics
'''
iml0 = iml0s[index]
iml1 = iml1s[index]
flowl0 = flowl0s[index]
iml0 = default_loader(iml0)
iml1 = default_loader(iml1)
flowl0 = flow_loader(flowl0)
flowl0[:,:,-1][flowl0[:,:,0] == np.inf] = 0
flowl0[:,:,0][~flowl0[:,:,2].astype(bool)] = 0
flowl0[:,:,1][~flowl0[:,:,2].astype(bool)] = 0
flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)
flowl0[np.isnan(flowl0)] = 1e6
bl = 1
if '15mm_' in calibs[index]:
fl = 450
else:
fl = 1050
cx = 479.5
cy = 269.5
intr = [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]
d1 = np.abs(disparity_loader(disp0s[index]))
d2 = np.abs(disparity_loader(dispcs[index]) + d1)
flowl0[:,:,2] = np.logical_and(np.logical_and(flowl0[:,:,2] == 1, d1 != 0), d2 != 0).astype(float)
shape = d1.shape
mesh = np.meshgrid(range(shape[1]), range(shape[0]))
xcoord = mesh[0].astype(float)
ycoord = mesh[1].astype(float)
# triangulation in two frames
P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy)
P1 = triangulation(d2, xcoord + flowl0[:,:,0], ycoord + flowl0[:,:,1], bl=bl, fl=fl, cx=cx, cy=cy)
depth0 = P0[2]
depth1 = P1[2]
depth0 = depth0.reshape(shape).astype(np.float32)
flow3d = (P1-P0)[:3].reshape((3,)+shape).transpose((1,2,0))
fid = int(flowl0s[index].split('/')[-1].split('_')[1])
with open(calibs[index], 'r') as f:
fid = fid - int(f.readline().split(' ')[-1])
l_exts, r_exts= load_exts(calibs[index])
if '/right/' in iml0s[index]:
exts = r_exts
else:
exts = l_exts
if '/into_future/' in flowl0s[index]:
if (fid + 1) > len(exts) - 1: print(flowl0s[index])
if (fid) > len(exts) - 1: print(flowl0s[index])
ext1 = exts[fid+1]
ext0 = exts[fid]
else:
if (fid - 1) > len(exts) - 1: print(flowl0s[index])
if (fid) > len(exts) - 1: print(flowl0s[index])
ext1 = exts[fid-1]
ext0 = exts[fid]
camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1 # Sceneflow uses Blender's coordinate system
RT01 = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT) # ext is from camera space to world space
rect_flow3d = (RT01[:3,:3].dot(P1[:3])-P0[:3]).reshape((3,)+shape).transpose((1,2,0)) # rectified scene flow
depthflow = np.concatenate((depth0[:,:,np.newaxis], rect_flow3d, flow3d), 2)
RT01 = np.concatenate((cv2.Rodrigues(RT01[:3,:3])[0][:,0], RT01[:3,-1])).astype(np.float32)
# object mask
fnum = int(iml0s[index].split('/')[-1].split('.png')[0])
obj_fname = '%s/%04d.pfm'%(flowl0s[index].replace('/optical_flow','object_index').replace('into_past/','/').replace('into_future/','/').rsplit('/',1)[0],fnum)
obj_idx = disparity_loader(obj_fname)
depthflow = np.concatenate((depthflow, obj_idx[:,:,np.newaxis]), 2)
# depthflow dimension: H x W x 8 (depth=1 + rectified_flow3d=3 + flow3d=3 + object_segmentation=1)
iml1 = np.asarray(iml1)
iml0 = np.asarray(iml0)
return iml0, iml1, flowl0, depthflow, intr, RT01
def motionmask(flowl0, depthflow, RT01):
'''
flowl0: optical flow. [H, W, 3]
depthflow: a concatenation of depth, rectified scene flow, scene flow, and object segmentation. [H, W, 8]
RT01: camera motion from the future frame to the current frame. [6, ]
'''
valid_mask = (flowl0[:,:,2] == 1) & (depthflow[:,:,0] < 100) & (depthflow[:,:,0] > 0.01) # valid flow & valid depth
Tglobal_gt = -RT01[3:, np.newaxis, np.newaxis] # background translation
Tlocal_gt = depthflow[:,:,1:4].transpose(2, 0, 1) # point translation (after removing rotation)
m3d_gt = np.linalg.norm(Tlocal_gt - Tglobal_gt, 2, 0) # abs. motion
fgmask_gt = m3d_gt * 100 > 1
fgmask_gt[~valid_mask] = False
return fgmask_gt
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='segmask_gt_generation')
parser.add_argument('--database',
help='path to the database (required)')
parser.add_argument('--debug', action='store_true', default=False,
help='generate visualization')
parser.add_argument('--frames_pass', default='frames_cleanpass',
help='which pass to use, either clean or final')
parser.add_argument('--dataset',
help='choose from FlyingThings3D, Driving, Monkaa')
args = parser.parse_args()
if args.debug:
os.makedirs('%s/%s/results_viz' % (args.database, args.dataset), exist_ok=True)
if args.dataset == 'Monkaa':
level = 4
else:
level = 6
iml0s, iml1s, flowl0s, disp0s, dispcs, calibs = dataloader('%s/%s/' % (args.database, args.dataset),
level=level, fpass=args.frames_pass)
print("Generating %s masks..." % len(flowl0s))
for i in range(len(iml0s)):
idd = flowl0s[i].split('/')[-1].split('_')[-2]
mask_fn = '%s/%s.npy' % (os.path.dirname(flowl0s[i]).replace('optical_flow', 'rigidmask'), idd)
if os.path.exists(mask_fn):
print(i)
continue
os.makedirs(os.path.dirname(mask_fn), exist_ok=True)
iml0, iml1, flowl0, depthflow, intr, RT01 = exp_loader(i, iml0s, iml1s, flowl0s, disp0s, dispcs, calibs)
fgmask = motionmask(flowl0, depthflow, RT01)
np.save(mask_fn, fgmask)
if args.debug:
if args.dataset == 'Driving' and 'rigidmask/15mm_focallength/scene_forwards/fast/left' not in mask_fn:
continue
elif args.dataset == 'Monkaa' and 'rigidmask/eating_camera2_x2/left' not in mask_fn:
continue
elif args.dataset == 'FlyingThings3D' and not ('rigidmask/TEST/A' in mask_fn and 'into_future/left' in mask_fn):
continue
print("Visualizing %s" % mask_fn)
flowl0viz = flow_to_image(flowl0)
maskviz = np.stack((fgmask * 255.0, )*3, axis=-1).astype(np.uint8)
inputs = np.concatenate([iml0, flowl0viz, maskviz], axis=1)
cv2.imwrite('%s/%s/results_viz/%s.png' % (args.database, args.dataset, str(i).zfill(5)), cv2.cvtColor(inputs, cv2.COLOR_RGB2BGR))
================================================
FILE: Datasets/tartanTrajFlowDataset.py
================================================
"""
# ==============================
# tartanTrajFlowDataset.py
# library for DytanVO data I/O
# Author: Wenshan Wang, Shihao Shen
# Date: 3rd Jan 2023
# ==============================
"""
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
from os import listdir
from evaluator.transformation import pos_quats2SEs, pose2motion, SEs2ses
from .utils import make_intrinsics_layer
class TrajFolderDataset(Dataset):
"""scene flow synthetic dataset. """
def __init__(self, imgfolder, transform = None,
focalx = 320.0, focaly = 320.0, centerx = 320.0, centery = 240.0):
files = listdir(imgfolder)
self.rgbfiles = [(imgfolder +'/'+ ff) for ff in files if (ff.endswith('.png') or ff.endswith('.jpg'))]
self.rgbfiles.sort()
self.imgfolder = imgfolder
print('Find {} image files in {}'.format(len(self.rgbfiles), imgfolder))
self.N = len(self.rgbfiles) - 1
# self.N = len(self.lines)
self.transform = transform
self.focalx = focalx
self.focaly = focaly
self.centerx = centerx
self.centery = centery
def __len__(self):
return self.N
def __getitem__(self, idx):
imgfile1 = self.rgbfiles[idx].strip()
imgfile2 = self.rgbfiles[idx+1].strip()
img1 = cv2.imread(imgfile1)
img2 = cv2.imread(imgfile2)
res = {'img1': img1, 'img2': img2}
h, w, _ = img1.shape
intrinsicLayer = make_intrinsics_layer(w, h, self.focalx, self.focaly, self.centerx, self.centery)
res['intrinsic'] = intrinsicLayer
if self.transform:
res = self.transform(res)
res['img1_raw'] = img1
res['img2_raw'] = img2
return res
================================================
FILE: Datasets/util_flow.py
================================================
"""
# ==============================
# util_flow.py
# library for optical flow processing
# Author: Gengshan Yang
# Date: 10th Feb 2021
# ==============================
"""
import math
import png
import struct
import array
import numpy as np
import cv2
import pdb
from io import *
UNKNOWN_FLOW_THRESH = 1e9;
UNKNOWN_FLOW = 1e10;
# Middlebury checks
TAG_STRING = 'PIEH' # use this when WRITING the file
TAG_FLOAT = 202021.25 # check for this when READING the file
def readPFM(file):
import re
file = open(file, 'rb')
color = None
width = None
height = None
scale = None
endian = None
header = file.readline().rstrip()
if header == b'PF':
color = True
elif header == b'Pf':
color = False
else:
raise Exception('Not a PFM file.')
dim_match = re.match(b'^(\d+)\s(\d+)\s$', file.readline())
if dim_match:
width, height = map(int, dim_match.groups())
else:
raise Exception('Malformed PFM header.')
scale = float(file.readline().rstrip())
if scale < 0: # little-endian
endian = '<'
scale = -scale
else:
endian = '>' # big-endian
data = np.fromfile(file, endian + 'f')
shape = (height, width, 3) if color else (height, width)
data = np.reshape(data, shape)
data = np.flipud(data)
return data, scale
def save_pfm(file, image, scale = 1):
import sys
color = None
if image.dtype.name != 'float32':
raise Exception('Image dtype must be float32.')
if len(image.shape) == 3 and image.shape[2] == 3: # color image
color = True
elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale
color = False
else:
raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')
file.write('PF\n' if color else 'Pf\n')
file.write('%d %d\n' % (image.shape[1], image.shape[0]))
endian = image.dtype.byteorder
if endian == '<' or endian == '=' and sys.byteorder == 'little':
scale = -scale
file.write('%f\n' % scale)
image.tofile(file)
def ReadMiddleburyFloFile(path):
""" Read .FLO file as specified by Middlebury.
Returns tuple (width, height, u, v, mask), where u, v, mask are flat
arrays of values.
"""
with open(path, 'rb') as fil:
tag = struct.unpack('f', fil.read(4))[0]
width = struct.unpack('i', fil.read(4))[0]
height = struct.unpack('i', fil.read(4))[0]
assert tag == TAG_FLOAT
#data = np.fromfile(path, dtype=np.float, count=-1)
#data = data[3:]
fmt = 'f' * width*height*2
data = struct.unpack(fmt, fil.read(4*width*height*2))
u = data[::2]
v = data[1::2]
mask = map(lambda x,y: abs(x)<UNKNOWN_FLOW_THRESH and abs(y) < UNKNOWN_FLOW_THRESH, u, v)
mask = list(mask)
u_masked = map(lambda x,y: x if y else 0, u, mask)
v_masked = map(lambda x,y: x if y else 0, v, mask)
return width, height, list(u_masked), list(v_masked), list(mask)
def ReadKittiPngFile(path):
""" Read 16-bit .PNG file as specified by KITTI-2015 (flow).
Returns a tuple, (width, height, u, v, mask), where u, v, mask
are flat arrays of values.
"""
# Read .png file.
png_reader = png.Reader(path)
data = png_reader.read()
if data[3]['bitdepth'] != 16:
raise Exception('bitdepth of ' + path + ' is not 16')
width = data[0]
height = data[1]
# Get list of rows.
rows = list(data[2])
u = array.array('f', [0]) * width*height
v = array.array('f', [0]) * width*height
mask = array.array('f', [0]) * width*height
for y, row in enumerate(rows):
for x in range(width):
ind = width*y+x
u[ind] = (row[3*x] - 2**15) / 64.0
v[ind] = (row[3*x+1] - 2**15) / 64.0
mask[ind] = row[3*x+2]
# if mask[ind] > 0:
# print(u[ind], v[ind], mask[ind], row[3*x], row[3*x+1], row[3*x+2])
#png_reader.close()
return (width, height, u, v, mask)
def WriteMiddleburyFloFile(path, width, height, u, v, mask=None):
""" Write .FLO file as specified by Middlebury.
"""
if mask is not None:
u_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, u, mask)
v_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, v, mask)
else:
u_masked = u
v_masked = v
fmt = 'f' * width*height*2
# Interleave lists
data = [x for t in zip(u_masked,v_masked) for x in t]
with open(path, 'wb') as fil:
fil.write(str.encode(TAG_STRING))
fil.write(struct.pack('i', width))
fil.write(struct.pack('i', height))
fil.write(struct.pack(fmt, *data))
def write_flow(path,flow):
invalid_idx = (flow[:, :, 2] == 0)
flow[:, :, 0:2] = flow[:, :, 0:2]*64.+ 2 ** 15
flow[invalid_idx, 0] = 0
flow[invalid_idx, 1] = 0
flow = flow.astype(np.uint16)
flow = cv2.imwrite(path, flow[:,:,::-1])
#WriteKittiPngFile(path,
# flow.shape[1], flow.shape[0], flow[:,:,0].flatten(),
# flow[:,:,1].flatten(), flow[:,:,2].flatten())
def WriteKittiPngFile(path, width, height, u, v, mask=None):
""" Write 16-bit .PNG file as specified by KITTI-2015 (flow).
u, v are lists of float values
mask is a list of floats, denoting the *valid* pixels.
"""
data = array.array('H',[0])*width*height*3
for i,(u_,v_,mask_) in enumerate(zip(u,v,mask)):
data[3*i] = int(u_*64.0+2**15)
data[3*i+1] = int(v_*64.0+2**15)
data[3*i+2] = int(mask_)
# if mask_ > 0:
# print(data[3*i], data[3*i+1],data[3*i+2])
with open(path, 'wb') as png_file:
png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False)
png_writer.write_array(png_file, data)
def ConvertMiddleburyFloToKittiPng(src_path, dest_path):
width, height, u, v, mask = ReadMiddleburyFloFile(src_path)
WriteKittiPngFile(dest_path, width, height, u, v, mask=mask)
def ConvertKittiPngToMiddleburyFlo(src_path, dest_path):
width, height, u, v, mask = ReadKittiPngFile(src_path)
WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=mask)
def ParseFilenameKitti(filename):
# Parse kitti filename (seq_frameno.xx),
# return seq, frameno, ext.
# Be aware that seq might contain the dataset name (if contained as prefix)
ext = filename[filename.rfind('.'):]
frameno = filename[filename.rfind('_')+1:filename.rfind('.')]
frameno = int(frameno)
seq = filename[:filename.rfind('_')]
return seq, frameno, ext
def read_calib_file(filepath):
"""Read in a calibration file and parse into a dictionary."""
data = {}
with open(filepath, 'r') as f:
for line in f.readlines():
key, value = line.split(':', 1)
# The only non-float values in these files are dates, which
# we don't care about anyway
try:
data[key] = np.array([float(x) for x in value.split()])
except ValueError:
pass
return data
def load_calib_cam_to_cam(cam_to_cam_file):
# We'll return the camera calibration as a dictionary
data = {}
# Load and parse the cam-to-cam calibration data
filedata = read_calib_file(cam_to_cam_file)
# Create 3x4 projection matrices
P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4))
P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4))
P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4))
P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4))
# Compute the camera intrinsics
data['K_cam0'] = P_rect_00[0:3, 0:3]
data['K_cam1'] = P_rect_10[0:3, 0:3]
data['K_cam2'] = P_rect_20[0:3, 0:3]
data['K_cam3'] = P_rect_30[0:3, 0:3]
data['b00'] = P_rect_00[0, 3] / P_rect_00[0, 0]
data['b10'] = P_rect_10[0, 3] / P_rect_10[0, 0]
data['b20'] = P_rect_20[0, 3] / P_rect_20[0, 0]
data['b30'] = P_rect_30[0, 3] / P_rect_30[0, 0]
return data
================================================
FILE: Datasets/utils.py
================================================
"""
# ==============================
# utils.py
# misc library for DytanVO
# Author: Wenshan Wang, Shihao Shen
# Date: 3rd Jan 2023
# ==============================
"""
from __future__ import division
import torch
import math
import random
import numpy as np
import numbers
import cv2
import matplotlib.pyplot as plt
import os
from scipy.spatial.transform import Rotation as R
if ( not ( "DISPLAY" in os.environ ) ):
plt.switch_backend('agg')
print("Environment variable DISPLAY is not present in the system.")
print("Switch the backend of matplotlib to agg.")
import time
# ===== general functions =====
class Compose(object):
"""Composes several transforms together.
Args:
transforms (List[Transform]): list of transforms to compose.
Example:
>>> transforms.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.ToTensor(),
>>> ])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
for t in self.transforms:
img = t(img)
return img
class DownscaleFlow(object):
"""
Scale the flow and mask to a fixed size
"""
def __init__(self, scale=4):
'''
size: output frame size, this should be NO LARGER than the input frame size!
'''
self.downscale = 1.0/scale
def __call__(self, sample):
if self.downscale!=1 and 'flow' in sample :
sample['flow'] = cv2.resize(sample['flow'],
(0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)
if self.downscale!=1 and 'intrinsic' in sample :
sample['intrinsic'] = cv2.resize(sample['intrinsic'],
(0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)
if self.downscale!=1 and 'fmask' in sample :
sample['fmask'] = cv2.resize(sample['fmask'],
(0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)
return sample
class CropCenter(object):
"""Crops a sample of data (tuple) at center
if the image size is not large enough, it will be first resized with fixed ratio
"""
def __init__(self, size):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, sample):
kks = list(sample.keys())
th, tw = self.size
hh, ww = sample[kks[0]].shape[0], sample[kks[0]].shape[1]
if ww == tw and hh == th:
return sample
# resize the image if the image size is smaller than the target size
scale_h = max(1, float(th)/hh)
scale_w = max(1, float(tw)/ww)
if scale_h>1 or scale_w>1:
w = int(round(ww * scale_w)) # w after resize
h = int(round(hh * scale_h)) # h after resize
else:
w, h = ww, hh
if scale_h != 1. or scale_w != 1.: # resize the data
resizedata = ResizeData(size=(h, w))
sample = resizedata(sample)
x1 = int((w-tw)/2)
y1 = int((h-th)/2)
for kk in kks:
if sample[kk] is None:
continue
img = sample[kk]
sample[kk] = img[y1:y1+th,x1:x1+tw,...]
return sample
class ResizeData(object):
"""Resize the data in a dict
"""
def __init__(self, size):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, sample):
kks = list(sample.keys())
th, tw = self.size
h, w = sample[kks[0]].shape[0], sample[kks[0]].shape[1]
if w == tw and h == th:
return sample
scale_w = float(tw)/w
scale_h = float(th)/h
for kk in kks:
if sample[kk] is None:
continue
sample[kk] = cv2.resize(sample[kk], (tw,th), interpolation=cv2.INTER_LINEAR)
if 'flow' in sample:
sample['flow'][...,0] = sample['flow'][...,0] * scale_w
sample['flow'][...,1] = sample['flow'][...,1] * scale_h
return sample
class ToTensor(object):
def __call__(self, sample):
kks = list(sample)
for kk in kks:
data = sample[kk]
data = data.astype(np.float32)
if len(data.shape) == 3: # transpose image-like data
data = data.transpose(2,0,1)
elif len(data.shape) == 2:
data = data.reshape((1,)+data.shape) # add a dummy channel
if len(data.shape) == 3 and data.shape[0]==3: # normalization of rgb images
data = data/255.0
sample[kk] = torch.from_numpy(data.copy()) # copy to make memory continuous
return sample
def tensor2img(tensImg,mean,std):
"""
convert a tensor a numpy array, for visualization
"""
# undo normalize
for t, m, s in zip(tensImg, mean, std):
t.mul_(s).add_(m)
tensImg = tensImg * float(255)
# undo transpose
tensImg = (tensImg.numpy().transpose(1,2,0)).astype(np.uint8)
return tensImg
def bilinear_interpolate(img, h, w):
# assert round(h)>=0 and round(h)<img.shape[0]
# assert round(w)>=0 and round(w)<img.shape[1]
h0 = int(math.floor(h))
h1 = h0 + 1
w0 = int(math.floor(w))
w1 = w0 + 1
a = h - h0
b = w - w0
h0 = max(h0, 0)
w0 = max(w0, 0)
h1 = min(h1, img.shape[0]-1)
w1 = min(w1, img.shape[1]-1)
A = img[h0,w0,:]
B = img[h1,w0,:]
C = img[h0,w1,:]
D = img[h1,w1,:]
res = (1-a)*(1-b)*A + a*(1-b)*B + (1-a)*b*C + a*b*D
return res
def calculate_angle_distance_from_du_dv(du, dv, flagDegree=False):
a = np.arctan2( dv, du )
angleShift = np.pi
if ( True == flagDegree ):
a = a / np.pi * 180
angleShift = 180
# print("Convert angle from radian to degree as demanded by the input file.")
d = np.sqrt( du * du + dv * dv )
return a, d, angleShift
def visflow(flownp, maxF=500.0, n=8, mask=None, hueMax=179, angShift=0.0):
"""
Show a optical flow field as the KITTI dataset does.
Some parts of this function is the transform of the original MATLAB code flow_to_color.m.
"""
ang, mag, _ = calculate_angle_distance_from_du_dv( flownp[:, :, 0], flownp[:, :, 1], flagDegree=False )
# Use Hue, Saturation, Value colour model
hsv = np.zeros( ( ang.shape[0], ang.shape[1], 3 ) , dtype=np.float32)
am = ang < 0
ang[am] = ang[am] + np.pi * 2
hsv[ :, :, 0 ] = np.remainder( ( ang + angShift ) / (2*np.pi), 1 )
hsv[ :, :, 1 ] = mag / maxF * n
hsv[ :, :, 2 ] = (n - hsv[:, :, 1])/n
hsv[:, :, 0] = np.clip( hsv[:, :, 0], 0, 1 ) * hueMax
hsv[:, :, 1:3] = np.clip( hsv[:, :, 1:3], 0, 1 ) * 255
hsv = hsv.astype(np.uint8)
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
if ( mask is not None ):
mask = mask != 255
bgr[mask] = np.array([0, 0 ,0], dtype=np.uint8)
return bgr
def dataset_intrinsics(dataset='tartanair', is_15mm=False):
if dataset == 'kitti':
focalx, focaly, centerx, centery = 707.0912, 707.0912, 601.8873, 183.1104
baseline = None # to be determined using load_kitti_intrinsics
elif dataset == 'airdos':
focalx, focaly, centerx, centery = 772.54834, 772.54834, 320.0, 180.0
baseline = 1
elif dataset == 'rs_d435':
focalx, focaly, centerx, centery = 384.5080871582031, 384.5080871582031, 316.88897705078125, 240.05723571777344
baseline = 0.05
elif dataset == 'sceneflow':
focalx, focaly, centerx, centery = 1050.0, 1050.0, 479.5, 269.5
if is_15mm:
focalx = focaly = 450.0
baseline = 0.5
elif dataset == 'tartanair':
focalx, focaly, centerx, centery = 320.0, 320.0, 320.0, 240.0
baseline = 1
elif dataset == 'commaai':
focalx, focaly, centerx, centery = 910.0, 910.0, 582.0, 437.0
baseline = 1
else:
return None
return focalx, focaly, centerx, centery, baseline
def plot_traj(gtposes, estposes, vis=False, savefigname=None, title=''):
fig = plt.figure(figsize=(4,4))
cm = plt.cm.get_cmap('Spectral')
plt.subplot(111)
plt.plot(gtposes[:,0],gtposes[:,1], linestyle='dashed',c='k')
plt.plot(estposes[:, 0], estposes[:, 1],c='#ff7f0e')
plt.xlabel('x (m)')
plt.ylabel('y (m)')
plt.legend(['Ground Truth','TartanVO'])
plt.title(title)
if savefigname is not None:
plt.savefig(savefigname)
if vis:
plt.show()
plt.close(fig)
def make_intrinsics_layer(w, h, fx, fy, ox, oy):
ww, hh = np.meshgrid(range(w), range(h))
ww = (ww.astype(np.float32) - ox + 0.5 )/fx
hh = (hh.astype(np.float32) - oy + 0.5 )/fy
intrinsicLayer = np.stack((ww,hh)).transpose(1,2,0)
return intrinsicLayer
def load_kiiti_intrinsics(filename):
'''
load intrinsics from kitti intrinsics file
'''
data = {}
with open(filename, 'r') as f:
for line in f.readlines():
key, value = line.split(':', 1)
# The only non-float values in these files are dates, which
# we don't care about anyway
try:
data[key] = np.array([float(x) for x in value.split()])
except ValueError:
pass
P2 = np.reshape(data['P2'], (3,4))
P3 = np.reshape(data['P3'], (3,4))
focalx, focaly, centerx, centery = float(P2[0,0]), float(P2[1,1]), float(P2[0,2]), float(P2[1,2])
baseline = P2[0,3] / P2[0,0] - P3[0,3] / P3[0,0]
return focalx, focaly, centerx, centery, baseline
def load_sceneflow_extrinsics(filename):
with open(filename, 'r') as f:
lines = f.readlines()
l_exts = []
r_exts = []
for l in lines:
if 'L ' in l:
l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
if 'R ' in l:
r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
if 'into_future' in filename:
fids = np.arange(0, len(l_exts))
else:
fids = np.arange(len(l_exts) - 1, -1, -1)
# assuming left camera is used by default
camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1 # Sceneflow uses Blender's coordinate system
pose_quats = []
pose = np.eye(4)
for fid in fids:
ext0 = l_exts[fid]
ext1 = l_exts[fid+1] if 'into_future' in filename else l_exts[fid-1]
motion = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT) # ext is from camera space to world space
pose = pose @ motion
pose_quat = np.zeros(7)
pose_quat[3:] = R.from_matrix(pose[:3,:3]).as_quat()
pose_quat[:3] = pose[:3,3]
pose_quats.append(pose_quat)
return pose_quats
================================================
FILE: DytanVO.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Shihao Shen, CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of CMU nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import cv2
import torch
import torch.nn as nn
import numpy as np
import time
np.set_printoptions(precision=4, suppress=True, threshold=10000)
from torch.autograd import Variable
from Network.VONet import VONet
from Network.rigidmask.VCNplus import SegNet, WarpModule, flow_reg
from Datasets.utils import CropCenter, ResizeData
from Datasets.cowmask import cow_masks
from evaluator.transformation import se2SE
class DytanVO(object):
def __init__(self, vo_model_name, seg_model_name, image_height, image_width, is_kitti=False, flow_model_name=None, pose_model_name=None):
# import ipdb;ipdb.set_trace()
self.vonet = VONet()
# load VO model separately (flow + pose) or at once
if flow_model_name.endswith('.pkl') and pose_model_name.endswith('.pkl'):
modelname = 'models/' + flow_model_name
self.load_vo_model(self.vonet.flowNet, modelname)
modelname = 'models/' + pose_model_name
self.load_vo_model(self.vonet.flowPoseNet, modelname)
else:
modelname = 'models/' + vo_model_name
self.load_vo_model(self.vonet, modelname)
self.vonet.cuda()
self.test_count = 0
self.pose_norm = np.array([ 0.13, 0.13, 0.13, 0.013 , 0.013, 0.013], dtype=np.float32) # the output scale factor
self.flow_norm = 20 # scale factor for flow
# load the segmentation model
self.testres = 1.2
if is_kitti:
maxw, maxh = [int(self.testres * 1280), int(self.testres * 384)]
else:
maxw, maxh = [int(self.testres * 1024), int(self.testres * 448)]
max_h = int(maxh // 64 * 64)
max_w = int(maxw // 64 * 64)
if max_h < maxh: max_h += 64
if max_w < maxw: max_w += 64
maxh = max_h
maxw = max_w
self.segnet = SegNet([1, maxw, maxh], md=[4, 4, 4, 4, 4], fac=1, exp_unc=not ('kitti' in seg_model_name))
segmodelname = 'models/' + seg_model_name
self.segnet = self.load_seg_model(self.segnet, segmodelname)
self.segnet.cuda()
self.segnet_initialize = False
# To resize/crop segmentation mask
self.resizedata = ResizeData(size=(image_height,1226)) if is_kitti else None
self.cropdata = CropCenter((image_height, image_width))
# To transform coordinates from NED to Blender
Ry90 = np.array([[0,0,1,0], [0,1,0,0], [-1,0,0,0], [0,0,0,1]])
Rx90 = np.array([[1,0,0,0], [0,0,-1,0], [0,1,0,0], [0,0,0,1]])
self.camT = Rx90.dot(Ry90)
self.sigmoid = lambda x: 1/(1 + np.exp(-x))
def load_vo_model(self, model, modelname):
preTrainDict = torch.load(modelname)
model_dict = model.state_dict()
preTrainDictTemp = {k:v for k,v in preTrainDict.items() if k in model_dict}
if( 0 == len(preTrainDictTemp) ):
print("Does not find any module to load. Try DataParallel version.")
for k, v in preTrainDict.items():
kk = k[7:]
if ( kk in model_dict ):
preTrainDictTemp[kk] = v
if ( 0 == len(preTrainDictTemp) ):
raise Exception("Could not load model from %s." % (modelname), "load_model")
model_dict.update(preTrainDictTemp)
model.load_state_dict(model_dict)
print('VO Model %s loaded...' % modelname)
return model
def load_seg_model(self, model, modelname):
model = nn.DataParallel(model, device_ids=[0])
preTrainDict = torch.load(modelname, map_location='cpu')
self.mean_L = preTrainDict['mean_L']
self.mean_R = preTrainDict['mean_R']
preTrainDict['state_dict'] = {k:v for k,v in preTrainDict['state_dict'].items()}
model.load_state_dict(preTrainDict['state_dict'], strict=False)
print('Segmentation Model %s loaded...' % modelname)
return model
def test_batch(self, sample, intrinsics, seg_thresh, iter_num):
print("="*20)
self.test_count += 1
# import ipdb;ipdb.set_trace()
img0 = sample['img1'].cuda()
img1 = sample['img2'].cuda()
intrinsic = sample['intrinsic'].cuda() # intrinsic layer
img0_raw = sample['img1_raw'].detach().numpy().squeeze()
img1_raw = sample['img2_raw'].detach().numpy().squeeze()
if not self.segnet_initialize:
self.vonet.eval()
self.segnet.eval()
self.initialize_segnet_input(img0_raw, intrinsics)
self.segnet_initialize = True
with torch.no_grad():
imgL_noaug, imgLR = self.transform_segnet_input(img0_raw, img1_raw)
flowdc = self.segnet.module.forward_VCN(imgLR)
total_time = 0
start_time = time.time()
flow_output, _ = self.vonet([img0, img1], only_flow=True)
flownet_time = time.time() - start_time
total_time += flownet_time
print("Flownet time: %.2f" % flownet_time)
seg_thresholds = np.linspace(seg_thresh, 0.95, iter_num - 1)[::-1]
for iter in range(iter_num):
flow = flow_output.clone()
if iter == 0:
cow_sigma_range = (20, 60)
log_sigma_range = (np.log(cow_sigma_range[0]), np.log(cow_sigma_range[1]))
cow_prop_range = (0.3, 0.6)
segmask = cow_masks(flow.shape[-2:], log_sigma_range, cow_sigma_range[1], cow_prop_range).astype(np.float32)
segmask = segmask[None,None,...]
segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()
start_time = time.time()
_, pose_output = self.vonet([img0, img1, intrinsic, flow, segmask], only_pose=True)
posenet_time = time.time() - start_time
total_time += posenet_time
print("Iter %d, Posenet time: %.2f; " % (iter, posenet_time), end='')
# Do not pass segnet in the last iteration
if iter == iter_num - 1:
break
seg_thresh = seg_thresholds[iter] if iter < iter_num-1 else seg_thresh
pose_input = pose_output.data.cpu().detach().numpy().squeeze()
pose_input = pose_input * self.pose_norm
pose_input = self.camT.T.dot(se2SE(pose_input)).dot(self.camT)
start_time = time.time()
disc_aux = [self.intr_list, imgL_noaug, pose_input[:3,:]]
fgmask = self.segnet(imgLR, disc_aux, flowdc)
segnet_time = time.time() - start_time
total_time += segnet_time
fgmask = cv2.resize(fgmask.cpu().numpy(), (self.input_size[1], self.input_size[0]), interpolation=cv2.INTER_LINEAR).astype(np.float32)
fg_probs = self.sigmoid(fgmask)
segmask = np.zeros(fgmask.shape[:2])
segmask[fg_probs > seg_thresh] = 1.0
# Resize/Crop segmask (Resize + Crop + Downscale 1/4)
dummysample = {'segmask': segmask}
if self.resizedata is not None:
dummysample = self.resizedata(dummysample)
dummysample = self.cropdata(dummysample)
segmask = dummysample['segmask']
segmask = cv2.resize(segmask, (0,0), fx=0.25, fy=0.25, interpolation=cv2.INTER_LINEAR)
segmask = segmask[None,None,...].astype(np.float32)
segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()
print("Segnet time: %.2f" % segnet_time)
posenp = pose_output.data.cpu().detach().numpy().squeeze()
posenp = posenp * self.pose_norm # The output is normalized during training, now scale it back
flownp = flow.data.cpu().detach().numpy().squeeze()
flownp = flownp * self.flow_norm
# # calculate scale from GT posefile
# if 'motion' in sample:
# motions_gt = sample['motion']
# scale = np.linalg.norm(motions_gt[:,:3], axis=1)
# trans_est = posenp[:,:3]
# trans_est = trans_est/np.linalg.norm(trans_est,axis=1).reshape(-1,1)*scale.reshape(-1,1)
# posenp[:,:3] = trans_est
# else:
# print(' scale is not given, using 1 as the default scale value..')
print("\n{} Pose inference using {}s: \n{}\n".format(self.test_count, total_time, posenp))
return posenp, flownp
def initialize_segnet_input(self, imgL_o, intrinsics):
maxh = imgL_o.shape[0] * self.testres
maxw = imgL_o.shape[1] * self.testres
self.max_h = int(maxh // 64 * 64)
self.max_w = int(maxw // 64 * 64)
if self.max_h < maxh: self.max_h += 64
if self.max_w < maxw: self.max_w += 64
self.input_size = imgL_o.shape
# modify module according to inputs
for i in range(len(self.segnet.module.reg_modules)):
self.segnet.module.reg_modules[i] = flow_reg([1, self.max_w//(2**(6-i)), self.max_h//(2**(6-i))],
ent=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).ent,\
maxdisp=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).md,\
fac=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).fac).cuda()
for i in range(len(self.segnet.module.warp_modules)):
self.segnet.module.warp_modules[i] = WarpModule([1, self.max_w//(2**(6-i)), self.max_h//(2**(6-i))]).cuda()
# foramt intrinsics input
fl, cx, cy, bl = intrinsics
fl_next = fl # assuming focal length remains the same across frames
self.intr_list = [torch.Tensor(inxx).cuda() for inxx in [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]]
self.intr_list.append(torch.Tensor([self.input_size[1] / self.max_w]).cuda()) # delta fx
self.intr_list.append(torch.Tensor([self.input_size[0] / self.max_h]).cuda()) # delta fy
self.intr_list.append(torch.Tensor([fl_next]).cuda())
def transform_segnet_input(self, imgL_o, imgR_o):
imgL = cv2.resize(imgL_o, (self.max_w, self.max_h))
imgR = cv2.resize(imgR_o, (self.max_w, self.max_h))
imgL_noaug = torch.Tensor(imgL / 255.)[np.newaxis].float().cuda()
# flip channel, subtract mean
imgL = imgL[:,:,::-1].copy() / 255. - np.asarray(self.mean_L).mean(0)[np.newaxis,np.newaxis,:]
imgR = imgR[:,:,::-1].copy() / 255. - np.asarray(self.mean_R).mean(0)[np.newaxis,np.newaxis,:]
imgL = np.transpose(imgL, [2,0,1])[np.newaxis]
imgR = np.transpose(imgR, [2,0,1])[np.newaxis]
imgL = Variable(torch.FloatTensor(imgL).cuda())
imgR = Variable(torch.FloatTensor(imgR).cuda())
imgLR = torch.cat([imgL,imgR],0)
return imgL_noaug, imgLR
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2020, Air Lab Stacks
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Network/PWC/PWCNet.py
================================================
"""
implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018
Jinwei Gu and Zhile Ren
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
from .correlation import FunctionCorrelation
import cv2 # debug
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
return nn.Sequential(
nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
padding=padding, dilation=dilation, bias=True),
nn.LeakyReLU(0.1))
def predict_flow(in_planes):
return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)
def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True)
class PWCDCNet(nn.Module):
"""
PWC-DC net. add dilation convolution and densenet connections
"""
def __init__(self, md=4, flow_norm=20.0):
"""
input: md --- maximum displacement (for correlation. default: 4), after warpping
"""
super(PWCDCNet,self).__init__()
self.flow_norm = flow_norm
self.conv1a = conv(3, 16, kernel_size=3, stride=2)
self.conv1aa = conv(16, 16, kernel_size=3, stride=1)
self.conv1b = conv(16, 16, kernel_size=3, stride=1)
self.conv2a = conv(16, 32, kernel_size=3, stride=2)
self.conv2aa = conv(32, 32, kernel_size=3, stride=1)
self.conv2b = conv(32, 32, kernel_size=3, stride=1)
self.conv3a = conv(32, 64, kernel_size=3, stride=2)
self.conv3aa = conv(64, 64, kernel_size=3, stride=1)
self.conv3b = conv(64, 64, kernel_size=3, stride=1)
self.conv4a = conv(64, 96, kernel_size=3, stride=2)
self.conv4aa = conv(96, 96, kernel_size=3, stride=1)
self.conv4b = conv(96, 96, kernel_size=3, stride=1)
self.conv5a = conv(96, 128, kernel_size=3, stride=2)
self.conv5aa = conv(128,128, kernel_size=3, stride=1)
self.conv5b = conv(128,128, kernel_size=3, stride=1)
self.conv6aa = conv(128,196, kernel_size=3, stride=2)
self.conv6a = conv(196,196, kernel_size=3, stride=1)
self.conv6b = conv(196,196, kernel_size=3, stride=1)
# self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
self.leakyRELU = nn.LeakyReLU(0.1)
nd = (2*md+1)**2
dd = np.cumsum([128,128,96,64,32])
od = nd
self.conv6_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv6_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv6_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv6_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow6 = predict_flow(od+dd[4])
self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+128+4
self.conv5_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv5_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv5_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv5_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow5 = predict_flow(od+dd[4])
self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+96+4
self.conv4_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv4_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv4_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv4_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow4 = predict_flow(od+dd[4])
self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+64+4
self.conv3_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv3_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv3_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv3_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow3 = predict_flow(od+dd[4])
self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+32+4
self.conv2_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv2_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv2_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv2_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow2 = predict_flow(od+dd[4])
self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc_conv7 = predict_flow(32)
for m in self.modules():
if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal(m.weight.data, mode='fan_in')
if m.bias is not None:
m.bias.data.zero_()
def warp(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
B, C, H, W = x.size()
# mesh grid
xx = torch.arange(0, W).view(1,-1).repeat(H,1)
yy = torch.arange(0, H).view(-1,1).repeat(1,W)
xx = xx.view(1,1,H,W).repeat(B,1,1,1)
yy = yy.view(1,1,H,W).repeat(B,1,1,1)
grid = torch.cat((xx,yy),1).float()
if x.is_cuda:
grid = grid.cuda()
vgrid = grid + flo
# scale grid to [-1,1]
vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:].clone() / max(W-1,1)-1.0
vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:].clone() / max(H-1,1)-1.0
vgrid = vgrid.permute(0,2,3,1)
output = nn.functional.grid_sample(x, vgrid, align_corners=True)
mask = torch.ones(x.size()).cuda()
mask = nn.functional.grid_sample(mask, vgrid, align_corners=True)
# if W==128:
# np.save('mask.npy', mask.cpu().data.numpy())
# np.save('warp.npy', output.cpu().data.numpy())
mask[mask<0.9999] = 0
mask[mask>0] = 1
return output*mask
def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_func, conv4_func, input_feat):
x = torch.cat((conv0_func(input_feat), input_feat),1)
x = torch.cat((conv1_func(x), x),1)
x = torch.cat((conv2_func(x), x),1)
x = torch.cat((conv3_func(x), x),1)
x = torch.cat((conv4_func(x), x),1)
return x
def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_high, feat_low1, feat_low2, scale):
flow_high = pred_func(feat_high)
up_flow_high = decon_func(flow_high)
up_feat_high = upfeat_func(feat_high)
warp_feat = self.warp(feat_low2, up_flow_high*scale)
corr_low = FunctionCorrelation(tenFirst=feat_low1, tenSecond=warp_feat)
corr_low = self.leakyRELU(corr_low)
x = torch.cat((corr_low, feat_low1, up_flow_high, up_feat_high), 1)
return x, flow_high
def forward(self,x):
im1 = x[:,0:3,...]
im2 = x[:,3:6,...]
c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))
# corr6 = self.corr(c16, c26)
corr6 = FunctionCorrelation(tenFirst=c16, tenSecond=c26)
corr6 = self.leakyRELU(corr6)
x = self.multi_scale_conv(self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4, corr6)
x, flow6 = self.concate_two_layers(self.predict_flow6, self.deconv6, self.upfeat6, x, c15, c25, 0.625)
x = self.multi_scale_conv(self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, x)
x, flow5 = self.concate_two_layers(self.predict_flow5, self.deconv5, self.upfeat5, x, c14, c24, 1.25)
x = self.multi_scale_conv(self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, x)
x, flow4 = self.concate_two_layers(self.predict_flow4, self.deconv4, self.upfeat4, x, c13, c23, 2.5)
x = self.multi_scale_conv(self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, x)
x, flow3 = self.concate_two_layers(self.predict_flow3, self.deconv3, self.upfeat3, x, c12, c22, 5.0)
x = self.multi_scale_conv(self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4, x)
flow2 = self.predict_flow2(x)
x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
refine = self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))
flow2 = flow2 + refine
return flow2
def pwc_dc_net(path=None):
model = PWCDCNet()
if path is not None:
data = torch.load(path)
if 'state_dict' in data.keys():
model.load_state_dict(data['state_dict'])
else:
model.load_state_dict(data)
return model
================================================
FILE: Network/PWC/__init__.py
================================================
from .PWCNet import *
================================================
FILE: Network/PWC/correlation.py
================================================
#!/usr/bin/env python
import torch
import cupy
import re
kernel_Correlation_rearrange = '''
extern "C" __global__ void kernel_Correlation_rearrange(
const int n,
const float* input,
float* output
) {
int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;
if (intIndex >= n) {
return;
}
int intSample = blockIdx.z;
int intChannel = blockIdx.y;
float fltValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];
__syncthreads();
int intPaddedY = (intIndex / SIZE_3(input)) + 4;
int intPaddedX = (intIndex % SIZE_3(input)) + 4;
int intRearrange = ((SIZE_3(input) + 8) * intPaddedY) + intPaddedX;
output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = fltValue;
}
'''
kernel_Correlation_updateOutput = '''
extern "C" __global__ void kernel_Correlation_updateOutput(
const int n,
const float* rbot0,
const float* rbot1,
float* top
) {
extern __shared__ char patch_data_char[];
float *patch_data = (float *)patch_data_char;
// First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1
int x1 = blockIdx.x + 4;
int y1 = blockIdx.y + 4;
int item = blockIdx.z;
int ch_off = threadIdx.x;
// Load 3D patch into shared shared memory
for (int j = 0; j < 1; j++) { // HEIGHT
for (int i = 0; i < 1; i++) { // WIDTH
int ji_off = (j + i) * SIZE_3(rbot0);
for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;
int idxPatchData = ji_off + ch;
patch_data[idxPatchData] = rbot0[idx1];
}
}
}
__syncthreads();
__shared__ float sum[32];
// Compute correlation
for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {
sum[ch_off] = 0;
int s2o = top_channel % 9 - 4;
int s2p = top_channel / 9 - 4;
for (int j = 0; j < 1; j++) { // HEIGHT
for (int i = 0; i < 1; i++) { // WIDTH
int ji_off = (j + i) * SIZE_3(rbot0);
for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
int x2 = x1 + s2o;
int y2 = y1 + s2p;
int idxPatchData = ji_off + ch;
int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;
sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];
}
}
}
__syncthreads();
if (ch_off == 0) {
float total_sum = 0;
for (int idx = 0; idx < 32; idx++) {
total_sum += sum[idx];
}
const int sumelems = SIZE_3(rbot0);
const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;
top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;
}
}
}
'''
kernel_Correlation_updateGradFirst = '''
#define ROUND_OFF 50000
extern "C" __global__ void kernel_Correlation_updateGradFirst(
const int n,
const int intSample,
const float* rbot0,
const float* rbot1,
const float* gradOutput,
float* gradFirst,
float* gradSecond
) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
int n = intIndex % SIZE_1(gradFirst); // channels
int l = (intIndex / SIZE_1(gradFirst)) % SIZE_3(gradFirst) + 4; // w-pos
int m = (intIndex / SIZE_1(gradFirst) / SIZE_3(gradFirst)) % SIZE_2(gradFirst) + 4; // h-pos
// round_off is a trick to enable integer division with ceil, even for negative numbers
// We use a large offset, for the inner part not to become negative.
const int round_off = ROUND_OFF;
const int round_off_s1 = round_off;
// We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
int xmin = (l - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
int ymin = (m - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
// Same here:
int xmax = (l - 4 + round_off_s1) - round_off; // floor (l - 4)
int ymax = (m - 4 + round_off_s1) - round_off; // floor (m - 4)
float sum = 0;
if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
xmin = max(0,xmin);
xmax = min(SIZE_3(gradOutput)-1,xmax);
ymin = max(0,ymin);
ymax = min(SIZE_2(gradOutput)-1,ymax);
for (int p = -4; p <= 4; p++) {
for (int o = -4; o <= 4; o++) {
// Get rbot1 data:
int s2o = o;
int s2p = p;
int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;
float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]
// Index offset for gradOutput in following loops:
int op = (p+4) * 9 + (o+4); // index[o,p]
int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
for (int y = ymin; y <= ymax; y++) {
for (int x = xmin; x <= xmax; x++) {
int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
sum += gradOutput[idxgradOutput] * bot1tmp;
}
}
}
}
}
const int sumelems = SIZE_1(gradFirst);
const int bot0index = ((n * SIZE_2(gradFirst)) + (m-4)) * SIZE_3(gradFirst) + (l-4);
gradFirst[bot0index + intSample*SIZE_1(gradFirst)*SIZE_2(gradFirst)*SIZE_3(gradFirst)] = sum / (float)sumelems;
} }
'''
kernel_Correlation_updateGradSecond = '''
#define ROUND_OFF 50000
extern "C" __global__ void kernel_Correlation_updateGradSecond(
const int n,
const int intSample,
const float* rbot0,
const float* rbot1,
const float* gradOutput,
float* gradFirst,
float* gradSecond
) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
int n = intIndex % SIZE_1(gradSecond); // channels
int l = (intIndex / SIZE_1(gradSecond)) % SIZE_3(gradSecond) + 4; // w-pos
int m = (intIndex / SIZE_1(gradSecond) / SIZE_3(gradSecond)) % SIZE_2(gradSecond) + 4; // h-pos
// round_off is a trick to enable integer division with ceil, even for negative numbers
// We use a large offset, for the inner part not to become negative.
const int round_off = ROUND_OFF;
const int round_off_s1 = round_off;
float sum = 0;
for (int p = -4; p <= 4; p++) {
for (int o = -4; o <= 4; o++) {
int s2o = o;
int s2p = p;
//Get X,Y ranges and clamp
// We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
int xmin = (l - 4 - s2o + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
int ymin = (m - 4 - s2p + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
// Same here:
int xmax = (l - 4 - s2o + round_off_s1) - round_off; // floor (l - 4 - s2o)
int ymax = (m - 4 - s2p + round_off_s1) - round_off; // floor (m - 4 - s2p)
if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
xmin = max(0,xmin);
xmax = min(SIZE_3(gradOutput)-1,xmax);
ymin = max(0,ymin);
ymax = min(SIZE_2(gradOutput)-1,ymax);
// Get rbot0 data:
int idxbot0 = ((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;
float bot0tmp = rbot0[idxbot0]; // rbot1[l+s2o,m+s2p,n]
// Index offset for gradOutput in following loops:
int op = (p+4) * 9 + (o+4); // index[o,p]
int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
for (int y = ymin; y <= ymax; y++) {
for (int x = xmin; x <= xmax; x++) {
int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
sum += gradOutput[idxgradOutput] * bot0tmp;
}
}
}
}
}
const int sumelems = SIZE_1(gradSecond);
const int bot1index = ((n * SIZE_2(gradSecond)) + (m-4)) * SIZE_3(gradSecond) + (l-4);
gradSecond[bot1index + intSample*SIZE_1(gradSecond)*SIZE_2(gradSecond)*SIZE_3(gradSecond)] = sum / (float)sumelems;
} }
'''
def cupy_kernel(strFunction, objVariables):
strKernel = globals()[strFunction]
while True:
objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)
if objMatch is None:
break
# end
intArg = int(objMatch.group(2))
strTensor = objMatch.group(4)
intSizes = objVariables[strTensor].size()
strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg]))
# end
while True:
objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)
if objMatch is None:
break
# end
intArgs = int(objMatch.group(2))
strArgs = objMatch.group(4).split(',')
strTensor = strArgs[0]
intStrides = objVariables[strTensor].stride()
strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]
strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')
# end
return strKernel
# end
# @cupy.util.memoize(for_each_device=True)
def cupy_launch(strFunction, strKernel):
return cupy.cuda.compile_with_cache(strKernel).get_function(strFunction)
# end
class _FunctionCorrelation(torch.autograd.Function):
@staticmethod
def forward(self, first, second):
rbot0 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])
rbot1 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])
self.save_for_backward(first, second, rbot0, rbot1)
assert(first.is_contiguous() == True)
assert(second.is_contiguous() == True)
output = first.new_zeros([ first.shape[0], 81, first.shape[2], first.shape[3] ])
if first.is_cuda == True:
n = first.shape[2] * first.shape[3]
cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
'input': first,
'output': rbot0
}))(
grid=tuple([ int((n + 16 - 1) / 16), first.shape[1], first.shape[0] ]),
block=tuple([ 16, 1, 1 ]),
args=[ n, first.data_ptr(), rbot0.data_ptr() ]
)
n = second.shape[2] * second.shape[3]
cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
'input': second,
'output': rbot1
}))(
grid=tuple([ int((n + 16 - 1) / 16), second.shape[1], second.shape[0] ]),
block=tuple([ 16, 1, 1 ]),
args=[ n, second.data_ptr(), rbot1.data_ptr() ]
)
n = output.shape[1] * output.shape[2] * output.shape[3]
cupy_launch('kernel_Correlation_updateOutput', cupy_kernel('kernel_Correlation_updateOutput', {
'rbot0': rbot0,
'rbot1': rbot1,
'top': output
}))(
grid=tuple([ output.shape[3], output.shape[2], output.shape[0] ]),
block=tuple([ 32, 1, 1 ]),
shared_mem=first.shape[1] * 4,
args=[ n, rbot0.data_ptr(), rbot1.data_ptr(), output.data_ptr() ]
)
elif first.is_cuda == False:
raise NotImplementedError()
# end
return output
# end
@staticmethod
def backward(self, gradOutput):
first, second, rbot0, rbot1 = self.saved_tensors
assert(gradOutput.is_contiguous() == True)
gradFirst = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[0] == True else None
gradSecond = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[1] == True else None
if first.is_cuda == True:
if gradFirst is not None:
for intSample in range(first.shape[0]):
n = first.shape[1] * first.shape[2] * first.shape[3]
cupy_launch('kernel_Correlation_updateGradFirst', cupy_kernel('kernel_Correlation_updateGradFirst', {
'rbot0': rbot0,
'rbot1': rbot1,
'gradOutput': gradOutput,
'gradFirst': gradFirst,
'gradSecond': None
}))(
grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
block=tuple([ 512, 1, 1 ]),
args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), gradFirst.data_ptr(), None ]
)
# end
# end
if gradSecond is not None:
for intSample in range(first.shape[0]):
n = first.shape[1] * first.shape[2] * first.shape[3]
cupy_launch('kernel_Correlation_updateGradSecond', cupy_kernel('kernel_Correlation_updateGradSecond', {
'rbot0': rbot0,
'rbot1': rbot1,
'gradOutput': gradOutput,
'gradFirst': None,
'gradSecond': gradSecond
}))(
grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
block=tuple([ 512, 1, 1 ]),
args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), None, gradSecond.data_ptr() ]
)
# end
# end
elif first.is_cuda == False:
raise NotImplementedError()
# end
return gradFirst, gradSecond
# end
# end
def FunctionCorrelation(tenFirst, tenSecond):
return _FunctionCorrelation.apply(tenFirst, tenSecond)
# end
class ModuleCorrelation(torch.nn.Module):
def __init__(self):
super(ModuleCorrelation, self).__init__()
# end
def forward(self, tenFirst, tenSecond):
return _FunctionCorrelation.apply(tenFirst, tenSecond)
# end
# end
================================================
FILE: Network/VOFlowNet.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Wenshan Wang, CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of CMU nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dilation=1, bn_layer=False, bias=True):
if bn_layer:
return nn.Sequential(
nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation, bias=bias),
nn.BatchNorm2d(out_planes),
nn.ReLU(inplace=True)
)
else:
return nn.Sequential(
nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation),
nn.ReLU(inplace=True)
)
def linear(in_planes, out_planes):
return nn.Sequential(
nn.Linear(in_planes, out_planes),
nn.ReLU(inplace=True)
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
super(BasicBlock, self).__init__()
self.conv1 = conv(inplanes, planes, 3, stride, pad, dilation)
self.conv2 = nn.Conv2d(planes, planes, 3, 1, pad, dilation)
self.downsample = downsample
self.stride = stride
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
if self.downsample is not None:
x = self.downsample(x)
out += x
return F.relu(out, inplace=True)
class VOFlowRes(nn.Module):
def __init__(self):
super(VOFlowRes, self).__init__()
inputnum = 5
blocknums = [2,2,3,4,6,7,3]
outputnums = [32,64,64,128,128,256,256]
self.firstconv = nn.Sequential(conv(inputnum, 32, 3, 2, 1, 1, False),
conv(32, 32, 3, 1, 1, 1),
conv(32, 32, 3, 1, 1, 1))
self.inplanes = 32
self.layer1 = self._make_layer(BasicBlock, outputnums[2], blocknums[2], 2, 1, 1) # 40 x 28
self.layer2 = self._make_layer(BasicBlock, outputnums[3], blocknums[3], 2, 1, 1) # 20 x 14
self.layer3 = self._make_layer(BasicBlock, outputnums[4], blocknums[4], 2, 1, 1) # 10 x 7
self.layer4 = self._make_layer(BasicBlock, outputnums[5], blocknums[5], 2, 1, 1) # 5 x 4
self.layer5 = self._make_layer(BasicBlock, outputnums[6], blocknums[6], 2, 1, 1) # 3 x 2
fcnum = outputnums[6] * 6
fc1_trans = linear(fcnum, 128)
fc2_trans = linear(128,32)
fc3_trans = nn.Linear(32,3)
fc1_rot = linear(fcnum, 128)
fc2_rot = linear(128,32)
fc3_rot = nn.Linear(32,3)
self.voflow_trans = nn.Sequential(fc1_trans, fc2_trans, fc3_trans)
self.voflow_rot = nn.Sequential(fc1_rot, fc2_rot, fc3_rot)
def _make_layer(self, block, planes, blocks, stride, pad, dilation):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes,1,None,pad,dilation))
return nn.Sequential(*layers)
def forward(self, x):
x = self.firstconv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
x = x.view(x.shape[0], -1)
x_trans = self.voflow_trans(x)
x_rot = self.voflow_rot(x)
return torch.cat((x_trans, x_rot), dim=1)
================================================
FILE: Network/VONet.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Wenshan Wang, Yaoyu Hu, CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of CMU nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import torch
import torch.nn as nn
import torch.nn.functional as F
from .PWC import PWCDCNet as FlowNet
from .VOFlowNet import VOFlowRes as FlowPoseNet
class VONet(nn.Module):
def __init__(self):
super(VONet, self).__init__()
self.flowNet = FlowNet()
self.flowPoseNet = FlowPoseNet()
def forward(self, x, only_flow=False, only_pose=False):
'''
x[0]: rgb frame t-1
x[1]: rgb frame t
x[2]: intrinsics
x[3]: flow t-1 -> t (optional)
x[4]: motion segmentation mask
'''
# import ipdb;ipdb.set_trace()
if not only_pose:
flow_out = self.flowNet(torch.cat((x[0], x[1]), dim=1))
if only_flow:
return flow_out, None
flow = flow_out[0]
else:
assert(len(x) > 3)
flow_out = None
if len(x) > 3 and x[3] is not None:
flow_input = x[3]
else:
flow_input = flow
# Mask out input flow using the segmentation result
assert(len(x) > 4)
mask = torch.gt(x[4], 0)
for i in range(flow_input.shape[0]):
zeros = torch.cat([mask[i], ]*2, dim=0)
flow_input[i][zeros] = 0
flow_input = torch.cat((flow_input, 1 - x[4]), dim=1) # segmentation layer
flow_input = torch.cat((flow_input, x[2]), dim=1) # intrinsics layer
pose = self.flowPoseNet(flow_input)
return flow_out, pose
================================================
FILE: Network/__init__.py
================================================
================================================
FILE: Network/rigidmask/.gitignore
================================================
__pycache__
================================================
FILE: Network/rigidmask/VCNplus.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import math
import pdb
import cv2
import kornia
from .submodule import pspnet, bfmodule, bfmodule_feat, conv, compute_geo_costs, get_skew_mat, get_intrinsics
from .conv4d import sepConv4d, butterfly4D
from scipy.spatial.transform import Rotation
from .det import create_model
class flow_reg(nn.Module):
"""
Soft winner-take-all that selects the most likely diplacement.
Set ent=True to enable entropy output.
Set maxdisp to adjust maximum allowed displacement towards one side.
maxdisp=4 searches for a 9x9 region.
Set fac to squeeze search window.
maxdisp=4 and fac=2 gives search window of 9x5
"""
def __init__(self, size, ent=False, maxdisp = int(4), fac=1):
B,W,H = size
super(flow_reg, self).__init__()
self.ent = ent
self.md = maxdisp
self.fac = fac
self.truncated = True
self.wsize = 3 # by default using truncation 7x7
flowrangey = range(-maxdisp,maxdisp+1)
flowrangex = range(-int(maxdisp//self.fac),int(maxdisp//self.fac)+1)
meshgrid = np.meshgrid(flowrangex,flowrangey)
flowy = np.tile( np.reshape(meshgrid[0],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )
flowx = np.tile( np.reshape(meshgrid[1],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )
self.register_buffer('flowx',torch.Tensor(flowx))
self.register_buffer('flowy',torch.Tensor(flowy))
self.pool3d = nn.MaxPool3d((self.wsize*2+1,self.wsize*2+1,1),stride=1,padding=(self.wsize,self.wsize,0))
def forward(self, x):
b,u,v,h,w = x.shape
oldx = x
if self.truncated:
# truncated softmax
x = x.view(b,u*v,h,w)
idx = x.argmax(1)[:,np.newaxis]
if x.is_cuda:
mask = Variable(torch.cuda.HalfTensor(b,u*v,h,w)).fill_(0)
else:
mask = Variable(torch.FloatTensor(b,u*v,h,w)).fill_(0)
mask.scatter_(1,idx,1)
mask = mask.view(b,1,u,v,-1)
mask = self.pool3d(mask)[:,0].view(b,u,v,h,w)
ninf = x.clone().fill_(-np.inf).view(b,u,v,h,w)
x = torch.where(mask.byte(),oldx,ninf)
else:
self.wsize = (np.sqrt(u*v)-1)/2
b,u,v,h,w = x.shape
x = F.softmax(x.view(b,-1,h,w),1).view(b,u,v,h,w)
if np.isnan(x.min().detach().cpu()):
#pdb.set_trace()
x[torch.isnan(x)] = F.softmax(oldx[torch.isnan(x)])
outx = torch.sum(torch.sum(x*self.flowx,1),1,keepdim=True)
outy = torch.sum(torch.sum(x*self.flowy,1),1,keepdim=True)
if self.ent:
# local
local_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]
if self.wsize == 0:
local_entropy[:] = 1.
else:
local_entropy /= np.log((self.wsize*2+1)**2)
# global
x = F.softmax(oldx.view(b,-1,h,w),1).view(b,u,v,h,w)
global_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]
global_entropy /= np.log(x.shape[1]*x.shape[2])
return torch.cat([outx,outy],1),torch.cat([local_entropy, global_entropy],1)
else:
return torch.cat([outx,outy],1),None
class WarpModule(nn.Module):
"""
taken from https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py
"""
def __init__(self, size):
super(WarpModule, self).__init__()
B,W,H = size
# mesh grid
xx = torch.arange(0, W).view(1,-1).repeat(H,1)
yy = torch.arange(0, H).view(-1,1).repeat(1,W)
xx = xx.view(1,1,H,W).repeat(B,1,1,1)
yy = yy.view(1,1,H,W).repeat(B,1,1,1)
self.register_buffer('grid',torch.cat((xx,yy),1).float())
def forward(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
B, C, H, W = x.size()
vgrid = self.grid + flo
# scale grid to [-1,1]
vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:]/max(W-1,1)-1.0
vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:]/max(H-1,1)-1.0
vgrid = vgrid.permute(0,2,3,1)
#output = nn.functional.grid_sample(x, vgrid)
output = nn.functional.grid_sample(x, vgrid, align_corners=True)
mask = ((vgrid[:,:,:,0].abs()<1) * (vgrid[:,:,:,1].abs()<1)) >0
return output*mask.unsqueeze(1).float(), mask
def get_grid(B,H,W):
meshgrid_base = np.meshgrid(range(0,W), range(0,H))[::-1]
basey = np.reshape(meshgrid_base[0],[1,1,1,H,W])
basex = np.reshape(meshgrid_base[1],[1,1,1,H,W])
grid = torch.tensor(np.concatenate((basex.reshape((-1,H,W,1)),basey.reshape((-1,H,W,1))),-1)).cuda().float()
return grid.view(1,1,H,W,2)
class SegNet(nn.Module):
"""
Motion Segmentation Network
"""
def __init__(self, size, md=[4,4,4,4,4], fac=1., exp_unc=True):
super(SegNet,self).__init__()
self.md = md
self.fac = fac
use_entropy = True
withbn = True
## pspnet
self.pspnet = pspnet(is_proj=False)
### Volumetric-UNet
fdima1 = 128 # 6/5/4
fdima2 = 64 # 3/2
fdimb1 = 16 # 6/5/4/3
fdimb2 = 12 # 2
full=False
self.f6 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)
self.p6 = sepConv4d(fdimb1,fdimb1, with_bn=False, full=full)
self.f5 = butterfly4D(fdima1, fdimb1,withbn=withbn, full=full)
self.p5 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)
self.f4 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)
self.p4 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)
self.f3 = butterfly4D(fdima2, fdimb1,withbn=withbn,full=full)
self.p3 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)
full=True
self.f2 = butterfly4D(fdima2, fdimb2,withbn=withbn,full=full)
self.p2 = sepConv4d(fdimb2,fdimb2, with_bn=False,full=full)
self.flow_reg64 = flow_reg([fdimb1*size[0],size[1]//64,size[2]//64], ent=use_entropy, maxdisp=self.md[0], fac=self.fac)
self.flow_reg32 = flow_reg([fdimb1*size[0],size[1]//32,size[2]//32], ent=use_entropy, maxdisp=self.md[1])
self.flow_reg16 = flow_reg([fdimb1*size[0],size[1]//16,size[2]//16], ent=use_entropy, maxdisp=self.md[2])
self.flow_reg8 = flow_reg([fdimb1*size[0],size[1]//8,size[2]//8] , ent=use_entropy, maxdisp=self.md[3])
self.flow_reg4 = flow_reg([fdimb2*size[0],size[1]//4,size[2]//4] , ent=use_entropy, maxdisp=self.md[4])
self.warp5 = WarpModule([size[0],size[1]//32,size[2]//32])
self.warp4 = WarpModule([size[0],size[1]//16,size[2]//16])
self.warp3 = WarpModule([size[0],size[1]//8,size[2]//8])
self.warp2 = WarpModule([size[0],size[1]//4,size[2]//4])
if self.training:
self.warpx = WarpModule([size[0],size[1],size[2]])
## hypotheses fusion modules, adopted from the refinement module of PWCNet
# https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py
# c6
self.dc6_conv1 = conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc6_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc6_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc6_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc6_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc6_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc6_conv7 = nn.Conv2d(32,2*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)
# c5
self.dc5_conv1 = conv(128+4*fdimb1*2, 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc5_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc5_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc5_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc5_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc5_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc5_conv7 = nn.Conv2d(32,2*fdimb1*2,kernel_size=3,stride=1,padding=1,bias=True)
# c4
self.dc4_conv1 = conv(128+4*fdimb1*3, 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc4_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc4_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc4_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc4_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc4_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc4_conv7 = nn.Conv2d(32,2*fdimb1*3,kernel_size=3,stride=1,padding=1,bias=True)
# c3
self.dc3_conv1 = conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc3_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc3_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc3_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc3_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc3_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc3_conv7 = nn.Conv2d(32,8*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)
# c2
self.dc2_conv1 = conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc2_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc2_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc2_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc2_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc2_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc2_conv7 = nn.Conv2d(32,4*2*fdimb1 + 2*fdimb2,kernel_size=3,stride=1,padding=1,bias=True)
self.dc6_conv = nn.Sequential( self.dc6_conv1,
self.dc6_conv2,
self.dc6_conv3,
self.dc6_conv4,
self.dc6_conv5,
self.dc6_conv6,
self.dc6_conv7)
self.dc5_conv = nn.Sequential( self.dc5_conv1,
self.dc5_conv2,
self.dc5_conv3,
self.dc5_conv4,
self.dc5_conv5,
self.dc5_conv6,
self.dc5_conv7)
self.dc4_conv = nn.Sequential( self.dc4_conv1,
self.dc4_conv2,
self.dc4_conv3,
self.dc4_conv4,
self.dc4_conv5,
self.dc4_conv6,
self.dc4_conv7)
self.dc3_conv = nn.Sequential( self.dc3_conv1,
self.dc3_conv2,
self.dc3_conv3,
self.dc3_conv4,
self.dc3_conv5,
self.dc3_conv6,
self.dc3_conv7)
self.dc2_conv = nn.Sequential( self.dc2_conv1,
self.dc2_conv2,
self.dc2_conv3,
self.dc2_conv4,
self.dc2_conv5,
self.dc2_conv6,
self.dc2_conv7)
## Out-of-range detection
self.dc6_convo = nn.Sequential(conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1),
conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2),
conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4),
conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8),
conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16),
conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1),
nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))
self.dc5_convo = nn.Sequential(conv(128+2*4*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1),
conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2),
conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4),
conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8),
conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16),
conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1),
nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))
self.dc4_convo = nn.Sequential(conv(128+3*4*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1),
conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2),
conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4),
conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8),
conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16),
conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1),
nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))
self.dc3_convo = nn.Sequential(conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1, dilation=1),
conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2),
conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4),
conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8),
conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16),
conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1),
nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))
self.dc2_convo = nn.Sequential(conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1, dilation=1),
conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2),
conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4),
conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8),
conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16),
conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1),
nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))
# affine-exp
self.f3d2v1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2v2 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2v3 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2v4 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2v5 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2v6 = conv(12*81, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.f3d2 = bfmodule(128-64,1)
# depth change net
self.dcnetv1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.dcnetv2 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.dcnetv3 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.dcnetv4 = conv(1, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.dcnetv5 = conv(12*81, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
self.dcnetv6 = conv(4, 32, kernel_size=3, stride=1, padding=1,dilation=1) #
if exp_unc:
self.dcnet = bfmodule(128,2)
else:
self.dcnet = bfmodule(128,1)
# moseg net
self.fgnetv1 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv2 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv3 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv4 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv5 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv6 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv7 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv8 = conv(1, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv9 = conv(3, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnetv10 = conv(3, 16, kernel_size=3, stride=1, padding=1,dilation=1) #
self.fgnet = bfmodule_feat(208-3*16,7)
#from midas.midas_net import MidasNet
#self.midas = MidasNet('/data/gengshay/midas.pt', non_negative=True)
self.midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
# detection branch
self.det = create_model('dla_34', {'hm': 2, 'wh': 36}, 256,num_input=14)
for m in self.modules():
if isinstance(m, nn.Conv3d):
n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if hasattr(m.bias,'data'):
m.bias.data.zero_()
self.facs = [self.fac,1,1,1,1]
self.warp_modules = nn.ModuleList([None, self.warp5, self.warp4, self.warp3, self.warp2])
self.f_modules = nn.ModuleList([self.f6, self.f5, self.f4, self.f3, self.f2])
self.p_modules = nn.ModuleList([self.p6, self.p5, self.p4, self.p3, self.p2])
self.reg_modules = nn.ModuleList([self.flow_reg64, self.flow_reg32, self.flow_reg16, self.flow_reg8, self.flow_reg4])
self.oor_modules = nn.ModuleList([self.dc6_convo, self.dc5_convo, self.dc4_convo, self.dc3_convo, self.dc2_convo])
self.fuse_modules = nn.ModuleList([self.dc6_conv, self.dc5_conv, self.dc4_conv, self.dc3_conv, self.dc2_conv])
def corrf(self, refimg_fea, targetimg_fea,maxdisp, fac=1):
if self.training:
#fast correlation function
b,c,h,w = refimg_fea.shape
targetimg_fea = F.unfold(targetimg_fea, (2*int(maxdisp)//fac+1,2*maxdisp+1), padding=(int(maxdisp)//fac,maxdisp)).view(b,c, 2*int(maxdisp)//fac+1,2*maxdisp+1,h,w).permute(0,1,3,2,4,5).contiguous()
cost = refimg_fea.view(b,c,h,w)[:,:,np.newaxis, np.newaxis]*targetimg_fea
cost = F.leaky_relu(cost, 0.1,inplace=True)
else:
#slow correlation function
b,c,height,width = refimg_fea.shape
if refimg_fea.is_cuda:
cost = Variable(torch.cuda.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) # b,c,u,v,h,w
else:
cost = Variable(torch.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) # b,c,u,v,h,w
for i in range(2*maxdisp+1):
ind = i-maxdisp
for j in range(2*int(maxdisp//fac)+1):
indd = j-int(maxdisp//fac)
feata = refimg_fea[:,:,max(0,-indd):height-indd,max(0,-ind):width-ind]
featb = targetimg_fea[:,:,max(0,+indd):height+indd,max(0,ind):width+ind]
diff = (feata*featb)
cost[:, :, i,j,max(0,-indd):height-indd,max(0,-ind):width-ind] = diff # standard
cost = F.leaky_relu(cost, 0.1,inplace=True)
return cost
def cost_matching(self,up_flow, c1, c2, flowh, enth, level):
"""
up_flow: upsample coarse flow
c1: normalized feature of image 1
c2: normalized feature of image 2
flowh: flow hypotheses
enth: entropy
oor: out of range score for flow
"""
# normalize
c1n = c1 / (c1.norm(dim=1, keepdim=True)+1e-9)
c2n = c2 / (c2.norm(dim=1, keepdim=True)+1e-9)
# cost volume
if level == 0:
warp = c2n
else:
warp,_ = self.warp_modules[level](c2n, up_flow)
feat = self.corrf(c1n,warp,self.md[level],fac=self.facs[level])
feat = self.f_modules[level](feat)
cost = self.p_modules[level](feat) # b, 16, u,v,h,w
# soft WTA
b,c,u,v,h,w = cost.shape
cost = cost.view(-1,u,v,h,w) # bx16, 9,9,h,w, also predict uncertainty from here
flowhh,enthh = self.reg_modules[level](cost) # bx16, 2, h, w
flowhh = flowhh.view(b,c,2,h,w)
if level > 0:
flowhh = flowhh + up_flow[:,np.newaxis]
flowhh = flowhh.view(b,-1,h,w) # b, 16*2, h, w
enthh = enthh.view(b,-1,h,w) # b, 16*1, h, w
# append coarse hypotheses
if level == 0:
flowh = flowhh
enth = enthh
else:
flowh = torch.cat((flowhh, F.upsample(flowh.detach()*2, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1) # b, k2--k2, h, w
enth = torch.cat((enthh, F.upsample(enth, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1)
if self.training or level==4:
x = torch.cat((enth.detach(), flowh.detach(), c1),1)
oor = self.oor_modules[level](x)[:,0]
else: oor = None
# hypotheses fusion
x = torch.cat((enth.detach(), flowh.detach(), c1),1)
va = self.fuse_modules[level](x)
va = va.view(b,-1,2,h,w)
flow = ( flowh.view(b,-1,2,h,w) * F.softmax(va,1) ).sum(1) # b, 2k, 2, h, w
return flow, flowh, enth, oor
def affine(self,pref,flow, pw=1):
b,_,lh,lw=flow.shape
ptar = pref + flow
pw = 1
pref = F.unfold(pref, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-pref[:,:,np.newaxis]
ptar = F.unfold(ptar, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-ptar[:,:,np.newaxis] # b, 2,9,h,w
pref = pref.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)
ptar = ptar.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)
prefprefT = pref.matmul(pref.permute(0,2,1))
ppdet = prefprefT[:,0,0]*prefprefT[:,1,1]-prefprefT[:,1,0]*prefprefT[:,0,1]
ppinv = torch.cat((prefprefT[:,1,1:],-prefprefT[:,0,1:], -prefprefT[:,1:,0], prefprefT[:,0:1,0]),1).view(-1,2,2)/ppdet.clamp(1e-10,np.inf)[:,np.newaxis,np.newaxis]
Affine = ptar.matmul(pref.permute(0,2,1)).matmul(ppinv)
Error = (Affine.matmul(pref)-ptar).norm(2,1).mean(1).view(b,1,lh,lw)
Avol = (Affine[:,0,0]*Affine[:,1,1]-Affine[:,1,0]*Affine[:,0,1]).view(b,1,lh,lw).abs().clamp(1e-10,np.inf)
exp = Avol.sqrt()
mask = (exp>0.5) & (exp<2) & (Error<0.1)
mask = mask[:,0]
exp = exp.clamp(0.5,2)
exp[Error>0.1]=1
return exp, Error, mask
def forward_VCN(self, im):
bs = im.shape[0]//2
### compute optical flow
c06,c05,c04,c03,c02 = self.pspnet(im)
c16 = c06[:bs]; c26 = c06[bs:]
c15 = c05[:bs]; c25 = c05[bs:]
c14 = c04[:bs]; c24 = c04[bs:]
c13 = c03[:bs]; c23 = c03[bs:]
c12 = c02[:bs]; c22 = c02[bs:]
## matching 6
flow6, flow6h, ent6h, oor6 = self.cost_matching(None, c16, c26, None, None,level=0)
## matching 5
up_flow6 = F.upsample(flow6, [im.size()[2]//32,im.size()[3]//32], mode='bilinear')*2
flow5, flow5h, ent5h, oor5 = self.cost_matching(up_flow6, c15, c25, flow6h, ent6h,level=1)
## matching 4
up_flow5 = F.upsample(flow5, [im.size()[2]//16,im.size()[3]//16], mode='bilinear')*2
flow4, flow4h, ent4h, oor4 = self.cost_matching(up_flow5, c14, c24, flow5h, ent5h,level=2)
## matching 3
up_flow4 = F.upsample(flow4, [im.size()[2]//8,im.size()[3]//8], mode='bilinear')*2
flow3, flow3h, ent3h, oor3 = self.cost_matching(up_flow4, c13, c23, flow4h, ent4h,level=3)
## matching 2
up_flow3 = F.upsample(flow3, [im.size()[2]//4,im.size()[3]//4], mode='bilinear')*2
flow2, flow2h, ent2h, oor2 = self.cost_matching(up_flow3, c12, c22, flow3h, ent3h,level=4)
### optical expansion
b,_,h,w = flow2.shape
exp2,err2,_ = self.affine(get_grid(b,h,w)[:,0].permute(0,3,1,2).repeat(b,1,1,1).clone(), flow2.detach(),pw=1)
x = torch.cat((
self.f3d2v2(-exp2.log()),
self.f3d2v3(err2),
),1)
dchange2 = -exp2.log()+1./200*self.f3d2(x)[0]
# depth change net
iexp2 = F.upsample(dchange2.clone(), [im.size()[2],im.size()[3]], mode='bilinear')
x = torch.cat((self.dcnetv1(c12.detach()),
self.dcnetv2(dchange2.detach()),
self.dcnetv3(-exp2.log()),
self.dcnetv4(err2),
),1)
dcneto = 1./200*self.dcnet(x)[0]
dchange2 = dchange2.detach() + dcneto[:,:1]
dchange2 = F.upsample(dchange2, [im.size()[2],im.size()[3]], mode='bilinear')
if dcneto.shape[1]>1:
dc_unc = dcneto[:,1:2]
else:
dc_unc = torch.zeros_like(dcneto)
dc_unc = F.upsample(dc_unc, [im.size()[2],im.size()[3]], mode='bilinear')[:,0]
return flow2, oor2, dchange2, dc_unc
def forward(self,im,disc_aux=None,flowdc=None):
bs = im.shape[0]//2
flow2, oor2, dchange2, dc_unc = flowdc
### rigid motion segmentation
## pre-processing
Kinv, Kinv_n = get_intrinsics(disc_aux[0], noise=False)
# get full res flow/expansion inputs
H,W = im.size()[2:4]
flow = 4*F.upsample(flow2, [H,W], mode='bilinear').detach()
oor2 = F.upsample(oor2[:,np.newaxis], [H,W], mode='bilinear').detach()[:,0]
tau = (-dchange2[:,0]).exp().detach()
# use different number of correspondences for bg, obj segmentation and pose
fscale=128./H; fscalex=32./H;fscaled=448./H
hp0o = torch.cat( [torch.arange(0, W,out=torch.cuda.FloatTensor()).view(1,-1).repeat(H,1)[np.newaxis], # 1,2,H,W
torch.arange(0, H,out=torch.cuda.FloatTensor()).view(-1,1).repeat(1,W)[np.newaxis]], 0)[np.newaxis]
hp1o = hp0o + flow # b,2,H,W
# to deal with input resizing (TODO: move it inside intrinsics)
hp0o[:,0] *= disc_aux[0][10]
hp0o[:,1] *= disc_aux[0][11]
hp1o[:,0] *= disc_aux[0][10]
hp1o[:,1] *= disc_aux[0][11]
# sample correspondence for object segmentation (fscaled)
hp0d = F.interpolate(hp0o,scale_factor=fscaled,mode='nearest')
hp1d = F.interpolate(hp1o,scale_factor=fscaled,mode='nearest')
_,_,hd,wd=hp0d.shape
hp0d = hp0d.view(1,2,-1).permute(0,2,1)
hp1d = hp1d.view(bs,2,-1).permute(0,2,1)
hp0d = torch.cat((hp0d,torch.ones(1,hp0d.shape[1],1).cuda()),-1)
hp1d = torch.cat((hp1d,torch.ones(bs,hp0d.shape[1],1).cuda()),-1)
uncd = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscaled,mode='nearest'),
F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscaled,mode='nearest')),1)
taud = F.interpolate(tau[:,np.newaxis],scale_factor=fscaled,mode='nearest').view(bs,1,-1)
# sample correspondence for fg/bg seg (fscale)
hp0 = F.interpolate(hp0o,scale_factor=fscale,mode='nearest')
hp1 = F.interpolate(hp1o,scale_factor=fscale,mode='nearest')
_,_,h,w=hp0.shape
hp0 = hp0.view(1,2,-1).permute(0,2,1)
hp1 = hp1.view(bs,2,-1).permute(0,2,1)
hp0 = torch.cat((hp0,torch.ones(1,hp0.shape[1],1).cuda()),-1)
hp1 = torch.cat((hp1,torch.ones(bs,hp0.shape[1],1).cuda()),-1)
unc = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscale,mode='nearest'),
F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscale,mode='nearest')),1)
tau = F.interpolate(tau[:,np.newaxis],scale_factor=fscale,mode='nearest').view(bs,1,-1)
# sample correspondence for pose estimation (fscalex)
hp0x = F.interpolate(hp0o,scale_factor=fscalex,mode='nearest')
hp1x = F.interpolate(hp1o,scale_factor=fscalex,mode='nearest')
hp0x = hp0x.view(1,2,-1).permute(0,2,1)
hp1x = hp1x.view(bs,2,-1).permute(0,2,1)
hp0x = torch.cat((hp0x,torch.ones(1,hp0x.shape[1],1).cuda()),-1)
hp1x = torch.cat((hp1x,torch.ones(bs,hp0x.shape[1],1).cuda()),-1)
## camera pose estimation
# using input pose from VONet
rot = torch.from_numpy(cv2.Rodrigues(disc_aux[2][:,:3])[0][:,0].astype(np.float32)).unsqueeze(0)
trans = torch.from_numpy(disc_aux[2][:,3:].astype(np.float32)).squeeze().unsqueeze(0)
trans = trans / trans.norm(2,1)[:,np.newaxis]
rot = rot.cuda().detach()
trans = trans.cuda().detach()
Ex = get_skew_mat(trans.cpu(), rot.cpu())
## fg/bg segmentation
# rigidity cost maps
mcost00, mcost01, mcost1, mcost2, mcost3, mcost4, p3dmag,_ = compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n = Kinv_n)
# depth contrast cost
with torch.no_grad():
self.midas.eval()
input_im = (disc_aux[1].permute(0,3,1,2) -\
torch.Tensor([0.485, 0.456, 0.406]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]) /\
torch.Tensor([0.229, 0.224, 0.225]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]
wsize = int((input_im.shape[3] * 448./input_im.shape[2])//32*32)
input_im = F.interpolate(input_im, (448, wsize), mode='bilinear')
dispo = self.midas.forward(input_im)[None].clamp(1e-6,np.inf)
disp = F.interpolate(dispo, [h,w], mode='bilinear')
med_dgt = torch.median(disp.view(bs,-1),dim=-1)[0]
med_dp3d = torch.median(p3dmag.view(bs,-1),dim=-1)[0]
med_ratio = (med_dgt/med_dp3d)[:,np.newaxis,np.newaxis,np.newaxis]
# disp[disp == float('inf')] = p3dmag.view(bs,1,h,w)[disp == float('inf')] * med_ratio
log_dratio = ( med_ratio * p3dmag.view(bs,1,h,w) / disp.view(bs,1,h,w) ).log()
#pdb.set_trace()
# pseudo 3D point compute
depth = (1./ disp).view(bs,1,-1)
depth = depth.clamp(depth.median()/10, depth.median()*10)
p03d = depth * Kinv.matmul(hp0.permute(0,2,1))
p13d = depth/tau*Kinv_n.matmul(hp1.permute(0,2,1))
p13d = kornia.angle_axis_to_rotation_matrix(rot).matmul(p13d) # remove rotation
pts = torch.cat([p03d, p13d],-1) # bs, 3, 2*N
# normalize it
for i in range(bs):
pts[i] = pts[i] - pts[i].mean(-1,keepdims=True) # zero mean
pts[i] = pts[i] / pts[i].flatten().std() # unit std
p03d = pts[:,:,:p03d.shape[-1]]
p13d = pts[:,:,p03d.shape[-1]:]
# fg/bg segmentation network
# the constants are empirical values multiplied to cost maps to
# ensure they have similar scales
costs = torch.cat((
self.fgnetv1( 0.01*(mcost00+mcost01).view(bs,1,h,w).detach()),
self.fgnetv2( 2e3* mcost1.view(bs,1,h,w).detach()),
self.fgnetv3( mcost2.view(bs,1,h,w).detach()),
self.fgnetv4( 30* mcost3.view(bs,1,h,w).detach()),
self.fgnetv5( mcost4.view(bs,1,h,w).detach()),
self.fgnetv6( 0.2* unc[:,:1].view(bs,1,h,w).detach()),
self.fgnetv7( 0.2* unc[:,1:].view(bs,1,h,w).detach()),
self.fgnetv8( 3* log_dratio.view(bs,1,h,w).detach()),
self.fgnetv9( p03d.view(bs,3,h,w).detach()),
self.fgnetv10( p13d.view(bs,3,h,w).detach()),
),1)
x,featx = self.fgnet(costs)
fg_va = 1./20*x[:,:-1]
fg_res = 1./200*x[:,-1:]
fg_hps = torch.cat( (
0.01*(mcost00+mcost01).view(bs,1,h,w).detach(),
2e3* mcost1.view(bs,1,h,w).detach(),
mcost2.view(bs,1,h,w).detach(),
30* mcost3.view(bs,1,h,w).detach(),
mcost4.view(bs,1,h,w).detach(),
3* log_dratio.view(bs,1,h,w).detach(),
),1)
# fgmask: prelogits of 0-1 probability foreground vs background
fgmask = (fg_va * fg_hps).sum(1, keepdims=True) + fg_res
fgmask = F.upsample(fgmask, [im.size()[2],im.size()[3]], mode='bilinear')
return fgmask[0,0]
================================================
FILE: Network/rigidmask/__init__.py
================================================
================================================
FILE: Network/rigidmask/conv4d.py
================================================
import pdb
import torch.nn as nn
import math
import torch
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from torch.nn import Module
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _quadruple
from torch.autograd import Variable
from torch.nn import Conv2d
def conv4d(data,filters,bias=None,permute_filters=True,use_half=False):
"""
This is done by stacking results of multiple 3D convolutions, and is very slow.
Taken from https://github.com/ignacio-rocco/ncnet
"""
b,c,h,w,d,t=data.size()
data=data.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop
# Same permutation is done with filters, unless already provided with permutation
if permute_filters:
filters=filters.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop
c_out=filters.size(1)
if use_half:
output = Variable(torch.HalfTensor(h,b,c_out,w,d,t),requires_grad=data.requires_grad)
else:
output = Variable(torch.zeros(h,b,c_out,w,d,t),requires_grad=data.requires_grad)
padding=filters.size(0)//2
if use_half:
Z=Variable(torch.zeros(padding,b,c,w,d,t).half())
else:
Z=Variable(torch.zeros(padding,b,c,w,d,t))
if data.is_cuda:
Z=Z.cuda(data.get_device())
output=output.cuda(data.get_device())
data_padded = torch.cat((Z,data,Z),0)
for i in range(output.size(0)): # loop on first feature dimension
# convolve with center channel of filter (at position=padding)
output[i,:,:,:,:,:]=F.conv3d(data_padded[i+padding,:,:,:,:,:],
filters[padding,:,:,:,:,:], bias=bias, stride=1, padding=padding)
# convolve with upper/lower channels of filter (at postions [:padding] [padding+1:])
for p in range(1,padding+1):
output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding-p,:,:,:,:,:],
filters[padding-p,:,:,:,:,:], bias=None, stride=1, padding=padding)
output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding+p,:,:,:,:,:],
filters[padding+p,:,:,:,:,:], bias=None, stride=1, padding=padding)
output=output.permute(1,2,0,3,4,5).contiguous()
return output
class Conv4d(_ConvNd):
"""Applies a 4D convolution over an input signal composed of several input
planes.
"""
def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True):
# stride, dilation and groups !=1 functionality not tested
stride=1
dilation=1
groups=1
# zero padding is added automatically in conv4d function to preserve tensor size
padding = 0
kernel_size = _quadruple(kernel_size)
stride = _quadruple(stride)
padding = _quadruple(padding)
dilation = _quadruple(dilation)
super(Conv4d, self).__init__(
in_channels, out_channels, kernel_size, stride, padding, dilation,
False, _quadruple(0), groups, bias)
# weights will be sliced along one dimension during convolution loop
# make the looping dimension to be the first one in the tensor,
# so that we don't need to call contiguous() inside the loop
self.pre_permuted_filters=pre_permuted_filters
if self.pre_permuted_filters:
self.weight.data=self.weight.data.permute(2,0,1,3,4,5).contiguous()
self.use_half=False
# self.isbias = bias
# if not self.isbias:
# self.bn = torch.nn.BatchNorm1d(out_channels)
def forward(self, input):
out = conv4d(input, self.weight, bias=self.bias,permute_filters=not self.pre_permuted_filters,use_half=self.use_half) # filters pre-permuted in constructor
# if not self.isbias:
# b,c,u,v,h,w = out.shape
# out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)
return out
class fullConv4d(torch.nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True):
super(fullConv4d, self).__init__()
self.conv = Conv4d(in_channels, out_channels, kernel_size, bias=bias, pre_permuted_filters=pre_permuted_filters)
self.isbias = bias
if not self.isbias:
self.bn = torch.nn.BatchNorm1d(out_channels)
def forward(self, input):
out = self.conv(input)
if not self.isbias:
b,c,u,v,h,w = out.shape
out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)
return out
class butterfly4D(torch.nn.Module):
'''
butterfly 4d
'''
def __init__(self, fdima, fdimb, withbn=True, full=True,groups=1):
super(butterfly4D, self).__init__()
self.proj = nn.Sequential(projfeat4d(fdima, fdimb, 1, with_bn=withbn,groups=groups),
nn.ReLU(inplace=True),)
self.conva1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)
self.conva2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)
self.convb3 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)
self.convb2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)
self.convb1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)
#@profile
def forward(self,x):
out = self.proj(x)
b,c,u,v,h,w = out.shape # 9x9
out1 = self.conva1(out) # 5x5, 3
_,c1,u1,v1,h1,w1 = out1.shape
out2 = self.conva2(out1) # 3x3, 9
_,c2,u2,v2,h2,w2 = out2.shape
out2 = self.convb3(out2) # 3x3, 9
tout1 = F.upsample(out2.view(b,c,u2,v2,-1),(u1,v1,h2*w2),mode='trilinear').view(b,c,u1,v1,h2,w2) # 5x5
tout1 = F.upsample(tout1.view(b,c,-1,h2,w2),(u1*v1,h1,w1),mode='trilinear').view(b,c,u1,v1,h1,w1) # 5x5
out1 = tout1 + out1
out1 = self.convb2(out1)
tout = F.upsample(out1.view(b,c,u1,v1,-1),(u,v,h1*w1),mode='trilinear').view(b,c,u,v,h1,w1)
tout = F.upsample(tout.view(b,c,-1,h1,w1),(u*v,h,w),mode='trilinear').view(b,c,u,v,h,w)
out = tout + out
out = self.convb1(out)
return out
class projfeat4d(torch.nn.Module):
'''
Turn 3d projection into 2d projection
'''
def __init__(self, in_planes, out_planes, stride, with_bn=True,groups=1):
super(projfeat4d, self).__init__()
self.with_bn = with_bn
self.stride = stride
self.conv1 = nn.Conv3d(in_planes, out_planes, 1, (stride,stride,1), padding=0,bias=not with_bn,groups=groups)
self.bn = nn.BatchNorm3d(out_planes)
def forward(self,x):
b,c,u,v,h,w = x.size()
x = self.conv1(x.view(b,c,u,v,h*w))
if self.with_bn:
x = self.bn(x)
_,c,u,v,_ = x.shape
x = x.view(b,c,u,v,h,w)
return x
class sepConv4d(torch.nn.Module):
'''
Separable 4d convolution block as 2 3D convolutions
'''
def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, ksize=3, full=True,groups=1):
super(sepConv4d, self).__init__()
bias = not with_bn
self.isproj = False
self.stride = stride[0]
expand = 1
if with_bn:
if in_planes != out_planes:
self.isproj = True
self.proj = nn.Sequential(nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups),
nn.BatchNorm2d(out_planes))
if full:
self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),
nn.BatchNorm3d(in_planes))
else:
self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1, bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),
nn.BatchNorm3d(in_planes))
self.conv2 = nn.Sequential(nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups),
nn.BatchNorm3d(in_planes*expand))
else:
if in_planes != out_planes:
self.isproj = True
self.proj = nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups)
if full:
self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)
else:
self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1, bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)
self.conv2 = nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups)
self.relu = nn.ReLU(inplace=True)
#@profile
def forward(self,x):
b,c,u,v,h,w = x.shape
x = self.conv2(x.view(b,c,u,v,-1))
b,c,u,v,_ = x.shape
x = self.relu(x)
x = self.conv1(x.view(b,c,-1,h,w))
b,c,_,h,w = x.shape
if self.isproj:
x = self.proj(x.view(b,c,-1,w))
x = x.view(b,-1,u,v,h,w)
return x
class sepConv4dBlock(torch.nn.Module):
'''
Separable 4d convolution block as 2 2D convolutions and a projection
layer
'''
def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, full=True,groups=1):
super(sepConv4dBlock, self).__init__()
if in_planes == out_planes and stride==(1,1,1):
self.downsample = None
else:
if full:
self.downsample = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn,ksize=1, full=full,groups=groups)
else:
self.downsample = projfeat4d(in_planes, out_planes,stride[0], with_bn=with_bn,groups=groups)
self.conv1 = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn, full=full ,groups=groups)
self.conv2 = sepConv4d(out_planes, out_planes,(1,1,1), with_bn=with_bn, full=full,groups=groups)
self.relu1 = nn.ReLU(inplace=True)
self.relu2 = nn.ReLU(inplace=True)
#@profile
def forward(self,x):
out = self.relu1(self.conv1(x))
if self.downsample:
x = self.downsample(x)
out = self.relu2(x + self.conv2(out))
return out
##import torch.backends.cudnn as cudnn
##cudnn.benchmark = True
#import time
##im = torch.randn(9,64,9,160,224).cuda()
##net = torch.nn.Conv3d(64, 64, 3).cuda()
##net = Conv4d(1,1,3,bias=True,pre_permuted_filters=True).cuda()
##net = sepConv4dBlock(2,2,stride=(1,1,1)).cuda()
#
##im = torch.randn(1,16,9,9,96,320).cuda()
##net = sepConv4d(16,16,with_bn=False).cuda()
#
##im = torch.randn(1,16,81,96,320).cuda()
##net = torch.nn.Conv3d(16,16,(1,3,3),padding=(0,1,1)).cuda()
#
##im = torch.randn(1,16,9,9,96*320).cuda()
##net = torch.nn.Conv3d(16,16,(3,3,1),padding=(1,1,0)).cuda()
#
##im = torch.randn(10000,10,9,9).cuda()
##net = torch.nn.Conv2d(10,10,3,padding=1).cuda()
#
##im = torch.randn(81,16,96,320).cuda()
##net = torch.nn.Conv2d(16,16,3,padding=1).cuda()
#c= int(16 *1)
#cp = int(16 *1)
#h=int(96 *4)
#w=int(320 *4)
#k=3
#im = torch.randn(1,c,h,w).cuda()
#net = torch.nn.Conv2d(c,cp,k,padding=k//2).cuda()
#
#im2 = torch.randn(cp,k*k*c).cuda()
#im1 = F.unfold(im, (k,k), padding=k//2)[0]
#
#
#net(im)
#net(im)
#torch.mm(im2,im1)
#torch.mm(im2,im1)
#torch.cuda.synchronize()
#beg = time.time()
#for i in range(100):
# net(im)
# #im1 = F.unfold(im, (k,k), padding=k//2)[0]
# torch.mm(im2,im1)
#torch.cuda.synchronize()
#print('%f'%((time.time()-beg)*10.))
================================================
FILE: Network/rigidmask/det.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torchvision.models as models
import torch
import torch.nn as nn
import os
from .networks.msra_resnet import get_pose_net
from .networks.dlav0 import get_pose_net as get_dlav0
from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
from .networks.large_hourglass import get_large_hourglass_net
_model_factory = {
'res': get_pose_net, # default Resnet with deconv
'dlav0': get_dlav0, # default DLAup
'dla': get_dla_dcn,
'resdcn': get_pose_net_dcn,
'hourglass': get_large_hourglass_net,
}
def create_model(arch, heads, head_conv,num_input):
num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
arch = arch[:arch.find('_')] if '_' in arch else arch
get_model = _model_factory[arch]
model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input)
return model
def load_model(model, model_path, optimizer=None, resume=False,
lr=None, lr_step=None):
start_epoch = 0
checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
state_dict_ = checkpoint['state_dict']
state_dict = {}
# convert data_parallal to model
for k in state_dict_:
if k.startswith('module') and not k.startswith('module_list'):
state_dict[k[7:]] = state_dict_[k]
else:
state_dict[k] = state_dict_[k]
model_state_dict = model.state_dict()
# check loaded parameters and created model parameters
msg = 'If you see this, your model does not fully load the ' + \
'pre-trained weight. Please make sure ' + \
'you have correctly specified --arch xxx ' + \
'or set the correct --num_classes for your own dataset.'
for k in state_dict:
if k in model_state_dict:
if state_dict[k].shape != model_state_dict[k].shape:
print('Skip loading parameter {}, required shape{}, '\
'loaded shape{}. {}'.format(
k, model_state_dict[k].shape, state_dict[k].shape, msg))
state_dict[k] = model_state_dict[k]
else:
print('Drop parameter {}.'.format(k) + msg)
for k in model_state_dict:
if not (k in state_dict):
print('No param {}.'.format(k) + msg)
state_dict[k] = model_state_dict[k]
model.load_state_dict(state_dict, strict=False)
# resume optimizer parameters
if optimizer is not None and resume:
if 'optimizer' in checkpoint:
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']
start_lr = lr
for step in lr_step:
if start_epoch >= step:
start_lr *= 0.1
for param_group in optimizer.param_groups:
param_group['lr'] = start_lr
print('Resumed optimizer with start lr', start_lr)
else:
print('No optimizer parameters in checkpoint.')
if optimizer is not None:
return model, optimizer, start_epoch
else:
return model
def save_model(path, epoch, model, optimizer=None):
if isinstance(model, torch.nn.DataParallel):
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
data = {'epoch': epoch,
'state_dict': state_dict}
if not (optimizer is None):
data['optimizer'] = optimizer.state_dict()
torch.save(data, path)
================================================
FILE: Network/rigidmask/det_losses.py
================================================
# ------------------------------------------------------------------------------
# Portions of this code are from
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pdb
import torch
import torch.nn as nn
from .det_utils import _transpose_and_gather_feat
import torch.nn.functional as F
def _slow_neg_loss(pred, gt):
'''focal loss from CornerNet'''
pos_inds = gt.eq(1)
neg_inds = gt.lt(1)
neg_weights = torch.pow(1 - gt[neg_inds], 4)
loss = 0
pos_pred = pred[pos_inds]
neg_pred = pred[neg_inds]
pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if pos_pred.nelement() == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
def _neg_loss(pred, gt, heat_logits):
''' Modified focal loss. Exactly the same as CornerNet.
Runs faster and costs a little bit more memory
Arguments:
pred (batch x c x h x w)
gt_regr (batch x c x h x w)
'''
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
logpred = torch.nn.functional.log_softmax(heat_logits,1)
pos_loss = logpred[:,0:1] * torch.pow(1 - pred, 2) * pos_inds
neg_loss = logpred[:,1:2] * torch.pow(pred, 2) * neg_weights * neg_inds
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
def _not_faster_neg_loss(pred, gt):
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
num_pos = pos_inds.float().sum()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
trans_pred = pred * neg_inds + (1 - pred) * pos_inds
weight = neg_weights * neg_inds + pos_inds
all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
all_loss = all_loss.sum()
if num_pos > 0:
all_loss /= num_pos
loss -= all_loss
return loss
def _slow_reg_loss(regr, gt_regr, mask):
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr)
regr = regr[mask]
gt_regr = gt_regr[mask]
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
def _reg_loss(regr, gt_regr, mask):
''' L1 regression loss
Arguments:
regr (batch x max_objects x dim)
gt_regr (batch x max_objects x dim)
mask (batch x max_objects)
'''
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr).float()
regr = regr * mask
gt_regr = gt_regr * mask
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
class FocalLoss(nn.Module):
'''nn.Module warpper for focal loss'''
def __init__(self):
super(FocalLoss, self).__init__()
self.neg_loss = _neg_loss
def forward(self, out, target, logits):
return self.neg_loss(out, target, logits)
class RegLoss(nn.Module):
'''Regression loss for an output tensor
Arguments:
output (batch x dim x h x w)
mask (batch x max_objects)
ind (batch x max_objects)
target (batch x max_objects x dim)
'''
def __init__(self):
super(RegLoss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
loss = _reg_loss(pred, target, mask)
return loss
class RegL1Loss(nn.Module):
def __init__(self):
super(RegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class NormRegL1Loss(nn.Module):
def __init__(self):
super(NormRegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
pred = pred / (target + 1e-4)
target = target * 0 + 1
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class RegWeightedL1Loss(nn.Module):
def __init__(self):
super(RegWeightedL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class L1Loss(nn.Module):
def __init__(self):
super(L1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
return loss
class BinRotLoss(nn.Module):
def __init__(self):
super(BinRotLoss, self).__init__()
def forward(self, output, mask, ind, rotbin, rotres):
pred = _transpose_and_gather_feat(output, ind)
loss = compute_rot_loss(pred, rotbin, rotres, mask)
return loss
def compute_res_loss(output, target):
return F.smooth_l1_loss(output, target, reduction='elementwise_mean')
# TODO: weight
def compute_bin_loss(output, target, mask):
mask = mask.expand_as(output)
output = output * mask.float()
return F.cross_entropy(output, target, reduction='elementwise_mean')
def compute_rot_loss(output, target_bin, target_res, mask):
# output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
# bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
# target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
# target_res: (B, 128, 2) [bin1_res, bin2_res]
# mask: (B, 128, 1)
# import pdb; pdb.set_trace()
output = output.view(-1, 8)
target_bin = target_bin.view(-1, 2)
target_res = target_res.view(-1, 2)
mask = mask.view(-1, 1)
loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
loss_res = torch.zeros_like(loss_bin1)
if target_bin[:, 0].nonzero().shape[0] > 0:
idx1 = target_bin[:, 0].nonzero()[:, 0]
valid_output1 = torch.index_select(output, 0, idx1.long())
valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
loss_sin1 = compute_res_loss(
valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
loss_cos1 = compute_res_loss(
valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
loss_res += loss_sin1 + loss_cos1
if target_bin[:, 1].nonzero().shape[0] > 0:
idx2 = target_bin[:, 1].nonzero()[:, 0]
valid_output2 = torch.index_select(output, 0, idx2.long())
valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
loss_sin2 = compute_res_loss(
valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
loss_cos2 = compute_res_loss(
valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
loss_res += loss_sin2 + loss_cos2
return loss_bin1 + loss_bin2 + loss_res
================================================
FILE: Network/rigidmask/det_utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
def _sigmoid(x):
y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
return y
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def flip_tensor(x):
return torch.flip(x, [3])
# tmp = x.detach().cpu().numpy()[..., ::-1].copy()
# return torch.from_numpy(tmp).to(x.device)
def flip_lr(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
def flip_lr_off(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
tmp = tmp.reshape(tmp.shape[0], 17, 2,
tmp.shape[2], tmp.shape[3])
tmp[:, :, 0, :, :] *= -1
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
================================================
FILE: Network/rigidmask/networks/DCNv2/.gitignore
================================================
.vscode
.idea
*.so
*.o
*pyc
_ext
build
DCNv2.egg-info
dist
================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/__init__.py
================================================
from .dcn_v2 import *
================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import math
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.autograd.function import once_differentiable
import _ext as _backend
class _DCNv2(Function):
@staticmethod
def forward(ctx, input, offset, mask, weight, bias,
stride, padding, dilation, deformable_groups):
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.kernel_size = _pair(weight.shape[2:4])
ctx.deformable_groups = deformable_groups
output = _backend.dcn_v2_forward(input, weight, bias,
offset, mask,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
ctx.save_for_backward(input, offset, mask, weight, bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \
_backend.dcn_v2_backward(input, weight,
bias,
offset, mask,
grad_output,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\
None, None, None, None,
dcn_v2_conv = _DCNv2.apply
class DCNv2(nn.Module):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding, dilation=1, deformable_groups=1):
super(DCNv2, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.deformable_groups = deformable_groups
self.weight = nn.Parameter(torch.Tensor(
out_channels, in_channels, *self.kernel_size))
self.bias = nn.Parameter(torch.Tensor(out_channels))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
self.bias.data.zero_()
def forward(self, input, offset, mask):
assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
offset.shape[1]
assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
mask.shape[1]
return dcn_v2_conv(input, offset, mask,
self.weight,
self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
class DCN(DCNv2):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding,
dilation=1, deformable_groups=1):
super(DCN, self).__init__(in_channels, out_channels,
kernel_size, stride, padding, dilation, deformable_groups)
channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]
self.conv_offset_mask = nn.Conv2d(self.in_channels,
channels_,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset_mask.weight.data.zero_()
self.conv_offset_mask.bias.data.zero_()
def forward(self, input):
out = self.conv_offset_mask(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return dcn_v2_conv(input, offset, mask,
self.weight, self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
class _DCNv2Pooling(Function):
@staticmethod
def forward(ctx, input, rois, offset,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
ctx.spatial_scale = spatial_scale
ctx.no_trans = int(no_trans)
ctx.output_dim = output_dim
ctx.group_size = group_size
ctx.pooled_size = pooled_size
ctx.part_size = pooled_size if part_size is None else part_size
ctx.sample_per_part = sample_per_part
ctx.trans_std = trans_std
output, output_count = \
_backend.dcn_v2_psroi_pooling_forward(input, rois, offset,
ctx.no_trans, ctx.spatial_scale,
ctx.output_dim, ctx.group_size,
ctx.pooled_size, ctx.part_size,
ctx.sample_per_part, ctx.trans_std)
ctx.save_for_backward(input, rois, offset, output_count)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, rois, offset, output_count = ctx.saved_tensors
grad_input, grad_offset = \
_backend.dcn_v2_psroi_pooling_backward(grad_output,
input,
rois,
offset,
output_count,
ctx.no_trans,
ctx.spatial_scale,
ctx.output_dim,
ctx.group_size,
ctx.pooled_size,
ctx.part_size,
ctx.sample_per_part,
ctx.trans_std)
return grad_input, None, grad_offset, \
None, None, None, None, None, None, None, None
dcn_v2_pooling = _DCNv2Pooling.apply
class DCNv2Pooling(nn.Module):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DCNv2Pooling, self).__init__()
self.spatial_scale = spatial_scale
self.pooled_size = pooled_size
self.output_dim = output_dim
self.no_trans = no_trans
self.group_size = group_size
self.part_size = pooled_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
def forward(self, input, rois, offset):
assert input.shape[1] == self.output_dim
if self.no_trans:
offset = input.new()
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
class DCNPooling(DCNv2Pooling):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
deform_fc_dim=1024):
super(DCNPooling, self).__init__(spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size,
part_size,
sample_per_part,
trans_std)
self.deform_fc_dim = deform_fc_dim
if not no_trans:
self.offset_mask_fc = nn.Sequential(
nn.Linear(self.pooled_size * self.pooled_size *
self.output_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.pooled_size *
self.pooled_size * 3)
)
self.offset_mask_fc[4].weight.data.zero_()
self.offset_mask_fc[4].bias.data.zero_()
def forward(self, input, rois):
offset = input.new()
if not self.no_trans:
# do roi_align first
n = rois.shape[0]
roi = dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
True, # no trans
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
# build mask and offset
offset_mask = self.offset_mask_fc(roi.view(n, -1))
offset_mask = offset_mask.view(
n, 3, self.pooled_size, self.pooled_size)
o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
# do pooling with offset and mask
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std) * mask
# only roi_align
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp
================================================
#include <vector>
#include "cpu/dcn_v2_im2col_cpu.h"
#include <iostream>
#include <ATen/ATen.h>
//#include <ATen/cuda/CUDAContext.h>
#include <TH/TH.h>
//#include <THC/THCAtomics.cuh>
//#include <THC/THCDeviceUtils.cuh>
//extern THCState *state;
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
// modified from the CUDA version for CPU use by Daniel K. Suhendro
// edit by: James Bockman and Matthew Howe
// modified for torch implementation to remove use of deprecated torch access to Blas
at::Tensor
dcn_v2_cpu_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group)
{
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
/*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_out = weight.size(0);
const int channels_kernel = weight.size(1);
const int kernel_h_ = weight.size(2);
const int kernel_w_ = weight.size(3);
// printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h);
// printf("Channels: %d %d\n", channels, channels_kernel);
// printf("Channels: %d %d\n", channels_out, channels_kernel);
AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
"Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_);
AT_ASSERTM(channels == channels_kernel,
"Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
// auto ones = at::ones({height_out, width_out}, input.options());
auto ones = at::ones({bias.sizes()[0], height_out, width_out}, input.options());
auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options());
using scalar_t = float;
for (int b = 0; b < batch; b++)
{
auto input_n = input.select(0, b);
auto offset_n = offset.select(0, b);
auto mask_n = mask.select(0, b);
auto output_n = output.select(0, b);
// std::cout << "output_n: " << output_n << "output.select(0,b): " << output.select(0,b) << "\n";
// Do Bias first:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
// (N x 1) (1 x M)
// torch implementation
auto ones_T = at::transpose(ones.contiguous(), 2, 0);
ones_T = at::mul(ones_T, bias.contiguous());
ones_T = at::transpose(ones_T, 2, 0);
output_n = at::add(output_n, ones_T);
modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),
offset_n.data_ptr<scalar_t>(),
mask_n.data_ptr<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
deformable_group,
columns.data_ptr<scalar_t>());
//(k * m) x (m * n)
// Y = WC
// torch implementation
auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w});
auto product = at::matmul(weight_flat, columns);
output.select(0, b) = at::add(output_n, product.view({channels_out, height_out, width_out}));
}
return output;
}
std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous");
THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous");
/*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_out = weight.size(0);
const int channels_kernel = weight.size(1);
const int kernel_h_ = weight.size(2);
const int kernel_w_ = weight.size(3);
AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
"Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_);
AT_ASSERTM(channels == channels_kernel,
"Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
auto ones = at::ones({height_out, width_out}, input.options());
auto columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());
auto grad_input = at::zeros_like(input);
auto grad_weight = at::zeros_like(weight);
auto grad_bias = at::zeros_like(bias);
auto grad_offset = at::zeros_like(offset);
auto grad_mask = at::zeros_like(mask);
using scalar_t = float;
for (int b = 0; b < batch; b++)
{
auto input_n = input.select(0, b);
auto offset_n = offset.select(0, b);
auto mask_n = mask.select(0, b);
auto grad_output_n = grad_output.select(0, b);
auto grad_input_n = grad_input.select(0, b);
auto grad_offset_n = grad_offset.select(0, b);
auto grad_mask_n = grad_mask.select(0, b);
// Torch implementation
auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w});
weight_flat = at::transpose(weight_flat, 1, 0);
auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out});
columns = at::matmul(weight_flat, grad_output_n_flat);
// gradient w.r.t. input coordinate data
modulated_deformable_col2im_coord_cpu(columns.data_ptr<scalar_t>(),
input_n.data_ptr<scalar_t>(),
offset_n.data_ptr<scalar_t>(),
mask_n.data_ptr<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
grad_offset_n.data_ptr<scalar_t>(),
grad_mask_n.data_ptr<scalar_t>());
// gradient w.r.t. input data
modulated_deformable_col2im_cpu(columns.data_ptr<scalar_t>(),
offset_n.data_ptr<scalar_t>(),
mask_n.data_ptr<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
grad_input_n.data_ptr<scalar_t>());
// gradient w.r.t. weight, dWeight should accumulate across the batch and group
modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),
offset_n.data_ptr<scalar_t>(),
mask_n.data_ptr<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
columns.data_ptr<scalar_t>());
// Torch implementation
auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0));
grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w}));
// Torch implementation
auto ones_flat = ones.view({height_out*width_out});
product = at::matmul(grad_output_n_flat, ones_flat);
grad_bias = at::add(grad_bias, product);
}
return {
grad_input, grad_offset, grad_mask, grad_weight, grad_bias
};
}
================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp
================================================
#include "dcn_v2_im2col_cpu.h"
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <ATen/ATen.h>
//#include <ATen/cuda/CUDAContext.h>
#include <TH/TH.h>
//#include <THC/THCAtomics.cuh>
//#include <THC/THCDeviceUtils.cuh>
// modified from the CUDA version for CPU use by Daniel K. Suhendro
/*#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}*/
float dmcn_im2col_bilinear_cpu(const float *bottom_data, const int data_width,
const int height, const int width, float h, float w)
{
int h_low = floor(h);
int w_low = floor(w);
int h_high = h_low + 1;
int w_high = w_low + 1;
float lh = h - h_low;
float lw = w - w_low;
float hh = 1 - lh, hw = 1 - lw;
float v1 = 0;
if (h_low >= 0 && w_low >= 0)
v1 = bottom_data[h_low * data_width + w_low];
float v2 = 0;
if (h_low >= 0 && w_high <= width - 1)
v2 = bottom_data[h_low * data_width + w_high];
float v3 = 0;
if (h_high <= height - 1 && w_low >= 0)
v3 = bottom_data[h_high * data_width + w_low];
float v4 = 0;
if (h_high <= height - 1 && w_high <= width - 1)
v4 = bottom_data[h_high * data_width + w_high];
float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
float dmcn_get_gradient_weight_cpu(float argmax_h, float argmax_w,
const int h, const int w, const int height, const int width)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (h == argmax_h_low && w == argmax_w_low)
weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
if (h == argmax_h_low && w == argmax_w_high)
weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
if (h == argmax_h_high && w == argmax_w_low)
weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
if (h == argmax_h_high && w == argmax_w_high)
weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
return weight;
}
float dmcn_get_coordinate_weight_cpu(float argmax_h, float argmax_w,
const int height, const int width, const float *im_data,
const int data_width, const int bp_dir)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (bp_dir == 0)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
else if (bp_dir == 1)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
return weight;
}
void modulated_deformable_im2col_cpu_kernel(const int n, const float *data_im, const float *data_offset, const float *data_mask,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int num_channels, const int deformable_group,
const int height_col, const int width_col,
float *data_col)
{
// launch channels * batch_size * height_col * width_col cores
for(int index=0; index<n; index++)
{
// NOTE(CharlesShang): different from Dai Jifeng's MXNet implementation, col_buffer is of shape (c*kw*kh, N, oh, ow)
// here columns is of shape (N, c*kw*kh, oh * ow), need to adapt axis
// index index of output matrix
const int w_col = index % width_col;
const int h_col = (index / width_col) % height_col;
// const int b_col = (index / width_col / height_col) % batch_size;
const int b_col = (index / width_col / height_col / num_channels) % batch_size;
// const int c_im = (index / width_col / height_col) / batch_size;
const int c_im = (index / width_col / height_col) % num_channels;
// const int c_col = c_im * kernel_h * kernel_w;
const int c_col = c_im * kernel_h * kernel_w;
// compute deformable group index
const int deformable_group_index = c_im / channel_per_deformable_group;
const int h_in = h_col * stride_h - pad_h;
const int w_in = w_col * stride_w - pad_w;
// float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
float *data_col_ptr = data_col + ((b_col * num_channels * kernel_w * kernel_h + c_col) * height_col + h_col) * width_col + w_col;
//const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
for (int i = 0; i < kernel_h; ++i)
{
for (int j = 0; j < kernel_w; ++j)
{
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
float val = static_cast<float>(0);
const float h_im = h_in + i * dilation_h + offset_h;
const float w_im = w_in + j * dilation_w + offset_w;
//if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
{
//const float map_h = i * dilation_h + offset_h;
//const float map_w = j * dilation_w + offset_w;
//const int cur_height = height - h_in;
//const int cur_width = width - w_in;
//val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, height, width, h_im, w_im);
}
*data_col_ptr = val * mask;
// data_col_ptr += batch_size * height_col * width_col;
data_col_ptr += height_col * width_col;
}
}
}
}
void modulated_deformable_col2im_cpu_kernel(const int n, const float *data_col, const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int deformable_group,
const int height_col, const int width_col,
float *grad_im)
{
for(int index = 0; index < n; index++)
{
const int j = (index / width_col / height_col / batch_size) % kernel_w;
const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
// compute the start and end of the output
const int deformable_group_index = c / channel_per_deformable_group;
int w_out = index % width_col;
int h_out = (index / width_col) % height_col;
int b = (index / width_col / height_col) % batch_size;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
const float cur_inv_h_data = h_in + i * dilation_h + offset_h;
const float cur_inv_w_data = w_in + j * dilation_w + offset_w;
const float cur_top_grad = data_col[index] * mask;
const int cur_h = (int)cur_inv_h_data;
const int cur_w = (int)cur_inv_w_data;
for (int dy = -2; dy <= 2; dy++)
{
for (int dx = -2; dx <= 2; dx++)
{
if (cur_h + dy >= 0 && cur_h + dy < height &&
cur_w + dx >= 0 && cur_w + dx < width &&
abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
abs(cur_inv_w_data - (cur_w + dx)) < 1)
{
int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
float weight = dmcn_get_gradient_weight_cpu(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
//atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
*(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;
}
}
}
}
}
void modulated_deformable_col2im_coord_cpu_kernel(const int n, const float *data_col, const float *data_im,
const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
gitextract_hty7qque/ ├── .gitignore ├── Datasets/ │ ├── __init__.py │ ├── cowmask.py │ ├── flowlib.py │ ├── segmask_gt.py │ ├── tartanTrajFlowDataset.py │ ├── util_flow.py │ └── utils.py ├── DytanVO.py ├── LICENSE ├── Network/ │ ├── PWC/ │ │ ├── PWCNet.py │ │ ├── __init__.py │ │ └── correlation.py │ ├── VOFlowNet.py │ ├── VONet.py │ ├── __init__.py │ └── rigidmask/ │ ├── .gitignore │ ├── VCNplus.py │ ├── __init__.py │ ├── conv4d.py │ ├── det.py │ ├── det_losses.py │ ├── det_utils.py │ ├── networks/ │ │ ├── DCNv2/ │ │ │ ├── .gitignore │ │ │ ├── DCN/ │ │ │ │ ├── __init__.py │ │ │ │ ├── dcn_v2.py │ │ │ │ ├── src/ │ │ │ │ │ ├── cpu/ │ │ │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── cuda/ │ │ │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── dcn_v2.h │ │ │ │ │ └── vision.cpp │ │ │ │ ├── testcpu.py │ │ │ │ └── testcuda.py │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── make.sh │ │ │ └── setup.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ └── resnet_dcn.py │ └── submodule.py ├── README.md ├── environment.yml ├── evaluator/ │ ├── __init__.py │ ├── evaluate_ate_scale.py │ ├── evaluate_kitti.py │ ├── evaluate_rpe.py │ ├── evaluator_base.py │ ├── tartanair_evaluator.py │ ├── trajectory_transform.py │ └── transformation.py └── vo_trajectory_from_folder.py
SYMBOL INDEX (498 symbols across 38 files)
FILE: Datasets/cowmask.py
function gaussian_kernels (line 34) | def gaussian_kernels(sigma, max_sigma):
function cow_masks (line 49) | def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range):
FILE: Datasets/flowlib.py
function show_flow (line 30) | def show_flow(filename):
function point_vec (line 42) | def point_vec(img,flow,skip=10):
function visualize_flow (line 63) | def visualize_flow(flow, mode='Y'):
function read_flow (line 101) | def read_flow(filename):
function write_flo (line 122) | def write_flo(flow, filename):
function write_flow (line 147) | def write_flow(flow, filename):
function save_flow_image (line 166) | def save_flow_image(flow, image_file):
function flowfile_to_imagefile (line 178) | def flowfile_to_imagefile(flow_file, image_file):
function segment_flow (line 189) | def segment_flow(flow):
function flow_error (line 226) | def flow_error(tu, tv, u, v):
function flow_to_image (line 281) | def flow_to_image(flow):
function evaluate_flow_file (line 319) | def evaluate_flow_file(gt_file, pred_file):
function evaluate_flow (line 334) | def evaluate_flow(gt_flow, pred_flow):
function read_disp_png (line 350) | def read_disp_png(file_name):
function disp_to_flowfile (line 368) | def disp_to_flowfile(disp, filename):
function read_image (line 396) | def read_image(filename):
function warp_image (line 407) | def warp_image(im, flow):
function pfm_to_flo (line 452) | def pfm_to_flo(pfm_file):
function scale_image (line 459) | def scale_image(image, new_range):
function compute_color (line 474) | def compute_color(u, v):
function make_color_wheel (line 518) | def make_color_wheel():
function read_flo_file (line 568) | def read_flo_file(filename):
function read_png_file (line 593) | def read_png_file(flow_file):
function read_pfm_file (line 618) | def read_pfm_file(flow_file):
function resample (line 629) | def resample(img, sz):
FILE: Datasets/segmask_gt.py
function dataloader (line 22) | def dataloader(filepath, fpass='frames_cleanpass', level=6):
function default_loader (line 79) | def default_loader(path):
function flow_loader (line 82) | def flow_loader(path):
function load_exts (line 90) | def load_exts(cam_file):
function disparity_loader (line 103) | def disparity_loader(path):
function triangulation (line 112) | def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy =...
function exp_loader (line 121) | def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, c...
function motionmask (line 214) | def motionmask(flowl0, depthflow, RT01):
FILE: Datasets/tartanTrajFlowDataset.py
class TrajFolderDataset (line 16) | class TrajFolderDataset(Dataset):
method __init__ (line 19) | def __init__(self, imgfolder, transform = None,
method __len__ (line 38) | def __len__(self):
method __getitem__ (line 41) | def __getitem__(self, idx):
FILE: Datasets/util_flow.py
function readPFM (line 26) | def readPFM(file):
function save_pfm (line 65) | def save_pfm(file, image, scale = 1):
function ReadMiddleburyFloFile (line 92) | def ReadMiddleburyFloFile(path):
function ReadKittiPngFile (line 122) | def ReadKittiPngFile(path):
function WriteMiddleburyFloFile (line 159) | def WriteMiddleburyFloFile(path, width, height, u, v, mask=None):
function write_flow (line 181) | def write_flow(path,flow):
function WriteKittiPngFile (line 197) | def WriteKittiPngFile(path, width, height, u, v, mask=None):
function ConvertMiddleburyFloToKittiPng (line 219) | def ConvertMiddleburyFloToKittiPng(src_path, dest_path):
function ConvertKittiPngToMiddleburyFlo (line 223) | def ConvertKittiPngToMiddleburyFlo(src_path, dest_path):
function ParseFilenameKitti (line 228) | def ParseFilenameKitti(filename):
function read_calib_file (line 239) | def read_calib_file(filepath):
function load_calib_cam_to_cam (line 255) | def load_calib_cam_to_cam(cam_to_cam_file):
FILE: Datasets/utils.py
class Compose (line 29) | class Compose(object):
method __init__ (line 42) | def __init__(self, transforms):
method __call__ (line 45) | def __call__(self, img):
class DownscaleFlow (line 51) | class DownscaleFlow(object):
method __init__ (line 56) | def __init__(self, scale=4):
method __call__ (line 62) | def __call__(self, sample):
class CropCenter (line 76) | class CropCenter(object):
method __init__ (line 81) | def __init__(self, size):
method __call__ (line 87) | def __call__(self, sample):
class ResizeData (line 119) | class ResizeData(object):
method __init__ (line 123) | def __init__(self, size):
method __call__ (line 129) | def __call__(self, sample):
class ToTensor (line 149) | class ToTensor(object):
method __call__ (line 150) | def __call__(self, sample):
function tensor2img (line 168) | def tensor2img(tensImg,mean,std):
function bilinear_interpolate (line 180) | def bilinear_interpolate(img, h, w):
function calculate_angle_distance_from_du_dv (line 206) | def calculate_angle_distance_from_du_dv(du, dv, flagDegree=False):
function visflow (line 220) | def visflow(flownp, maxF=500.0, n=8, mask=None, hueMax=179, angShift=0.0):
function dataset_intrinsics (line 251) | def dataset_intrinsics(dataset='tartanair', is_15mm=False):
function plot_traj (line 278) | def plot_traj(gtposes, estposes, vis=False, savefigname=None, title=''):
function make_intrinsics_layer (line 295) | def make_intrinsics_layer(w, h, fx, fy, ox, oy):
function load_kiiti_intrinsics (line 303) | def load_kiiti_intrinsics(filename):
function load_sceneflow_extrinsics (line 326) | def load_sceneflow_extrinsics(filename):
FILE: DytanVO.py
class DytanVO (line 48) | class DytanVO(object):
method __init__ (line 49) | def __init__(self, vo_model_name, seg_model_name, image_height, image_...
method load_vo_model (line 100) | def load_vo_model(self, model, modelname):
method load_seg_model (line 120) | def load_seg_model(self, model, modelname):
method test_batch (line 130) | def test_batch(self, sample, intrinsics, seg_thresh, iter_num):
method initialize_segnet_input (line 229) | def initialize_segnet_input(self, imgL_o, intrinsics):
method transform_segnet_input (line 255) | def transform_segnet_input(self, imgL_o, imgR_o):
FILE: Network/PWC/PWCNet.py
function conv (line 16) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
function predict_flow (line 22) | def predict_flow(in_planes):
function deconv (line 25) | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
class PWCDCNet (line 30) | class PWCDCNet(nn.Module):
method __init__ (line 35) | def __init__(self, md=4, flow_norm=20.0):
method warp (line 133) | def warp(self, x, flo):
method multi_scale_conv (line 171) | def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_f...
method concate_two_layers (line 179) | def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_...
method forward (line 191) | def forward(self,x):
function pwc_dc_net (line 236) | def pwc_dc_net(path=None):
FILE: Network/PWC/correlation.py
function cupy_kernel (line 235) | def cupy_kernel(strFunction, objVariables):
function cupy_launch (line 274) | def cupy_launch(strFunction, strKernel):
class _FunctionCorrelation (line 278) | class _FunctionCorrelation(torch.autograd.Function):
method forward (line 280) | def forward(self, first, second):
method backward (line 333) | def backward(self, gradOutput):
function FunctionCorrelation (line 385) | def FunctionCorrelation(tenFirst, tenSecond):
class ModuleCorrelation (line 389) | class ModuleCorrelation(torch.nn.Module):
method __init__ (line 390) | def __init__(self):
method forward (line 394) | def forward(self, tenFirst, tenSecond):
FILE: Network/VOFlowNet.py
function conv (line 38) | def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dila...
function linear (line 51) | def linear(in_planes, out_planes):
class BasicBlock (line 57) | class BasicBlock(nn.Module):
method __init__ (line 59) | def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
method forward (line 68) | def forward(self, x):
class VOFlowRes (line 78) | class VOFlowRes(nn.Module):
method __init__ (line 79) | def __init__(self):
method _make_layer (line 111) | def _make_layer(self, block, planes, blocks, stride, pad, dilation):
method forward (line 125) | def forward(self, x):
FILE: Network/VONet.py
class VONet (line 39) | class VONet(nn.Module):
method __init__ (line 40) | def __init__(self):
method forward (line 46) | def forward(self, x, only_flow=False, only_pose=False):
FILE: Network/rigidmask/VCNplus.py
class flow_reg (line 16) | class flow_reg(nn.Module):
method __init__ (line 25) | def __init__(self, size, ent=False, maxdisp = int(4), fac=1):
method forward (line 44) | def forward(self, x):
class WarpModule (line 91) | class WarpModule(nn.Module):
method __init__ (line 95) | def __init__(self, size):
method forward (line 105) | def forward(self, x, flo):
function get_grid (line 127) | def get_grid(B,H,W):
class SegNet (line 135) | class SegNet(nn.Module):
method __init__ (line 139) | def __init__(self, size, md=[4,4,4,4,4], fac=1., exp_unc=True):
method corrf (line 365) | def corrf(self, refimg_fea, targetimg_fea,maxdisp, fac=1):
method cost_matching (line 390) | def cost_matching(self,up_flow, c1, c2, flowh, enth, level):
method affine (line 445) | def affine(self,pref,flow, pw=1):
method forward_VCN (line 470) | def forward_VCN(self, im):
method forward (line 528) | def forward(self,im,disc_aux=None,flowdc=None):
FILE: Network/rigidmask/conv4d.py
function conv4d (line 13) | def conv4d(data,filters,bias=None,permute_filters=True,use_half=False):
class Conv4d (line 59) | class Conv4d(_ConvNd):
method __init__ (line 64) | def __init__(self, in_channels, out_channels, kernel_size, bias=True, ...
method forward (line 90) | def forward(self, input):
class fullConv4d (line 97) | class fullConv4d(torch.nn.Module):
method __init__ (line 98) | def __init__(self, in_channels, out_channels, kernel_size, bias=True, ...
method forward (line 105) | def forward(self, input):
class butterfly4D (line 112) | class butterfly4D(torch.nn.Module):
method __init__ (line 116) | def __init__(self, fdima, fdimb, withbn=True, full=True,groups=1):
method forward (line 127) | def forward(self,x):
class projfeat4d (line 153) | class projfeat4d(torch.nn.Module):
method __init__ (line 157) | def __init__(self, in_planes, out_planes, stride, with_bn=True,groups=1):
method forward (line 164) | def forward(self,x):
class sepConv4d (line 173) | class sepConv4d(torch.nn.Module):
method __init__ (line 177) | def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True...
method forward (line 209) | def forward(self,x):
class sepConv4dBlock (line 223) | class sepConv4dBlock(torch.nn.Module):
method __init__ (line 228) | def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True...
method forward (line 243) | def forward(self,x):
FILE: Network/rigidmask/det.py
function create_model (line 24) | def create_model(arch, heads, head_conv,num_input):
function load_model (line 31) | def load_model(model, model_path, optimizer=None, resume=False,
function save_model (line 86) | def save_model(path, epoch, model, optimizer=None):
FILE: Network/rigidmask/det_losses.py
function _slow_neg_loss (line 18) | def _slow_neg_loss(pred, gt):
function _neg_loss (line 43) | def _neg_loss(pred, gt, heat_logits):
function _not_faster_neg_loss (line 71) | def _not_faster_neg_loss(pred, gt):
function _slow_reg_loss (line 88) | def _slow_reg_loss(regr, gt_regr, mask):
function _reg_loss (line 99) | def _reg_loss(regr, gt_regr, mask):
class FocalLoss (line 116) | class FocalLoss(nn.Module):
method __init__ (line 118) | def __init__(self):
method forward (line 122) | def forward(self, out, target, logits):
class RegLoss (line 125) | class RegLoss(nn.Module):
method __init__ (line 133) | def __init__(self):
method forward (line 136) | def forward(self, output, mask, ind, target):
class RegL1Loss (line 141) | class RegL1Loss(nn.Module):
method __init__ (line 142) | def __init__(self):
method forward (line 145) | def forward(self, output, mask, ind, target):
class NormRegL1Loss (line 153) | class NormRegL1Loss(nn.Module):
method __init__ (line 154) | def __init__(self):
method forward (line 157) | def forward(self, output, mask, ind, target):
class RegWeightedL1Loss (line 167) | class RegWeightedL1Loss(nn.Module):
method __init__ (line 168) | def __init__(self):
method forward (line 171) | def forward(self, output, mask, ind, target):
class L1Loss (line 179) | class L1Loss(nn.Module):
method __init__ (line 180) | def __init__(self):
method forward (line 183) | def forward(self, output, mask, ind, target):
class BinRotLoss (line 189) | class BinRotLoss(nn.Module):
method __init__ (line 190) | def __init__(self):
method forward (line 193) | def forward(self, output, mask, ind, rotbin, rotres):
function compute_res_loss (line 198) | def compute_res_loss(output, target):
function compute_bin_loss (line 202) | def compute_bin_loss(output, target, mask):
function compute_rot_loss (line 207) | def compute_rot_loss(output, target_bin, target_res, mask):
FILE: Network/rigidmask/det_utils.py
function _sigmoid (line 8) | def _sigmoid(x):
function _gather_feat (line 12) | def _gather_feat(feat, ind, mask=None):
function _transpose_and_gather_feat (line 22) | def _transpose_and_gather_feat(feat, ind):
function flip_tensor (line 28) | def flip_tensor(x):
function flip_lr (line 33) | def flip_lr(x, flip_idx):
function flip_lr_off (line 41) | def flip_lr_off(x, flip_idx):
FILE: Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py
class _DCNv2 (line 16) | class _DCNv2(Function):
method forward (line 18) | def forward(ctx, input, offset, mask, weight, bias,
method backward (line 37) | def backward(ctx, grad_output):
class DCNv2 (line 57) | class DCNv2(nn.Module):
method __init__ (line 59) | def __init__(self, in_channels, out_channels,
method reset_parameters (line 75) | def reset_parameters(self):
method forward (line 83) | def forward(self, input, offset, mask):
class DCN (line 97) | class DCN(DCNv2):
method __init__ (line 99) | def __init__(self, in_channels, out_channels,
method init_offset (line 114) | def init_offset(self):
method forward (line 118) | def forward(self, input):
class _DCNv2Pooling (line 132) | class _DCNv2Pooling(Function):
method forward (line 134) | def forward(ctx, input, rois, offset,
method backward (line 163) | def backward(ctx, grad_output):
class DCNv2Pooling (line 187) | class DCNv2Pooling(nn.Module):
method __init__ (line 189) | def __init__(self,
method forward (line 208) | def forward(self, input, rois, offset):
class DCNPooling (line 223) | class DCNPooling(DCNv2Pooling):
method __init__ (line 225) | def __init__(self,
method forward (line 259) | def forward(self, input, rois):
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp
function dcn_v2_cpu_forward (line 22) | at::Tensor
function dcn_v2_cpu_backward (line 113) | std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp
function dmcn_im2col_bilinear_cpu (line 27) | float dmcn_im2col_bilinear_cpu(const float *bottom_data, const int data_...
function dmcn_get_gradient_weight_cpu (line 58) | float dmcn_get_gradient_weight_cpu(float argmax_h, float argmax_w,
function dmcn_get_coordinate_weight_cpu (line 84) | float dmcn_get_coordinate_weight_cpu(float argmax_h, float argmax_w,
function modulated_deformable_im2col_cpu_kernel (line 127) | void modulated_deformable_im2col_cpu_kernel(const int n, const float *da...
function modulated_deformable_col2im_cpu_kernel (line 198) | void modulated_deformable_col2im_cpu_kernel(const int n, const float *da...
function modulated_deformable_col2im_coord_cpu_kernel (line 259) | void modulated_deformable_col2im_coord_cpu_kernel(const int n, const flo...
function modulated_deformable_im2col_cpu (line 331) | void modulated_deformable_im2col_cpu(const float* data_im, const float* ...
function modulated_deformable_col2im_cpu (line 353) | void modulated_deformable_col2im_cpu(const float* data_col, const float*...
function modulated_deformable_col2im_coord_cpu (line 375) | void modulated_deformable_col2im_coord_cpu(const float* data_col, const ...
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_psroi_pooling_cpu.cpp
function T (line 34) | T bilinear_interp_cpu(
function DeformablePSROIPoolForwardKernelCpu (line 59) | void DeformablePSROIPoolForwardKernelCpu(
function DeformablePSROIPoolBackwardAccKernelCpu (line 149) | void DeformablePSROIPoolBackwardAccKernelCpu(
function dcn_v2_psroi_pooling_cpu_forward (line 278) | std::tuple<at::Tensor, at::Tensor>
function dcn_v2_psroi_pooling_cpu_backward (line 350) | std::tuple<at::Tensor, at::Tensor>
FILE: Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp
function PYBIND11_MODULE (line 4) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: Network/rigidmask/networks/DCNv2/DCN/testcpu.py
function conv_identify (line 20) | def conv_identify(weight, bias):
function check_zero_offset (line 32) | def check_zero_offset():
function check_gradient_dconv (line 69) | def check_gradient_dconv():
function check_pooling_zero_offset (line 100) | def check_pooling_zero_offset():
function check_gradient_dpooling (line 134) | def check_gradient_dpooling():
function example_dconv (line 169) | def example_dconv():
function example_dpooling (line 183) | def example_dpooling():
function example_mdpooling (line 226) | def example_mdpooling():
FILE: Network/rigidmask/networks/DCNv2/DCN/testcuda.py
function conv_identify (line 20) | def conv_identify(weight, bias):
function check_zero_offset (line 32) | def check_zero_offset():
function check_gradient_dconv (line 69) | def check_gradient_dconv():
function check_pooling_zero_offset (line 100) | def check_pooling_zero_offset():
function check_gradient_dpooling (line 134) | def check_gradient_dpooling():
function example_dconv (line 169) | def example_dconv():
function example_dpooling (line 183) | def example_dpooling():
function example_mdpooling (line 226) | def example_mdpooling():
FILE: Network/rigidmask/networks/DCNv2/setup.py
function get_extensions (line 18) | def get_extensions():
FILE: Network/rigidmask/networks/dlav0.py
function get_model_url (line 18) | def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
function conv3x3 (line 22) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 28) | class BasicBlock(nn.Module):
method __init__ (line 29) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 42) | def forward(self, x, residual=None):
class Bottleneck (line 59) | class Bottleneck(nn.Module):
method __init__ (line 62) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 79) | def forward(self, x, residual=None):
class BottleneckX (line 100) | class BottleneckX(nn.Module):
method __init__ (line 104) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 123) | def forward(self, x, residual=None):
class Root (line 144) | class Root(nn.Module):
method __init__ (line 145) | def __init__(self, in_channels, out_channels, kernel_size, residual):
method forward (line 154) | def forward(self, *x):
class Tree (line 165) | class Tree(nn.Module):
method __init__ (line 166) | def __init__(self, levels, block, in_channels, out_channels, stride=1,
method forward (line 205) | def forward(self, x, residual=None, children=None):
class DLA (line 221) | class DLA(nn.Module):
method __init__ (line 222) | def __init__(self, levels, channels, num_classes=1000,
method _make_level (line 260) | def _make_level(self, block, inplanes, planes, blocks, stride=1):
method _make_conv_level (line 277) | def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation...
method forward (line 289) | def forward(self, x):
method load_pretrained_model (line 304) | def load_pretrained_model(self, data='imagenet', name='dla34', hash='...
function dla34 (line 319) | def dla34(pretrained, **kwargs): # DLA-34
function dla46_c (line 328) | def dla46_c(pretrained=None, **kwargs): # DLA-46-C
function dla46x_c (line 338) | def dla46x_c(pretrained=None, **kwargs): # DLA-X-46-C
function dla60x_c (line 348) | def dla60x_c(pretrained, **kwargs): # DLA-X-60-C
function dla60 (line 358) | def dla60(pretrained=None, **kwargs): # DLA-60
function dla60x (line 368) | def dla60x(pretrained=None, **kwargs): # DLA-X-60
function dla102 (line 378) | def dla102(pretrained=None, **kwargs): # DLA-102
function dla102x (line 387) | def dla102x(pretrained=None, **kwargs): # DLA-X-102
function dla102x2 (line 396) | def dla102x2(pretrained=None, **kwargs): # DLA-X-102 64
function dla169 (line 405) | def dla169(pretrained=None, **kwargs): # DLA-169
function set_bn (line 414) | def set_bn(bn):
class Identity (line 420) | class Identity(nn.Module):
method __init__ (line 421) | def __init__(self):
method forward (line 424) | def forward(self, x):
function fill_up_weights (line 428) | def fill_up_weights(up):
class IDAUp (line 440) | class IDAUp(nn.Module):
method __init__ (line 441) | def __init__(self, node_kernel, out_dim, channels, up_factors):
method forward (line 482) | def forward(self, layers):
class DLAUp (line 499) | class DLAUp(nn.Module):
method __init__ (line 500) | def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None):
method forward (line 515) | def forward(self, layers):
function fill_fc_weights (line 524) | def fill_fc_weights(layers):
class DLASeg (line 533) | class DLASeg(nn.Module):
method __init__ (line 534) | def __init__(self, base_name, heads,
method forward (line 600) | def forward(self, x):
function get_pose_net (line 642) | def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):
FILE: Network/rigidmask/networks/large_hourglass.py
class convolution (line 17) | class convolution(nn.Module):
method __init__ (line 18) | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
method forward (line 26) | def forward(self, x):
class fully_connected (line 32) | class fully_connected(nn.Module):
method __init__ (line 33) | def __init__(self, inp_dim, out_dim, with_bn=True):
method forward (line 42) | def forward(self, x):
class residual (line 48) | class residual(nn.Module):
method __init__ (line 49) | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
method forward (line 65) | def forward(self, x):
function make_layer (line 76) | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
function make_layer_revr (line 82) | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **k...
class MergeUp (line 89) | class MergeUp(nn.Module):
method forward (line 90) | def forward(self, up1, up2):
function make_merge_layer (line 93) | def make_merge_layer(dim):
function make_pool_layer (line 99) | def make_pool_layer(dim):
function make_unpool_layer (line 102) | def make_unpool_layer(dim):
function make_kp_layer (line 105) | def make_kp_layer(cnv_dim, curr_dim, out_dim):
function make_inter_layer (line 111) | def make_inter_layer(dim):
function make_cnv_layer (line 114) | def make_cnv_layer(inp_dim, out_dim):
class kp_module (line 117) | class kp_module(nn.Module):
method __init__ (line 118) | def __init__(
method forward (line 167) | def forward(self, x):
class exkp (line 176) | class exkp(nn.Module):
method __init__ (line 177) | def __init__(
method forward (line 253) | def forward(self, image):
function make_hg_layer (line 277) | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
class HourglassNet (line 283) | class HourglassNet(exkp):
method __init__ (line 284) | def __init__(self, heads, num_stacks=2):
function get_large_hourglass_net (line 298) | def get_large_hourglass_net(num_layers, heads, head_conv):
FILE: Network/rigidmask/networks/msra_resnet.py
function conv3x3 (line 28) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 34) | class BasicBlock(nn.Module):
method __init__ (line 37) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 47) | def forward(self, x):
class Bottleneck (line 66) | class Bottleneck(nn.Module):
method __init__ (line 69) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 84) | def forward(self, x):
class PoseResNet (line 107) | class PoseResNet(nn.Module):
method __init__ (line 109) | def __init__(self, block, layers, heads, head_conv, **kwargs):
method _make_layer (line 154) | def _make_layer(self, block, planes, blocks, stride=1):
method _get_deconv_cfg (line 171) | def _get_deconv_cfg(self, deconv_kernel, index):
method _make_deconv_layer (line 184) | def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
method forward (line 211) | def forward(self, x):
method init_weights (line 228) | def init_weights(self, num_layers, pretrained=True):
function get_pose_net (line 275) | def get_pose_net(num_layers, heads, head_conv):
FILE: Network/rigidmask/networks/pose_dla_dcn.py
function get_model_url (line 21) | def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
function conv3x3 (line 25) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 31) | class BasicBlock(nn.Module):
method __init__ (line 32) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 45) | def forward(self, x, residual=None):
class Bottleneck (line 62) | class Bottleneck(nn.Module):
method __init__ (line 65) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 82) | def forward(self, x, residual=None):
class BottleneckX (line 103) | class BottleneckX(nn.Module):
method __init__ (line 107) | def __init__(self, inplanes, planes, stride=1, dilation=1):
method forward (line 126) | def forward(self, x, residual=None):
class Root (line 147) | class Root(nn.Module):
method __init__ (line 148) | def __init__(self, in_channels, out_channels, kernel_size, residual):
method forward (line 157) | def forward(self, *x):
class Tree (line 168) | class Tree(nn.Module):
method __init__ (line 169) | def __init__(self, levels, block, in_channels, out_channels, stride=1,
method forward (line 208) | def forward(self, x, residual=None, children=None):
class DLA (line 224) | class DLA(nn.Module):
method __init__ (line 225) | def __init__(self, levels, channels, num_classes=1000,
method _make_level (line 257) | def _make_level(self, block, inplanes, planes, blocks, stride=1):
method _make_conv_level (line 274) | def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation...
method forward (line 286) | def forward(self, x):
method load_pretrained_model (line 294) | def load_pretrained_model(self, data='imagenet', name='dla34', hash='b...
function dla34 (line 309) | def dla34(pretrained=True, **kwargs): # DLA-34
class Identity (line 317) | class Identity(nn.Module):
method __init__ (line 319) | def __init__(self):
method forward (line 322) | def forward(self, x):
function fill_fc_weights (line 326) | def fill_fc_weights(layers):
function fill_up_weights (line 333) | def fill_up_weights(up):
class DeformConv (line 345) | class DeformConv(nn.Module):
method __init__ (line 346) | def __init__(self, chi, cho):
method forward (line 354) | def forward(self, x):
class IDAUp (line 360) | class IDAUp(nn.Module):
method __init__ (line 362) | def __init__(self, o, channels, up_f):
method forward (line 380) | def forward(self, layers, startp, endp):
class DLAUp (line 390) | class DLAUp(nn.Module):
method __init__ (line 391) | def __init__(self, startp, channels, scales, in_channels=None):
method forward (line 407) | def forward(self, layers):
class Interpolate (line 416) | class Interpolate(nn.Module):
method __init__ (line 417) | def __init__(self, scale, mode):
method forward (line 422) | def forward(self, x):
class DLASeg (line 427) | class DLASeg(nn.Module):
method __init__ (line 428) | def __init__(self, base_name, heads, pretrained, down_ratio, final_ker...
method forward (line 470) | def forward(self, x):
function get_pose_net (line 485) | def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4,num_inpu...
FILE: Network/rigidmask/networks/resnet_dcn.py
function conv3x3 (line 32) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 38) | class BasicBlock(nn.Module):
method __init__ (line 41) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 51) | def forward(self, x):
class Bottleneck (line 70) | class Bottleneck(nn.Module):
method __init__ (line 73) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 88) | def forward(self, x):
function fill_up_weights (line 110) | def fill_up_weights(up):
function fill_fc_weights (line 121) | def fill_fc_weights(layers):
class PoseResNet (line 130) | class PoseResNet(nn.Module):
method __init__ (line 132) | def __init__(self, block, layers, heads, head_conv):
method _make_layer (line 179) | def _make_layer(self, block, planes, blocks, stride=1):
method _get_deconv_cfg (line 196) | def _get_deconv_cfg(self, deconv_kernel, index):
method _make_deconv_layer (line 209) | def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
method forward (line 248) | def forward(self, x):
method init_weights (line 265) | def init_weights(self, num_layers):
function get_pose_net (line 285) | def get_pose_net(num_layers, heads, head_conv=256):
FILE: Network/rigidmask/submodule.py
class residualBlock (line 12) | class residualBlock(nn.Module):
method __init__ (line 15) | def __init__(self, in_channels, n_filters, stride=1, downsample=None,d...
method forward (line 31) | def forward(self, x):
function conv (line 43) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
class conv2DBatchNorm (line 51) | class conv2DBatchNorm(nn.Module):
method __init__ (line 52) | def __init__(self, in_channels, n_filters, k_size, stride, padding, d...
method forward (line 71) | def forward(self, inputs):
class conv2DBatchNormRelu (line 75) | class conv2DBatchNormRelu(nn.Module):
method __init__ (line 76) | def __init__(self, in_channels, n_filters, k_size, stride, padding, d...
method forward (line 95) | def forward(self, inputs):
class pyramidPooling (line 99) | class pyramidPooling(nn.Module):
method __init__ (line 101) | def __init__(self, in_channels, with_bn=True, levels=4):
method forward (line 111) | def forward(self, x):
class pspnet (line 133) | class pspnet(nn.Module):
method __init__ (line 137) | def __init__(self, is_proj=True,groups=1):
method _make_layer (line 193) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 206) | def forward(self, x):
class pspnet_s (line 249) | class pspnet_s(nn.Module):
method __init__ (line 253) | def __init__(self, is_proj=True,groups=1):
method _make_layer (line 309) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 322) | def forward(self, x):
class bfmodule (line 366) | class bfmodule(nn.Module):
method __init__ (line 367) | def __init__(self, inplanes, outplanes):
method _make_layer (line 409) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 422) | def forward(self, x):
class bfmodule_feat (line 452) | class bfmodule_feat(nn.Module):
method __init__ (line 453) | def __init__(self, inplanes, outplanes):
method _make_layer (line 495) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 508) | def forward(self, x):
function compute_geo_costs (line 539) | def compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n=None):
function get_skew_mat (line 568) | def get_skew_mat(transx,rotx):
function sampson_err (line 581) | def sampson_err(x1h, x2h, F):
function get_intrinsics (line 589) | def get_intrinsics(intr, noise=False):
function testEss (line 661) | def testEss(K0,K1,R,T,p1,p2):
FILE: evaluator/evaluate_ate_scale.py
function align (line 49) | def align(model,data,calc_scale=False):
function plot_traj (line 102) | def plot_traj(ax,stamps,traj,style,color,label):
FILE: evaluator/evaluate_kitti.py
function trajectory_distances (line 9) | def trajectory_distances(poses):
function last_frame_from_segment_length (line 19) | def last_frame_from_segment_length(dist,first_frame,length):
function rotation_error (line 25) | def rotation_error(pose_error):
function translation_error (line 33) | def translation_error(pose_error):
function calculate_sequence_error (line 45) | def calculate_sequence_error(poses_gt,poses_result,lengths=[10,20,30,40,...
function calculate_ave_errors (line 86) | def calculate_ave_errors(errors,lengths=[10,20,30,40,50,60,70,80]):
function evaluate (line 105) | def evaluate(gt, data,kittitype=True):
function main (line 114) | def main():
FILE: evaluator/evaluate_rpe.py
function ominus (line 44) | def ominus(a,b):
function compute_distance (line 57) | def compute_distance(transform):
function compute_angle (line 63) | def compute_angle(transform):
function distances_along_trajectory (line 70) | def distances_along_trajectory(traj):
function evaluate_trajectory (line 83) | def evaluate_trajectory(traj_gt, traj_est, param_max_pairs=10000, param_...
FILE: evaluator/evaluator_base.py
function transform_trajs (line 13) | def transform_trajs(gt_traj, est_traj, cal_scale):
function quats2SEs (line 22) | def quats2SEs(gt_traj, est_traj):
function per_frame_scale_alignment (line 27) | def per_frame_scale_alignment(gt_motions, est_motions):
class ATEEvaluator (line 37) | class ATEEvaluator(object):
method __init__ (line 38) | def __init__(self):
method evaluate (line 42) | def evaluate(self, gt_traj, est_traj, scale):
class RPEEvaluator (line 69) | class RPEEvaluator(object):
method __init__ (line 70) | def __init__(self):
method evaluate (line 74) | def evaluate(self, gt_SEs, est_SEs):
class KittiEvaluator (line 90) | class KittiEvaluator(object):
method __init__ (line 91) | def __init__(self):
method evaluate (line 95) | def evaluate(self, gt_SEs, est_SEs, kittitype):
FILE: evaluator/tartanair_evaluator.py
class TartanAirEvaluator (line 10) | class TartanAirEvaluator:
method __init__ (line 11) | def __init__(self, scale = False, round=1):
method evaluate_one_trajectory (line 16) | def evaluate_one_trajectory(self, gt_traj, est_traj, scale=False, kitt...
FILE: evaluator/trajectory_transform.py
function shift0 (line 7) | def shift0(traj):
function ned2cam (line 21) | def ned2cam(traj):
function cam2ned (line 39) | def cam2ned(traj):
function trajectory_transform (line 58) | def trajectory_transform(gt_traj, est_traj):
function rescale_bk (line 71) | def rescale_bk(poses_gt, poses):
function pose2trans (line 88) | def pose2trans(pose_data):
function rescale (line 98) | def rescale(poses_gt, poses):
function trajectory_scale (line 118) | def trajectory_scale(traj, scale):
function timestamp_associate (line 123) | def timestamp_associate(first_list, second_list, max_difference):
FILE: evaluator/transformation.py
function line2mat (line 8) | def line2mat(line_data):
function mat2line (line 16) | def mat2line(mat_data):
function motion2pose (line 24) | def motion2pose(data):
function pose2motion (line 41) | def pose2motion(data, skip=0):
function SE2se (line 56) | def SE2se(SE_data):
function SO2so (line 62) | def SO2so(SO_data):
function so2SO (line 65) | def so2SO(so_data):
function se2SE (line 68) | def se2SE(se_data):
function se_mean (line 74) | def se_mean(se_datas):
function ses_mean (line 84) | def ses_mean(se_datas):
function ses2poses (line 93) | def ses2poses(data):
function ses2poses_quat (line 106) | def ses2poses_quat(data):
function SEs2ses (line 122) | def SEs2ses(motion_data):
function so2quat (line 131) | def so2quat(so_data):
function quat2so (line 140) | def quat2so(quat_data):
function sos2quats (line 151) | def sos2quats(so_datas,mean_std=[[1],[1]]):
function SO2quat (line 163) | def SO2quat(SO_data):
function quat2SO (line 167) | def quat2SO(quat_data):
function pos_quat2SE (line 171) | def pos_quat2SE(quat_data):
function pos_quats2SEs (line 180) | def pos_quats2SEs(quat_datas):
function pos_quats2SE_matrices (line 189) | def pos_quats2SE_matrices(quat_datas):
function SE2pos_quat (line 200) | def SE2pos_quat(SE_data):
function SEs2ses (line 206) | def SEs2ses(data):
function ses2SEs (line 217) | def ses2SEs(data):
function SE2quat (line 228) | def SE2quat(SE_data):
function quat2SE (line 238) | def quat2SE(quat_data):
function SEs2quats (line 249) | def SEs2quats(SEs_data):
function quats2SEs (line 261) | def quats2SEs(quat_datas):
function motion_ses2pose_quats (line 273) | def motion_ses2pose_quats(data):
function pose_quats2motion_ses (line 283) | def pose_quats2motion_ses(data):
function kitti2tartan (line 293) | def kitti2tartan(traj):
function tartan2kitti (line 313) | def tartan2kitti(traj):
FILE: vo_trajectory_from_folder.py
function get_args (line 16) | def get_args():
Condensed preview — 61 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (491K chars).
[
{
"path": ".gitignore",
"chars": 42,
"preview": "*.pyc\nmodels/\ndata/\n__pycache__/\n.DS_Store"
},
{
"path": "Datasets/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "Datasets/cowmask.py",
"chars": 3660,
"preview": "# pylint: disable=bad-indentation\n# coding=utf-8\n# Copyright 2022 The Google Research Authors.\n#\n# Licensed under the Ap"
},
{
"path": "Datasets/flowlib.py",
"chars": 18503,
"preview": "\"\"\"\n# ==============================\n# flowlib.py\n# library for optical flow processing\n# Author: Ruoteng Li\n# Date: 6th"
},
{
"path": "Datasets/segmask_gt.py",
"chars": 11176,
"preview": "\"\"\"\n# ==============================\n# segmask_gt.py\n# library to generate groundtruth \n# segmentation mask given flow a"
},
{
"path": "Datasets/tartanTrajFlowDataset.py",
"chars": 1771,
"preview": "\"\"\"\n# ==============================\n# tartanTrajFlowDataset.py\n# library for DytanVO data I/O\n# Author: Wenshan Wang, S"
},
{
"path": "Datasets/util_flow.py",
"chars": 8042,
"preview": "\"\"\"\n# ==============================\n# util_flow.py\n# library for optical flow processing\n# Author: Gengshan Yang\n# Date"
},
{
"path": "Datasets/utils.py",
"chars": 10928,
"preview": "\"\"\"\n# ==============================\n# utils.py\n# misc library for DytanVO\n# Author: Wenshan Wang, Shihao Shen\n# Date: 3"
},
{
"path": "DytanVO.py",
"chars": 12577,
"preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Shihao Shen, CMU\n# All rights reserved.\n#\n# Redistrib"
},
{
"path": "LICENSE",
"chars": 1522,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2020, Air Lab Stacks\nAll rights reserved.\n\nRedistribution and use in source and bina"
},
{
"path": "Network/PWC/PWCNet.py",
"chars": 10764,
"preview": "\"\"\"\nimplementation of the PWC-DC network for optical flow estimation by Sun et al., 2018\n\nJinwei Gu and Zhile Ren\n\n\"\"\"\n\n"
},
{
"path": "Network/PWC/__init__.py",
"chars": 22,
"preview": "from .PWCNet import *\n"
},
{
"path": "Network/PWC/correlation.py",
"chars": 13539,
"preview": "#!/usr/bin/env python\n\nimport torch\n\nimport cupy\nimport re\n\nkernel_Correlation_rearrange = '''\n\textern \"C\" __global__ vo"
},
{
"path": "Network/VOFlowNet.py",
"chars": 5275,
"preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, CMU\n# All rights reserved.\n#\n# Redistri"
},
{
"path": "Network/VONet.py",
"chars": 3092,
"preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, Yaoyu Hu, CMU\n# All rights reserved.\n#"
},
{
"path": "Network/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "Network/rigidmask/.gitignore",
"chars": 12,
"preview": "__pycache__\n"
},
{
"path": "Network/rigidmask/VCNplus.py",
"chars": 34308,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as n"
},
{
"path": "Network/rigidmask/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "Network/rigidmask/conv4d.py",
"chars": 12200,
"preview": "import pdb\nimport torch.nn as nn\nimport math\nimport torch\nfrom torch.nn.parameter import Parameter\nimport torch.nn.funct"
},
{
"path": "Network/rigidmask/det.py",
"chars": 3445,
"preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tor"
},
{
"path": "Network/rigidmask/det_losses.py",
"chars": 7940,
"preview": "# ------------------------------------------------------------------------------\n# Portions of this code are from\n# Corn"
},
{
"path": "Network/rigidmask/det_utils.py",
"chars": 1571,
"preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tor"
},
{
"path": "Network/rigidmask/networks/DCNv2/.gitignore",
"chars": 58,
"preview": ".vscode\n.idea\n*.so\n*.o\n*pyc\n_ext\nbuild\nDCNv2.egg-info\ndist"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/__init__.py",
"chars": 22,
"preview": "from .dcn_v2 import *\n"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py",
"chars": 12081,
"preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp",
"chars": 10885,
"preview": "#include <vector>\n#include \"cpu/dcn_v2_im2col_cpu.h\"\n#include <iostream>\n\n#include <ATen/ATen.h>\n//#include <ATen/cuda/C"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp",
"chars": 19948,
"preview": "#include \"dcn_v2_im2col_cpu.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n//#incl"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h",
"chars": 5105,
"preview": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contrib"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_psroi_pooling_cpu.cpp",
"chars": 17007,
"preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/vision.h",
"chars": 2665,
"preview": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cpu_forward(const at::Tensor &input,\n co"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_cuda.cu",
"chars": 15024,
"preview": "#include <vector>\n#include \"cuda/dcn_v2_im2col_cuda.h\"\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#incl"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.cu",
"chars": 20335,
"preview": "#include \"dcn_v2_im2col_cuda.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n#inclu"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.h",
"chars": 5226,
"preview": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contrib"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_psroi_pooling_cuda.cu",
"chars": 16240,
"preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/vision.h",
"chars": 2669,
"preview": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cuda_forward(const at::Tensor &input,\n c"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/dcn_v2.h",
"chars": 7715,
"preview": "#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/vision.h\"\n#endif\n\nat::Tensor\ndcn_v2_forward(const"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp",
"chars": 405,
"preview": "\n#include \"dcn_v2.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"dcn_v2_forward\", &dcn_v2_forward, \"dcn_v2_forw"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/testcpu.py",
"chars": 8233,
"preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
},
{
"path": "Network/rigidmask/networks/DCNv2/DCN/testcuda.py",
"chars": 8514,
"preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
},
{
"path": "Network/rigidmask/networks/DCNv2/LICENSE",
"chars": 1520,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2019, Charles Shang\nAll rights reserved.\n\nRedistribution and use in source and binar"
},
{
"path": "Network/rigidmask/networks/DCNv2/README.md",
"chars": 913,
"preview": "## Deformable Convolutional Networks V2 with Pytorch 1.X\n\n### Build\n```bash\n ./make.sh # build\n python tes"
},
{
"path": "Network/rigidmask/networks/DCNv2/make.sh",
"chars": 50,
"preview": "#!/usr/bin/env bash\npython setup.py build develop\n"
},
{
"path": "Network/rigidmask/networks/DCNv2/setup.py",
"chars": 2091,
"preview": "#!/usr/bin/env python\n\nimport os\nimport glob\n\nimport torch\n\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.u"
},
{
"path": "Network/rigidmask/networks/dlav0.py",
"chars": 22682,
"preview": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nfrom __future__ import absolute_import\nfrom __future__ import division\nfro"
},
{
"path": "Network/rigidmask/networks/large_hourglass.py",
"chars": 9942,
"preview": "# ------------------------------------------------------------------------------\n# This code is base on \n# CornerNet (ht"
},
{
"path": "Network/rigidmask/networks/msra_resnet.py",
"chars": 10167,
"preview": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed un"
},
{
"path": "Network/rigidmask/networks/pose_dla_dcn.py",
"chars": 17721,
"preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n"
},
{
"path": "Network/rigidmask/networks/resnet_dcn.py",
"chars": 10058,
"preview": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed un"
},
{
"path": "Network/rigidmask/submodule.py",
"chars": 31165,
"preview": "from __future__ import print_function\nimport torch\nimport torch.nn as nn\nimport torch.utils.data\nfrom torch.autograd imp"
},
{
"path": "README.md",
"chars": 8824,
"preview": "# DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments\r\n\r\n<p align=\"center\">\r\n "
},
{
"path": "environment.yml",
"chars": 5107,
"preview": "name: dytanvo\nchannels:\n - pytorch3d\n - pytorch\n - conda-forge\n - defaults\ndependencies:\n - _libgcc_mutex=0.1=main\n"
},
{
"path": "evaluator/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "evaluator/evaluate_ate_scale.py",
"chars": 4561,
"preview": "#!/usr/bin/python\n\n# Modified by Wenshan Wang\n# Modified by Raul Mur-Artal\n# Automatically compute the optimal scale fac"
},
{
"path": "evaluator/evaluate_kitti.py",
"chars": 4464,
"preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
},
{
"path": "evaluator/evaluate_rpe.py",
"chars": 5002,
"preview": "#!/usr/bin/python\n# Software License Agreement (BSD License)\n#\n# Modified by Wenshan Wang\n# Copyright (c) 2013, Juergen "
},
{
"path": "evaluator/evaluator_base.py",
"chars": 3128,
"preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
},
{
"path": "evaluator/tartanair_evaluator.py",
"chars": 2036,
"preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
},
{
"path": "evaluator/trajectory_transform.py",
"chars": 5049,
"preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
},
{
"path": "evaluator/transformation.py",
"chars": 9106,
"preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
},
{
"path": "vo_trajectory_from_folder.py",
"chars": 6973,
"preview": "from torch.utils.data import DataLoader\nfrom Datasets.utils import ToTensor, Compose, CropCenter, ResizeData, dataset_in"
}
]
About this extraction
This page contains the full source code of the castacks/DytanVO GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 61 files (462.0 KB), approximately 131.0k tokens, and a symbol index with 498 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.