Full Code of castacks/DytanVO for AI

main ca2fe20029e9 cached
61 files
462.0 KB
131.0k tokens
498 symbols
1 requests
Download .txt
Showing preview only (485K chars total). Download the full file or copy to clipboard to get everything.
Repository: castacks/DytanVO
Branch: main
Commit: ca2fe20029e9
Files: 61
Total size: 462.0 KB

Directory structure:
gitextract_hty7qque/

├── .gitignore
├── Datasets/
│   ├── __init__.py
│   ├── cowmask.py
│   ├── flowlib.py
│   ├── segmask_gt.py
│   ├── tartanTrajFlowDataset.py
│   ├── util_flow.py
│   └── utils.py
├── DytanVO.py
├── LICENSE
├── Network/
│   ├── PWC/
│   │   ├── PWCNet.py
│   │   ├── __init__.py
│   │   └── correlation.py
│   ├── VOFlowNet.py
│   ├── VONet.py
│   ├── __init__.py
│   └── rigidmask/
│       ├── .gitignore
│       ├── VCNplus.py
│       ├── __init__.py
│       ├── conv4d.py
│       ├── det.py
│       ├── det_losses.py
│       ├── det_utils.py
│       ├── networks/
│       │   ├── DCNv2/
│       │   │   ├── .gitignore
│       │   │   ├── DCN/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── dcn_v2.py
│       │   │   │   ├── src/
│       │   │   │   │   ├── cpu/
│       │   │   │   │   │   ├── dcn_v2_cpu.cpp
│       │   │   │   │   │   ├── dcn_v2_im2col_cpu.cpp
│       │   │   │   │   │   ├── dcn_v2_im2col_cpu.h
│       │   │   │   │   │   ├── dcn_v2_psroi_pooling_cpu.cpp
│       │   │   │   │   │   └── vision.h
│       │   │   │   │   ├── cuda/
│       │   │   │   │   │   ├── dcn_v2_cuda.cu
│       │   │   │   │   │   ├── dcn_v2_im2col_cuda.cu
│       │   │   │   │   │   ├── dcn_v2_im2col_cuda.h
│       │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
│       │   │   │   │   │   └── vision.h
│       │   │   │   │   ├── dcn_v2.h
│       │   │   │   │   └── vision.cpp
│       │   │   │   ├── testcpu.py
│       │   │   │   └── testcuda.py
│       │   │   ├── LICENSE
│       │   │   ├── README.md
│       │   │   ├── make.sh
│       │   │   └── setup.py
│       │   ├── dlav0.py
│       │   ├── large_hourglass.py
│       │   ├── msra_resnet.py
│       │   ├── pose_dla_dcn.py
│       │   └── resnet_dcn.py
│       └── submodule.py
├── README.md
├── environment.yml
├── evaluator/
│   ├── __init__.py
│   ├── evaluate_ate_scale.py
│   ├── evaluate_kitti.py
│   ├── evaluate_rpe.py
│   ├── evaluator_base.py
│   ├── tartanair_evaluator.py
│   ├── trajectory_transform.py
│   └── transformation.py
└── vo_trajectory_from_folder.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
models/
data/
__pycache__/
.DS_Store

================================================
FILE: Datasets/__init__.py
================================================


================================================
FILE: Datasets/cowmask.py
================================================
# pylint: disable=bad-indentation
# coding=utf-8
# Copyright 2022 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Cow mask generation. 
https://github.com/google-research/google-research/blob/master/milking_cowmask/
Adapted from LAX implementation to NumPy due to PyTorch dataloader 
being incompatible with JAX
Author: Shihao Shen
Date: 29th Aug 2022
"""
import math
import numpy as np
from scipy import special
from scipy.signal import convolve

_ROOT_2 = math.sqrt(2.0)
_ROOT_2_PI = math.sqrt(2.0 * math.pi)


def gaussian_kernels(sigma, max_sigma):
	"""Make Gaussian kernels for Gaussian blur.
	Args:
		sigma: kernel sigma
		max_sigma: sigma upper limit as a float (this is used to determine
			the size of kernel required to fit all kernels)
  	Returns:
	  	a (1, kernel_width) numpy array
  	"""
	size = round(max_sigma * 3) * 2 + 1
	x = np.arange(-size, size + 1)[None, :].astype(np.float32)
	y = np.exp(-0.5 * x ** 2 / sigma ** 2)
	return y / (sigma * _ROOT_2_PI)


def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range):
	"""Generate Cow Mask.
	Args:
      n_masks: number of masks to generate as an int
      mask_size: image size as a `(height, width)` tuple
      log_sigma_range: the range of the sigma (smoothing kernel)
          parameter in log-space`(log(sigma_min), log(sigma_max))`
      max_sigma: smoothing sigma upper limit
      prop_range: range from which to draw the proportion `p` that
        controls the proportion of pixel in a mask that are 1 vs 0
  Returns:
      Cow Masks as a [v, height, width, 1] numpy array
	"""

	# Draw the per-mask proportion p
	p = np.random.uniform(prop_range[0], prop_range[1])
	# Compute threshold factors
	threshold_factor = special.erfinv(2 * p - 1) * _ROOT_2

	sigma = np.exp(np.random.uniform(log_sigma_range[0], log_sigma_range[1]))

	noise = np.random.normal(size=mask_size)

	# Generate a kernel for each sigma
	kernel = gaussian_kernels(sigma, max_sigma)
	kernel = kernel.squeeze()
	# kernels in y and x
	krn_y = kernel[None, :]
	krn_x = kernel[:, None]

	# Apply kernels in y and x separately
	smooth_noise = convolve(noise, krn_y, mode='same')
	smooth_noise = convolve(smooth_noise, krn_x, mode='same')

	# Compute mean and std-dev
	noise_mu = smooth_noise.mean(axis=(0,1))
	noise_sigma = smooth_noise.std(axis=(0,1))
	# Compute thresholds
	threshold = threshold_factor * noise_sigma + noise_mu
	# Apply threshold
	mask = (smooth_noise <= threshold).astype(bool)

	return mask


if __name__=="__main__":
    import time
    import matplotlib.pyplot as plt

    cow_sigma_range = (20, 60)
    log_sigma_range = (math.log(cow_sigma_range[0]), math.log(cow_sigma_range[1]))
    cow_prop_range = (0.1, 0.5)
    s = time.time()
    max_iou = 0
    # for _ in range(1000):
    #     mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)
    #     max_iou = max(max_iou, np.sum(mask) / (240*360))
    # print(time.time() - s)
    # print(max_iou)

    mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)
    print(np.sum(mask) / (240*360))
    plt.imshow(mask * 255)
    plt.savefig('mask.png')

================================================
FILE: Datasets/flowlib.py
================================================
"""
# ==============================
# flowlib.py
# library for optical flow processing
# Author: Ruoteng Li
# Date: 6th Aug 2016
# ==============================
"""
import png
from util_flow import readPFM
import numpy as np
import matplotlib.colors as cl
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import pdb


UNKNOWN_FLOW_THRESH = 1e7
SMALLFLOW = 0.0
LARGEFLOW = 1e8

"""
=============
Flow Section
=============
"""


def show_flow(filename):
    """
    visualize optical flow map using matplotlib
    :param filename: optical flow file
    :return: None
    """
    flow = read_flow(filename)
    img = flow_to_image(flow)
    plt.imshow(img)
    plt.show()


def point_vec(img,flow,skip=10):
    skip=20
    maxsize=1000.
    extendfac=2.
    resize_factor = max(1,int(max(maxsize/img.shape[0], maxsize/img.shape[1])))
    meshgrid = np.meshgrid(range(img.shape[1]),range(img.shape[0]))
    dispimg = cv2.resize(img[:,:,::-1].copy(), None,fx=resize_factor,fy=resize_factor)
    colorflow = flow_to_image(flow).astype(int)
    for i in range(img.shape[1]): # x 
        for j in range(img.shape[0]): # y
            if flow[j,i,2] != 1: continue
            if j%skip!=0 or i%skip!=0: continue
            xend = int((meshgrid[0][j,i]+extendfac*flow[j,i,0])*resize_factor)
            yend = int((meshgrid[1][j,i]+extendfac*flow[j,i,1])*resize_factor)
            leng = np.linalg.norm(flow[j,i,:2]*extendfac)
            if leng<1:continue
            dispimg = cv2.arrowedLine(dispimg, (meshgrid[0][j,i]*resize_factor,meshgrid[1][j,i]*resize_factor),\
                                      (xend,yend),
                                      (int(colorflow[j,i,2]),int(colorflow[j,i,1]),int(colorflow[j,i,0])),4,tipLength=2/leng,line_type=cv2.LINE_AA)
    return dispimg

def visualize_flow(flow, mode='Y'):
    """
    this function visualize the input flow
    :param flow: input flow in array
    :param mode: choose which color mode to visualize the flow (Y: Ccbcr, RGB: RGB color)
    :return: None
    """
    if mode == 'Y':
        # Ccbcr color wheel
        img = flow_to_image(flow)
    elif mode == 'RGB':
        (h, w) = flow.shape[0:2]
        du = flow[:, :, 0]
        dv = flow[:, :, 1]
        valid = flow[:, :, 2]
        max_flow = np.sqrt(du**2+dv**2).max()
        img = np.zeros((h, w, 3), dtype=np.float64)
        # angle layer
        img[:, :, 0] = np.fmod(np.arctan2(dv, du) / (2 * np.pi)+1.,1.)
        # magnitude layer, normalized to 1
        img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow
        # phase layer
        img[:, :, 2] = 8 - img[:, :, 1]
        # clip to [0,1]
        small_idx = img[:, :, 0:3] < 0
        large_idx = img[:, :, 0:3] > 1
        img[small_idx] = 0
        img[large_idx] = 1
        # convert to rgb
        img = cl.hsv_to_rgb(img)
        # remove invalid point
        img[:, :, 0] = img[:, :, 0] * valid
        img[:, :, 1] = img[:, :, 1] * valid
        img[:, :, 2] = img[:, :, 2] * valid

    return img


def read_flow(filename):
    """
    read optical flow data from flow file
    :param filename: name of the flow file
    :return: optical flow data in numpy array
    """
    if filename.endswith('.flo'):
        flow = read_flo_file(filename)
    elif filename.endswith('.png'):
        flow = read_png_file(filename)
    elif filename.endswith('.pfm'):
        flow = read_pfm_file(filename)
    else:
        raise Exception('Invalid flow file format!')

    return flow

import numpy as np
import os


def write_flo(flow, filename):

    TAG_STRING = b'PIEH'
    assert type(filename) is str, "file is not str %r" % str(filename)
    assert filename[-4:] == '.flo', "file ending is not .flo %r" % file[-4:]

    height, width, nBands = flow.shape
    assert nBands == 2, "Number of bands = %r != 2" % nBands
    u = flow[: , : , 0]
    v = flow[: , : , 1] 
    assert u.shape == v.shape, "Invalid flow shape"
    height, width = u.shape

    f = open(filename,'wb')
    f.write(TAG_STRING)
    np.array(width).astype(np.int32).tofile(f)
    np.array(height).astype(np.int32).tofile(f)
    tmp = np.zeros((height, width*nBands))
    tmp[:,np.arange(width)*2] = u
    tmp[:,np.arange(width)*2 + 1] = v
    tmp.astype(np.float32).tofile(f)

    f.close()


def write_flow(flow, filename):
    """
    write optical flow in Middlebury .flo format
    :param flow: optical flow map
    :param filename: optical flow file path to be saved
    :return: None
    """
    f = open(filename, 'wb')
    magic = np.array([202021.25], dtype=np.float32)
    (height, width) = flow.shape[0:2]
    w = np.array([width], dtype=np.int32)
    h = np.array([height], dtype=np.int32)
    magic.tofile(f)
    w.tofile(f)
    h.tofile(f)
    flow.tofile(f)
    f.close()


def save_flow_image(flow, image_file):
    """
    save flow visualization into image file
    :param flow: optical flow data
    :param flow_fil
    :return: None
    """
    flow_img = flow_to_image(flow)
    img_out = Image.fromarray(flow_img)
    img_out.save(image_file)


def flowfile_to_imagefile(flow_file, image_file):
    """
    convert flowfile into image file
    :param flow: optical flow data
    :param flow_fil
    :return: None
    """
    flow = read_flow(flow_file)
    save_flow_image(flow, image_file)


def segment_flow(flow):
    h = flow.shape[0]
    w = flow.shape[1]
    u = flow[:, :, 0]
    v = flow[:, :, 1]

    idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW))
    idx2 = (abs(u) == SMALLFLOW)
    class0 = (v == 0) & (u == 0)
    u[idx2] = 0.00001
    tan_value = v / u

    class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0)
    class2 = (tan_value >= 1) & (u >= 0) & (v >= 0)
    class3 = (tan_value < -1) & (u <= 0) & (v >= 0)
    class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0)
    class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0)
    class7 = (tan_value < -1) & (u >= 0) & (v <= 0)
    class6 = (tan_value >= 1) & (u <= 0) & (v <= 0)
    class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0)

    seg = np.zeros((h, w))

    seg[class1] = 1
    seg[class2] = 2
    seg[class3] = 3
    seg[class4] = 4
    seg[class5] = 5
    seg[class6] = 6
    seg[class7] = 7
    seg[class8] = 8
    seg[class0] = 0
    seg[idx] = 0

    return seg


def flow_error(tu, tv, u, v):
    """
    Calculate average end point error
    :param tu: ground-truth horizontal flow map
    :param tv: ground-truth vertical flow map
    :param u:  estimated horizontal flow map
    :param v:  estimated vertical flow map
    :return: End point error of the estimated flow
    """
    smallflow = 0.0
    '''
    stu = tu[bord+1:end-bord,bord+1:end-bord]
    stv = tv[bord+1:end-bord,bord+1:end-bord]
    su = u[bord+1:end-bord,bord+1:end-bord]
    sv = v[bord+1:end-bord,bord+1:end-bord]
    '''
    stu = tu[:]
    stv = tv[:]
    su = u[:]
    sv = v[:]

    idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH)
    stu[idxUnknow] = 0
    stv[idxUnknow] = 0
    su[idxUnknow] = 0
    sv[idxUnknow] = 0

    ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)]
    index_su = su[ind2]
    index_sv = sv[ind2]
    an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1)
    un = index_su * an
    vn = index_sv * an

    index_stu = stu[ind2]
    index_stv = stv[ind2]
    tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1)
    tun = index_stu * tn
    tvn = index_stv * tn

    '''
    angle = un * tun + vn * tvn + (an * tn)
    index = [angle == 1.0]
    angle[index] = 0.999
    ang = np.arccos(angle)
    mang = np.mean(ang)
    mang = mang * 180 / np.pi
    '''

    epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)
    epe = epe[ind2]
    mepe = np.mean(epe)
    return mepe


def flow_to_image(flow):
    """
    Convert flow into middlebury color code image
    :param flow: optical flow map
    :return: optical flow image in middlebury color
    """
    u = flow[:, :, 0]
    v = flow[:, :, 1]

    maxu = -999.
    maxv = -999.
    minu = 999.
    minv = 999.

    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknow] = 0
    v[idxUnknow] = 0

    maxu = max(maxu, np.max(u))
    minu = min(minu, np.min(u))

    maxv = max(maxv, np.max(v))
    minv = min(minv, np.min(v))

    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))

    u = u/(maxrad + np.finfo(float).eps)
    v = v/(maxrad + np.finfo(float).eps)

    img = compute_color(u, v)

    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)


def evaluate_flow_file(gt_file, pred_file):
    """
    evaluate the estimated optical flow end point error according to ground truth provided
    :param gt_file: ground truth file path
    :param pred_file: estimated optical flow file path
    :return: end point error, float32
    """
    # Read flow files and calculate the errors
    gt_flow = read_flow(gt_file)        # ground truth flow
    eva_flow = read_flow(pred_file)     # predicted flow
    # Calculate errors
    average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])
    return average_pe


def evaluate_flow(gt_flow, pred_flow):
    """
    gt: ground-truth flow
    pred: estimated flow
    """
    average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1])
    return average_pe


"""
==============
Disparity Section
==============
"""


def read_disp_png(file_name):
    """
    Read optical flow from KITTI .png file
    :param file_name: name of the flow file
    :return: optical flow data in matrix
    """
    image_object = png.Reader(filename=file_name)
    image_direct = image_object.asDirect()
    image_data = list(image_direct[2])
    (w, h) = image_direct[3]['size']
    channel = len(image_data[0]) / w
    flow = np.zeros((h, w, channel), dtype=np.uint16)
    for i in range(len(image_data)):
        for j in range(channel):
            flow[i, :, j] = image_data[i][j::channel]
    return flow[:, :, 0] / 256


def disp_to_flowfile(disp, filename):
    """
    Read KITTI disparity file in png format
    :param disp: disparity matrix
    :param filename: the flow file name to save
    :return: None
    """
    f = open(filename, 'wb')
    magic = np.array([202021.25], dtype=np.float32)
    (height, width) = disp.shape[0:2]
    w = np.array([width], dtype=np.int32)
    h = np.array([height], dtype=np.int32)
    empty_map = np.zeros((height, width), dtype=np.float32)
    data = np.dstack((disp, empty_map))
    magic.tofile(f)
    w.tofile(f)
    h.tofile(f)
    data.tofile(f)
    f.close()


"""
==============
Image Section
==============
"""


def read_image(filename):
    """
    Read normal image of any format
    :param filename: name of the image file
    :return: image data in matrix uint8 type
    """
    img = Image.open(filename)
    im = np.array(img)
    return im


def warp_image(im, flow):
    """
    Use optical flow to warp image to the next
    :param im: image to warp
    :param flow: optical flow
    :return: warped image
    """
    from scipy import interpolate
    image_height = im.shape[0]
    image_width = im.shape[1]
    flow_height = flow.shape[0]
    flow_width = flow.shape[1]
    n = image_height * image_width
    (iy, ix) = np.mgrid[0:image_height, 0:image_width]
    (fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
    fx = fx.astype(np.float64)
    fy = fy.astype(np.float64)
    fx += flow[:,:,0]
    fy += flow[:,:,1]
    mask = np.logical_or(fx <0 , fx > flow_width)
    mask = np.logical_or(mask, fy < 0)
    mask = np.logical_or(mask, fy > flow_height)
    fx = np.minimum(np.maximum(fx, 0), flow_width)
    fy = np.minimum(np.maximum(fy, 0), flow_height)
    points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1)
    xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1)
    warp = np.zeros((image_height, image_width, im.shape[2]))
    for i in range(im.shape[2]):
        channel = im[:, :, i]
        plt.imshow(channel, cmap='gray')
        values = channel.reshape(n, 1)
        new_channel = interpolate.griddata(points, values, xi, method='cubic')
        new_channel = np.reshape(new_channel, [flow_height, flow_width])
        new_channel[mask] = 1
        warp[:, :, i] = new_channel.astype(np.uint8)

    return warp.astype(np.uint8)


"""
==============
Others
==============
"""

def pfm_to_flo(pfm_file):
    flow_filename = pfm_file[0:pfm_file.find('.pfm')] + '.flo'
    (data, scale) = readPFM(pfm_file)
    flow = data[:, :, 0:2]
    write_flow(flow, flow_filename)


def scale_image(image, new_range):
    """
    Linearly scale the image into desired range
    :param image: input image
    :param new_range: the new range to be aligned
    :return: image normalized in new range
    """
    min_val = np.min(image).astype(np.float32)
    max_val = np.max(image).astype(np.float32)
    min_val_new = np.array(min(new_range), dtype=np.float32)
    max_val_new = np.array(max(new_range), dtype=np.float32)
    scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new
    return scaled_image.astype(np.uint8)


def compute_color(u, v):
    """
    compute optical flow color map
    :param u: optical flow horizontal map
    :param v: optical flow vertical map
    :return: optical flow in color code
    """
    [h, w] = u.shape
    img = np.zeros([h, w, 3])
    nanIdx = np.isnan(u) | np.isnan(v)
    u[nanIdx] = 0
    v[nanIdx] = 0

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u**2+v**2)

    a = np.arctan2(-v, -u) / np.pi

    fk = (a+1) / 2 * (ncols - 1) + 1

    k0 = np.floor(fk).astype(int)

    k1 = k0 + 1
    k1[k1 == ncols+1] = 1
    f = fk - k0

    for i in range(0, np.size(colorwheel,1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0-1] / 255
        col1 = tmp[k1-1] / 255
        col = (1-f) * col0 + f * col1

        idx = rad <= 1
        col[idx] = 1-rad[idx]*(1-col[idx])
        notidx = np.logical_not(idx)

        col[notidx] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))

    return img


def make_color_wheel():
    """
    Generate color wheel according Middlebury color code
    :return: Color wheel
    """
    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR

    colorwheel = np.zeros([ncols, 3])

    col = 0

    # RY
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
    col += RY

    # YG
    colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
    colorwheel[col:col+YG, 1] = 255
    col += YG

    # GC
    colorwheel[col:col+GC, 1] = 255
    colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
    col += GC

    # CB
    colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
    colorwheel[col:col+CB, 2] = 255
    col += CB

    # BM
    colorwheel[col:col+BM, 2] = 255
    colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
    col += + BM

    # MR
    colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
    colorwheel[col:col+MR, 0] = 255

    return colorwheel


def read_flo_file(filename):
    """
    Read from Middlebury .flo file
    :param flow_file: name of the flow file
    :return: optical flow data in matrix
    """
    f = open(filename, 'rb')
    magic = np.fromfile(f, np.float32, count=1)
    data2d = None

    if 202021.25 != magic:
        print('Magic number incorrect. Invalid .flo file')
    else:
        w = np.fromfile(f, np.int32, count=1)
        h = np.fromfile(f, np.int32, count=1)
        #print("Reading %d x %d flow file in .flo format" % (h, w))
        flow = np.ones((h[0],w[0],3))
        data2d = np.fromfile(f, np.float32, count=2 * w[0] * h[0])
        # reshape data into 3D array (columns, rows, channels)
        data2d = np.resize(data2d, (h[0], w[0], 2))
        flow[:,:,:2] = data2d
    f.close()
    return flow


def read_png_file(flow_file):
    """
    Read from KITTI .png file
    :param flow_file: name of the flow file
    :return: optical flow data in matrix
    """
    flow = cv2.imread(flow_file,-1)[:,:,::-1].astype(np.float64)
 #   flow_object = png.Reader(filename=flow_file)
 #   flow_direct = flow_object.asDirect()
 #   flow_data = list(flow_direct[2])
 #   (w, h) = flow_direct[3]['size']
 #   #print("Reading %d x %d flow file in .png format" % (h, w))
 #   flow = np.zeros((h, w, 3), dtype=np.float64)
 #   for i in range(len(flow_data)):
 #       flow[i, :, 0] = flow_data[i][0::3]
 #       flow[i, :, 1] = flow_data[i][1::3]
 #       flow[i, :, 2] = flow_data[i][2::3]

    invalid_idx = (flow[:, :, 2] == 0)
    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
    flow[invalid_idx, 0] = 0
    flow[invalid_idx, 1] = 0
    return flow


def read_pfm_file(flow_file):
    """
    Read from .pfm file
    :param flow_file: name of the flow file
    :return: optical flow data in matrix
    """
    (data, scale) = readPFM(flow_file)
    return data 


# fast resample layer
def resample(img, sz):
    """
    img: flow map to be resampled
    sz: new flow map size. Must be [height,weight]
    """
    original_image_size = img.shape
    in_height = img.shape[0]
    in_width = img.shape[1]
    out_height = sz[0]
    out_width = sz[1]
    out_flow = np.zeros((out_height, out_width, 2))
    # find scale
    height_scale =  float(in_height) / float(out_height)
    width_scale =  float(in_width) / float(out_width)

    [x,y] = np.meshgrid(range(out_width), range(out_height))
    xx = x * width_scale
    yy = y * height_scale
    x0 = np.floor(xx).astype(np.int32)
    x1 = x0 + 1
    y0 = np.floor(yy).astype(np.int32)
    y1 = y0 + 1

    x0 = np.clip(x0,0,in_width-1)
    x1 = np.clip(x1,0,in_width-1)
    y0 = np.clip(y0,0,in_height-1)
    y1 = np.clip(y1,0,in_height-1)

    Ia = img[y0,x0,:]
    Ib = img[y1,x0,:]
    Ic = img[y0,x1,:]
    Id = img[y1,x1,:]

    wa = (y1-yy) * (x1-xx)
    wb = (yy-y0) * (x1-xx)
    wc = (y1-yy) * (xx-x0)
    wd = (yy-y0) * (xx-x0)
    out_flow[:,:,0] = (Ia[:,:,0]*wa + Ib[:,:,0]*wb + Ic[:,:,0]*wc + Id[:,:,0]*wd) * out_width / in_width
    out_flow[:,:,1] = (Ia[:,:,1]*wa + Ib[:,:,1]*wb + Ic[:,:,1]*wc + Id[:,:,1]*wd) * out_height / in_height

    return out_flow



================================================
FILE: Datasets/segmask_gt.py
================================================
"""
# ==============================
# segmask_gt.py
# library to generate groundtruth 
# segmentation mask given flow and
# disparity change
# (Adapted from code for rigidmask)
# Author: Shihao Shen
# Date: 14th Sep 2022
# ==============================
"""

import argparse
import os
import os.path
import glob
import numpy as np
import cv2
from PIL import Image
from flowlib import read_flow, readPFM, flow_to_image

def dataloader(filepath, fpass='frames_cleanpass', level=6):
    iml0 = []
    iml1 = []
    flowl0 = []
    disp0 = []
    dispc = []
    calib = []
    level_stars = '/*'*level
    candidate_pool = glob.glob('%s/optical_flow%s'%(filepath,level_stars))
    for flow_path in sorted(candidate_pool):
        # if 'TEST' in flow_path: continue
        if 'flower_storm_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:
            print('Skipping %s' % flow_path)
            continue
        if 'flower_storm_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:
            print('Skipping %s' % flow_path)
            continue
        if 'flower_storm_augmented0_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:
            print('Skipping %s' % flow_path)
            continue
        if 'flower_storm_augmented0_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:
            print('Skipping %s' % flow_path)
            continue
        # if 'FlyingThings' in flow_path and '_0014_' in flow_path:
        #     print('Skipping %s' % flow_path)
        #     continue
        # if 'FlyingThings' in flow_path and '_0015_' in flow_path:
        #     print('Skipping %s' % flow_path)
        #     continue
        idd = flow_path.split('/')[-1].split('_')[-2]
        if 'into_future' in flow_path:
            idd_p1 = '%04d'%(int(idd)+1)
        else:
            idd_p1 = '%04d'%(int(idd)-1)
        if os.path.exists(flow_path.replace(idd,idd_p1)): 
            d0_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow','disparity')
            d0_path = '%s/%s.pfm'%(d0_path.rsplit('/',1)[0],idd)
            dc_path = flow_path.replace('optical_flow','disparity_change')
            dc_path = '%s/%s.pfm'%(dc_path.rsplit('/',1)[0],idd)
            im_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow',fpass)
            im0_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd)
            im1_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd_p1)

            # This will skip any sequence that contains less than 10 poses in camera_data.txt
            with open('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]),'r') as f:
               if 'FlyingThings' in flow_path and len(f.readlines())!=40: 
                   print('Skipping %s' % flow_path)
                   continue

            iml0.append(im0_path)
            iml1.append(im1_path)
            flowl0.append(flow_path)
            disp0.append(d0_path)
            dispc.append(dc_path)
            calib.append('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]))
    return iml0, iml1, flowl0, disp0, dispc, calib

def default_loader(path):
    return Image.open(path).convert('RGB')

def flow_loader(path):
    if '.pfm' in path:
        data =  readPFM(path)[0]
        data[:,:,2] = 1
        return data
    else:
        return read_flow(path)

def load_exts(cam_file):
    with open(cam_file, 'r') as f:
        lines = f.readlines()

    l_exts = []
    r_exts = []
    for l in lines:
        if 'L ' in l:
            l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
        if 'R ' in l:
            r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
    return l_exts,r_exts        

def disparity_loader(path):
    if '.png' in path:
        data = Image.open(path)
        data = np.ascontiguousarray(data,dtype=np.float32)/256
        return data
    else:    
        return readPFM(path)[0]

# triangulation
def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5):
    depth = bl*fl / disp # 450px->15mm focal length
    X = (xcoord - cx) * depth / fl
    Y = (ycoord - cy) * depth / fl
    Z = depth
    P = np.concatenate((X[np.newaxis],Y[np.newaxis],Z[np.newaxis]),0).reshape(3,-1)
    P = np.concatenate((P,np.ones((1,P.shape[-1]))),0)
    return P

def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, calibs=None):
    '''
    index: index of the frame in the file lists below
    iml0s: a file list of the first frames
    iml1s: a file list of the second frames
    flowl0s: a file list of the optical w.r.t. iml0s
    disp0s: a file list of the disparity w.r.t. iml0s
    dispcs: a file list of the disparity change w.r.t. disp0s
    calibs: a file list of the camera extrinsics
    '''
    iml0 = iml0s[index]
    iml1 = iml1s[index]
    flowl0 = flowl0s[index]
    
    iml0 = default_loader(iml0)
    iml1 = default_loader(iml1)

    flowl0 = flow_loader(flowl0)
    flowl0[:,:,-1][flowl0[:,:,0] == np.inf] = 0 
    flowl0[:,:,0][~flowl0[:,:,2].astype(bool)] = 0
    flowl0[:,:,1][~flowl0[:,:,2].astype(bool)] = 0
    flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)
    flowl0[np.isnan(flowl0)] = 1e6
    
    bl = 1
    if '15mm_' in calibs[index]: 
        fl = 450
    else:
        fl = 1050
    cx = 479.5
    cy = 269.5
    intr = [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]

    d1 = np.abs(disparity_loader(disp0s[index]))
    d2 = np.abs(disparity_loader(dispcs[index]) + d1)
    
    flowl0[:,:,2] = np.logical_and(np.logical_and(flowl0[:,:,2] == 1, d1 != 0), d2 != 0).astype(float)
    
    shape = d1.shape
    mesh = np.meshgrid(range(shape[1]), range(shape[0]))
    xcoord = mesh[0].astype(float)
    ycoord = mesh[1].astype(float)

    # triangulation in two frames
    P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy)
    P1 = triangulation(d2, xcoord + flowl0[:,:,0], ycoord + flowl0[:,:,1], bl=bl, fl=fl, cx=cx, cy=cy)
    depth0 = P0[2]
    depth1 = P1[2]

    depth0 = depth0.reshape(shape).astype(np.float32)
    flow3d = (P1-P0)[:3].reshape((3,)+shape).transpose((1,2,0))

    fid = int(flowl0s[index].split('/')[-1].split('_')[1])
    with open(calibs[index], 'r') as f:
        fid = fid - int(f.readline().split(' ')[-1])
    l_exts, r_exts= load_exts(calibs[index])
    if '/right/' in iml0s[index]:
        exts = r_exts
    else:
        exts = l_exts

    if '/into_future/' in flowl0s[index]:
        if (fid + 1) > len(exts) - 1: print(flowl0s[index])
        if (fid) > len(exts) - 1: print(flowl0s[index])
        ext1 = exts[fid+1]
        ext0 = exts[fid]
    else:
        if (fid - 1) > len(exts) - 1: print(flowl0s[index])
        if (fid) > len(exts) - 1: print(flowl0s[index])
        ext1 = exts[fid-1]
        ext0 = exts[fid]
    camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1  # Sceneflow uses Blender's coordinate system
    RT01 = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT)  # ext is from camera space to world space
    
    rect_flow3d = (RT01[:3,:3].dot(P1[:3])-P0[:3]).reshape((3,)+shape).transpose((1,2,0))  # rectified scene flow

    depthflow = np.concatenate((depth0[:,:,np.newaxis], rect_flow3d, flow3d), 2)
    RT01 = np.concatenate((cv2.Rodrigues(RT01[:3,:3])[0][:,0], RT01[:3,-1])).astype(np.float32)

    # object mask
    fnum = int(iml0s[index].split('/')[-1].split('.png')[0])
    obj_fname = '%s/%04d.pfm'%(flowl0s[index].replace('/optical_flow','object_index').replace('into_past/','/').replace('into_future/','/').rsplit('/',1)[0],fnum)
    obj_idx = disparity_loader(obj_fname)
    
    depthflow = np.concatenate((depthflow, obj_idx[:,:,np.newaxis]), 2)
    # depthflow dimension: H x W x 8 (depth=1 + rectified_flow3d=3 + flow3d=3 + object_segmentation=1)

    iml1 = np.asarray(iml1)
    iml0 = np.asarray(iml0)
    
    return iml0, iml1, flowl0, depthflow, intr, RT01


def motionmask(flowl0, depthflow, RT01):
    '''
    flowl0: optical flow. [H, W, 3]
    depthflow: a concatenation of depth, rectified scene flow, scene flow, and object segmentation. [H, W, 8]
    RT01: camera motion from the future frame to the current frame. [6, ]
    '''
    valid_mask = (flowl0[:,:,2] == 1) & (depthflow[:,:,0] < 100) & (depthflow[:,:,0] > 0.01)  # valid flow & valid depth
    Tglobal_gt = -RT01[3:, np.newaxis, np.newaxis]  # background translation
    Tlocal_gt = depthflow[:,:,1:4].transpose(2, 0, 1)   # point translation (after removing rotation)
    m3d_gt = np.linalg.norm(Tlocal_gt - Tglobal_gt, 2, 0)       # abs. motion
    fgmask_gt = m3d_gt * 100 > 1
    fgmask_gt[~valid_mask] = False

    return fgmask_gt


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='segmask_gt_generation')
    parser.add_argument('--database',
                    help='path to the database (required)')
    parser.add_argument('--debug', action='store_true', default=False,
                    help='generate visualization')
    parser.add_argument('--frames_pass', default='frames_cleanpass', 
                    help='which pass to use, either clean or final')
    parser.add_argument('--dataset', 
                    help='choose from FlyingThings3D, Driving, Monkaa')
    args = parser.parse_args()

    if args.debug:
        os.makedirs('%s/%s/results_viz' % (args.database, args.dataset), exist_ok=True)
    
    if args.dataset == 'Monkaa':
        level = 4
    else:
        level = 6
    iml0s, iml1s, flowl0s, disp0s, dispcs, calibs = dataloader('%s/%s/' % (args.database, args.dataset), 
                                                                level=level, fpass=args.frames_pass)
    
    print("Generating %s masks..." % len(flowl0s))
    for i in range(len(iml0s)):
        idd = flowl0s[i].split('/')[-1].split('_')[-2]
        mask_fn = '%s/%s.npy' % (os.path.dirname(flowl0s[i]).replace('optical_flow', 'rigidmask'), idd)
        if os.path.exists(mask_fn):
            print(i)
            continue
        os.makedirs(os.path.dirname(mask_fn), exist_ok=True)

        iml0, iml1, flowl0, depthflow, intr, RT01 = exp_loader(i, iml0s, iml1s, flowl0s, disp0s, dispcs, calibs)
        fgmask = motionmask(flowl0, depthflow, RT01)
        np.save(mask_fn, fgmask)

        if args.debug:
            if args.dataset == 'Driving' and 'rigidmask/15mm_focallength/scene_forwards/fast/left' not in mask_fn:
                continue
            elif args.dataset == 'Monkaa' and 'rigidmask/eating_camera2_x2/left' not in mask_fn:
                continue
            elif args.dataset == 'FlyingThings3D' and not ('rigidmask/TEST/A' in mask_fn and 'into_future/left' in mask_fn):
                continue
            print("Visualizing %s" % mask_fn)
            flowl0viz = flow_to_image(flowl0)
            maskviz = np.stack((fgmask * 255.0, )*3, axis=-1).astype(np.uint8)
            inputs = np.concatenate([iml0, flowl0viz, maskviz], axis=1)
            cv2.imwrite('%s/%s/results_viz/%s.png' % (args.database, args.dataset, str(i).zfill(5)), cv2.cvtColor(inputs, cv2.COLOR_RGB2BGR))

================================================
FILE: Datasets/tartanTrajFlowDataset.py
================================================
"""
# ==============================
# tartanTrajFlowDataset.py
# library for DytanVO data I/O
# Author: Wenshan Wang, Shihao Shen
# Date: 3rd Jan 2023
# ==============================
"""
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
from os import listdir
from evaluator.transformation import pos_quats2SEs, pose2motion, SEs2ses
from .utils import make_intrinsics_layer

class TrajFolderDataset(Dataset):
    """scene flow synthetic dataset. """

    def __init__(self, imgfolder, transform = None, 
                    focalx = 320.0, focaly = 320.0, centerx = 320.0, centery = 240.0):
        
        files = listdir(imgfolder)
        self.rgbfiles = [(imgfolder +'/'+ ff) for ff in files if (ff.endswith('.png') or ff.endswith('.jpg'))]
        self.rgbfiles.sort()
        self.imgfolder = imgfolder

        print('Find {} image files in {}'.format(len(self.rgbfiles), imgfolder))

        self.N = len(self.rgbfiles) - 1

        # self.N = len(self.lines)
        self.transform = transform
        self.focalx = focalx
        self.focaly = focaly
        self.centerx = centerx
        self.centery = centery

    def __len__(self):
        return self.N

    def __getitem__(self, idx):
        imgfile1 = self.rgbfiles[idx].strip()
        imgfile2 = self.rgbfiles[idx+1].strip()
        img1 = cv2.imread(imgfile1)
        img2 = cv2.imread(imgfile2)

        res = {'img1': img1, 'img2': img2}

        h, w, _ = img1.shape
        intrinsicLayer = make_intrinsics_layer(w, h, self.focalx, self.focaly, self.centerx, self.centery)
        res['intrinsic'] = intrinsicLayer

        if self.transform:
            res = self.transform(res)

        res['img1_raw'] = img1
        res['img2_raw'] = img2

        return res

================================================
FILE: Datasets/util_flow.py
================================================
"""
# ==============================
# util_flow.py
# library for optical flow processing
# Author: Gengshan Yang
# Date: 10th Feb 2021
# ==============================
"""
import math
import png
import struct
import array
import numpy as np
import cv2
import pdb

from io import *

UNKNOWN_FLOW_THRESH = 1e9;
UNKNOWN_FLOW = 1e10;

# Middlebury checks
TAG_STRING = 'PIEH'    # use this when WRITING the file
TAG_FLOAT = 202021.25  # check for this when READING the file

def readPFM(file):
    import re
    file = open(file, 'rb')

    color = None
    width = None
    height = None
    scale = None
    endian = None

    header = file.readline().rstrip()
    if header == b'PF':
        color = True
    elif header == b'Pf':
        color = False
    else:
        raise Exception('Not a PFM file.')

    dim_match = re.match(b'^(\d+)\s(\d+)\s$', file.readline())
    if dim_match:
        width, height = map(int, dim_match.groups())
    else:
        raise Exception('Malformed PFM header.')

    scale = float(file.readline().rstrip())
    if scale < 0: # little-endian
        endian = '<'
        scale = -scale
    else:
        endian = '>' # big-endian

    data = np.fromfile(file, endian + 'f')
    shape = (height, width, 3) if color else (height, width)

    data = np.reshape(data, shape)
    data = np.flipud(data)
    return data, scale


def save_pfm(file, image, scale = 1):
  import sys
  color = None

  if image.dtype.name != 'float32':
    raise Exception('Image dtype must be float32.')

  if len(image.shape) == 3 and image.shape[2] == 3: # color image
    color = True
  elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale
    color = False
  else:
    raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

  file.write('PF\n' if color else 'Pf\n')
  file.write('%d %d\n' % (image.shape[1], image.shape[0]))

  endian = image.dtype.byteorder

  if endian == '<' or endian == '=' and sys.byteorder == 'little':
    scale = -scale

  file.write('%f\n' % scale)

  image.tofile(file)


def ReadMiddleburyFloFile(path):
    """ Read .FLO file as specified by Middlebury.

    Returns tuple (width, height, u, v, mask), where u, v, mask are flat
    arrays of values.
    """

    with open(path, 'rb') as fil:
        tag = struct.unpack('f', fil.read(4))[0]
        width = struct.unpack('i', fil.read(4))[0]
        height = struct.unpack('i', fil.read(4))[0]

        assert tag == TAG_FLOAT
        
        #data = np.fromfile(path, dtype=np.float, count=-1)
        #data = data[3:]

        fmt = 'f' * width*height*2
        data = struct.unpack(fmt, fil.read(4*width*height*2))

        u = data[::2]
        v = data[1::2]

        mask = map(lambda x,y: abs(x)<UNKNOWN_FLOW_THRESH and abs(y) < UNKNOWN_FLOW_THRESH, u, v)
        mask = list(mask)
        u_masked = map(lambda x,y: x if y else 0, u, mask)
        v_masked = map(lambda x,y: x if y else 0, v, mask)

    return width, height, list(u_masked), list(v_masked), list(mask)

def ReadKittiPngFile(path):
    """ Read 16-bit .PNG file as specified by KITTI-2015 (flow).

    Returns a tuple, (width, height, u, v, mask), where u, v, mask
    are flat arrays of values.
    """
    # Read .png file.
    png_reader = png.Reader(path)
    data = png_reader.read()
    if data[3]['bitdepth'] != 16:
        raise Exception('bitdepth of ' + path + ' is not 16')

    width = data[0]
    height = data[1]

    # Get list of rows.
    rows = list(data[2])

    u = array.array('f', [0]) * width*height
    v = array.array('f', [0]) * width*height
    mask = array.array('f', [0]) * width*height

    for y, row in enumerate(rows):
        for x in range(width):
            ind = width*y+x
            u[ind] = (row[3*x] - 2**15) / 64.0
            v[ind] = (row[3*x+1] - 2**15) / 64.0
            mask[ind] = row[3*x+2]

            # if mask[ind] > 0:
            #     print(u[ind], v[ind], mask[ind], row[3*x], row[3*x+1], row[3*x+2])

    #png_reader.close()

    return (width, height, u, v, mask)


def WriteMiddleburyFloFile(path, width, height, u, v, mask=None):
    """ Write .FLO file as specified by Middlebury.
    """

    if mask is not None:
        u_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, u, mask)
        v_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, v, mask)
    else:
        u_masked = u
        v_masked = v

    fmt = 'f' * width*height*2
    # Interleave lists
    data = [x for t in zip(u_masked,v_masked) for x in t]

    with open(path, 'wb') as fil:
        fil.write(str.encode(TAG_STRING))
        fil.write(struct.pack('i', width))
        fil.write(struct.pack('i', height))
        fil.write(struct.pack(fmt, *data))


def write_flow(path,flow):
    
    invalid_idx = (flow[:, :, 2] == 0)
    flow[:, :, 0:2] = flow[:, :, 0:2]*64.+ 2 ** 15
    flow[invalid_idx, 0] = 0
    flow[invalid_idx, 1] = 0

    flow = flow.astype(np.uint16)
    flow = cv2.imwrite(path, flow[:,:,::-1])

    #WriteKittiPngFile(path,
    #     flow.shape[1], flow.shape[0], flow[:,:,0].flatten(), 
    #    flow[:,:,1].flatten(), flow[:,:,2].flatten())
    


def WriteKittiPngFile(path, width, height, u, v, mask=None):
    """ Write 16-bit .PNG file as specified by KITTI-2015 (flow).

    u, v are lists of float values
    mask is a list of floats, denoting the *valid* pixels.
    """

    data = array.array('H',[0])*width*height*3

    for i,(u_,v_,mask_) in enumerate(zip(u,v,mask)):
        data[3*i] = int(u_*64.0+2**15)
        data[3*i+1] = int(v_*64.0+2**15)
        data[3*i+2] = int(mask_)

        # if mask_ > 0:
        #     print(data[3*i], data[3*i+1],data[3*i+2])

    with open(path, 'wb') as png_file:
        png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False)
        png_writer.write_array(png_file, data)


def ConvertMiddleburyFloToKittiPng(src_path, dest_path):
    width, height, u, v, mask = ReadMiddleburyFloFile(src_path)
    WriteKittiPngFile(dest_path, width, height, u, v, mask=mask)

def ConvertKittiPngToMiddleburyFlo(src_path, dest_path):
    width, height, u, v, mask = ReadKittiPngFile(src_path)
    WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=mask)


def ParseFilenameKitti(filename):
    # Parse kitti filename (seq_frameno.xx),
    # return seq, frameno, ext.
    # Be aware that seq might contain the dataset name (if contained as prefix)
    ext = filename[filename.rfind('.'):]
    frameno = filename[filename.rfind('_')+1:filename.rfind('.')]
    frameno = int(frameno)
    seq = filename[:filename.rfind('_')]
    return seq, frameno, ext


def read_calib_file(filepath):
    """Read in a calibration file and parse into a dictionary."""
    data = {}

    with open(filepath, 'r') as f:
        for line in f.readlines():
            key, value = line.split(':', 1)
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                data[key] = np.array([float(x) for x in value.split()])
            except ValueError:
                pass

    return data

def load_calib_cam_to_cam(cam_to_cam_file):
    # We'll return the camera calibration as a dictionary
    data = {}

    # Load and parse the cam-to-cam calibration data
    filedata = read_calib_file(cam_to_cam_file)

    # Create 3x4 projection matrices
    P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4))
    P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4))
    P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4))
    P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4))

    # Compute the camera intrinsics
    data['K_cam0'] = P_rect_00[0:3, 0:3]
    data['K_cam1'] = P_rect_10[0:3, 0:3]
    data['K_cam2'] = P_rect_20[0:3, 0:3]
    data['K_cam3'] = P_rect_30[0:3, 0:3]

    data['b00'] = P_rect_00[0, 3] / P_rect_00[0, 0]
    data['b10'] = P_rect_10[0, 3] / P_rect_10[0, 0]
    data['b20'] = P_rect_20[0, 3] / P_rect_20[0, 0]
    data['b30'] = P_rect_30[0, 3] / P_rect_30[0, 0]

    return data

================================================
FILE: Datasets/utils.py
================================================
"""
# ==============================
# utils.py
# misc library for DytanVO
# Author: Wenshan Wang, Shihao Shen
# Date: 3rd Jan 2023
# ==============================
"""

from __future__ import division
import torch
import math
import random
import numpy as np
import numbers
import cv2
import matplotlib.pyplot as plt
import os
from scipy.spatial.transform import Rotation as R

if ( not ( "DISPLAY" in os.environ ) ):
    plt.switch_backend('agg')
    print("Environment variable DISPLAY is not present in the system.")
    print("Switch the backend of matplotlib to agg.")

import time
# ===== general functions =====

class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (List[Transform]): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img


class DownscaleFlow(object):
    """
    Scale the flow and mask to a fixed size

    """
    def __init__(self, scale=4):
        '''
        size: output frame size, this should be NO LARGER than the input frame size! 
        '''
        self.downscale = 1.0/scale

    def __call__(self, sample): 
        if self.downscale!=1 and 'flow' in sample :
            sample['flow'] = cv2.resize(sample['flow'], 
                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)

        if self.downscale!=1 and 'intrinsic' in sample :
            sample['intrinsic'] = cv2.resize(sample['intrinsic'], 
                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)

        if self.downscale!=1 and 'fmask' in sample :
            sample['fmask'] = cv2.resize(sample['fmask'],
                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)
        return sample

class CropCenter(object):
    """Crops a sample of data (tuple) at center
    if the image size is not large enough, it will be first resized with fixed ratio
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, sample):
        kks = list(sample.keys())
        th, tw = self.size
        hh, ww = sample[kks[0]].shape[0], sample[kks[0]].shape[1]
        if ww == tw and hh == th:
            return sample

        # resize the image if the image size is smaller than the target size
        scale_h = max(1, float(th)/hh)
        scale_w = max(1, float(tw)/ww)
        
        if scale_h>1 or scale_w>1:
            w = int(round(ww * scale_w)) # w after resize
            h = int(round(hh * scale_h)) # h after resize
        else:
            w, h = ww, hh

        if scale_h != 1. or scale_w != 1.: # resize the data
            resizedata = ResizeData(size=(h, w))
            sample = resizedata(sample)

        x1 = int((w-tw)/2)
        y1 = int((h-th)/2)

        for kk in kks:
            if sample[kk] is None:
                continue
            img = sample[kk]
            sample[kk] = img[y1:y1+th,x1:x1+tw,...]

        return sample

class ResizeData(object):
    """Resize the data in a dict
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, sample):
        kks = list(sample.keys())
        th, tw = self.size
        h, w = sample[kks[0]].shape[0], sample[kks[0]].shape[1]
        if w == tw and h == th:
            return sample
        scale_w = float(tw)/w
        scale_h = float(th)/h

        for kk in kks:
            if sample[kk] is None:
                continue
            sample[kk] = cv2.resize(sample[kk], (tw,th), interpolation=cv2.INTER_LINEAR)

        if 'flow' in sample:
            sample['flow'][...,0] = sample['flow'][...,0] * scale_w
            sample['flow'][...,1] = sample['flow'][...,1] * scale_h

        return sample

class ToTensor(object):
    def __call__(self, sample):
        kks = list(sample)

        for kk in kks:
            data = sample[kk]
            data = data.astype(np.float32) 
            if len(data.shape) == 3: # transpose image-like data
                data = data.transpose(2,0,1)
            elif len(data.shape) == 2:
                data = data.reshape((1,)+data.shape)  # add a dummy channel
                
            if len(data.shape) == 3 and data.shape[0]==3: # normalization of rgb images
                data = data/255.0

            sample[kk] = torch.from_numpy(data.copy()) # copy to make memory continuous

        return sample

def tensor2img(tensImg,mean,std):
    """
    convert a tensor a numpy array, for visualization
    """
    # undo normalize
    for t, m, s in zip(tensImg, mean, std):
        t.mul_(s).add_(m) 
    tensImg = tensImg * float(255)
    # undo transpose
    tensImg = (tensImg.numpy().transpose(1,2,0)).astype(np.uint8)
    return tensImg

def bilinear_interpolate(img, h, w):
    # assert round(h)>=0 and round(h)<img.shape[0]
    # assert round(w)>=0 and round(w)<img.shape[1]

    h0 = int(math.floor(h))
    h1 = h0 + 1
    w0 = int(math.floor(w))
    w1 = w0 + 1

    a = h - h0 
    b = w - w0

    h0 = max(h0, 0)
    w0 = max(w0, 0)
    h1 = min(h1, img.shape[0]-1)
    w1 = min(w1, img.shape[1]-1)

    A = img[h0,w0,:]
    B = img[h1,w0,:]
    C = img[h0,w1,:]
    D = img[h1,w1,:]

    res = (1-a)*(1-b)*A + a*(1-b)*B + (1-a)*b*C + a*b*D

    return res 

def calculate_angle_distance_from_du_dv(du, dv, flagDegree=False):
    a = np.arctan2( dv, du )

    angleShift = np.pi

    if ( True == flagDegree ):
        a = a / np.pi * 180
        angleShift = 180
        # print("Convert angle from radian to degree as demanded by the input file.")

    d = np.sqrt( du * du + dv * dv )

    return a, d, angleShift

def visflow(flownp, maxF=500.0, n=8, mask=None, hueMax=179, angShift=0.0): 
    """
    Show a optical flow field as the KITTI dataset does.
    Some parts of this function is the transform of the original MATLAB code flow_to_color.m.
    """

    ang, mag, _ = calculate_angle_distance_from_du_dv( flownp[:, :, 0], flownp[:, :, 1], flagDegree=False )

    # Use Hue, Saturation, Value colour model 
    hsv = np.zeros( ( ang.shape[0], ang.shape[1], 3 ) , dtype=np.float32)

    am = ang < 0
    ang[am] = ang[am] + np.pi * 2

    hsv[ :, :, 0 ] = np.remainder( ( ang + angShift ) / (2*np.pi), 1 )
    hsv[ :, :, 1 ] = mag / maxF * n
    hsv[ :, :, 2 ] = (n - hsv[:, :, 1])/n

    hsv[:, :, 0] = np.clip( hsv[:, :, 0], 0, 1 ) * hueMax
    hsv[:, :, 1:3] = np.clip( hsv[:, :, 1:3], 0, 1 ) * 255
    hsv = hsv.astype(np.uint8)

    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    if ( mask is not None ):
        mask = mask != 255
        bgr[mask] = np.array([0, 0 ,0], dtype=np.uint8)

    return bgr


def dataset_intrinsics(dataset='tartanair', is_15mm=False):
    if dataset == 'kitti':
        focalx, focaly, centerx, centery = 707.0912, 707.0912, 601.8873, 183.1104
        baseline = None  # to be determined using load_kitti_intrinsics
    elif dataset == 'airdos':
        focalx, focaly, centerx, centery = 772.54834, 772.54834, 320.0, 180.0
        baseline = 1
    elif dataset == 'rs_d435':
        focalx, focaly, centerx, centery = 384.5080871582031, 384.5080871582031, 316.88897705078125, 240.05723571777344
        baseline = 0.05
    elif dataset == 'sceneflow':
        focalx, focaly, centerx, centery = 1050.0, 1050.0, 479.5, 269.5
        if is_15mm:
            focalx = focaly = 450.0
        baseline = 0.5
    elif dataset == 'tartanair':
        focalx, focaly, centerx, centery = 320.0, 320.0, 320.0, 240.0
        baseline = 1
    elif dataset == 'commaai':
        focalx, focaly, centerx, centery = 910.0, 910.0, 582.0, 437.0
        baseline = 1
    else:
        return None
    return focalx, focaly, centerx, centery, baseline



def plot_traj(gtposes, estposes, vis=False, savefigname=None, title=''):
    fig = plt.figure(figsize=(4,4))
    cm = plt.cm.get_cmap('Spectral')

    plt.subplot(111)
    plt.plot(gtposes[:,0],gtposes[:,1], linestyle='dashed',c='k')
    plt.plot(estposes[:, 0], estposes[:, 1],c='#ff7f0e')
    plt.xlabel('x (m)')
    plt.ylabel('y (m)')
    plt.legend(['Ground Truth','TartanVO'])
    plt.title(title)
    if savefigname is not None:
        plt.savefig(savefigname)
    if vis:
        plt.show()
    plt.close(fig)

def make_intrinsics_layer(w, h, fx, fy, ox, oy):
    ww, hh = np.meshgrid(range(w), range(h))
    ww = (ww.astype(np.float32) - ox + 0.5 )/fx
    hh = (hh.astype(np.float32) - oy + 0.5 )/fy
    intrinsicLayer = np.stack((ww,hh)).transpose(1,2,0)

    return intrinsicLayer

def load_kiiti_intrinsics(filename):
    '''
    load intrinsics from kitti intrinsics file
    '''
    data = {}

    with open(filename, 'r') as f:
        for line in f.readlines():
            key, value = line.split(':', 1)
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                data[key] = np.array([float(x) for x in value.split()])
            except ValueError:
                pass

    P2 = np.reshape(data['P2'], (3,4))
    P3 = np.reshape(data['P3'], (3,4))
    focalx, focaly, centerx, centery = float(P2[0,0]), float(P2[1,1]), float(P2[0,2]), float(P2[1,2])
    baseline = P2[0,3] / P2[0,0] - P3[0,3] / P3[0,0]

    return focalx, focaly, centerx, centery, baseline

def load_sceneflow_extrinsics(filename):
    with open(filename, 'r') as f:
        lines = f.readlines()

    l_exts = []
    r_exts = []
    for l in lines:
        if 'L ' in l:
            l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))
        if 'R ' in l:
            r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))

    if 'into_future' in filename:
        fids = np.arange(0, len(l_exts))
    else:
        fids = np.arange(len(l_exts) - 1, -1, -1)
    
    # assuming left camera is used by default
    camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1  # Sceneflow uses Blender's coordinate system
    pose_quats = []
    pose = np.eye(4)
    for fid in fids:
        ext0 = l_exts[fid]
        ext1 = l_exts[fid+1] if 'into_future' in filename else l_exts[fid-1]
        motion = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT)  # ext is from camera space to world space
        pose = pose @ motion
        pose_quat = np.zeros(7)
        pose_quat[3:] = R.from_matrix(pose[:3,:3]).as_quat()
        pose_quat[:3] = pose[:3,3]
        pose_quats.append(pose_quat)
    
    return pose_quats

================================================
FILE: DytanVO.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Shihao Shen, CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#  * Neither the name of CMU nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import cv2
import torch
import torch.nn as nn
import numpy as np
import time

np.set_printoptions(precision=4, suppress=True, threshold=10000)

from torch.autograd import Variable
from Network.VONet import VONet
from Network.rigidmask.VCNplus import SegNet, WarpModule, flow_reg
from Datasets.utils import CropCenter, ResizeData
from Datasets.cowmask import cow_masks
from evaluator.transformation import se2SE

class DytanVO(object):
    def __init__(self, vo_model_name, seg_model_name, image_height, image_width, is_kitti=False, flow_model_name=None, pose_model_name=None):
        # import ipdb;ipdb.set_trace()
        self.vonet = VONet() 

        # load VO model separately (flow + pose) or at once
        if flow_model_name.endswith('.pkl') and pose_model_name.endswith('.pkl'):
            modelname = 'models/' + flow_model_name
            self.load_vo_model(self.vonet.flowNet, modelname)
            modelname = 'models/' + pose_model_name
            self.load_vo_model(self.vonet.flowPoseNet, modelname)
        else:
            modelname = 'models/' + vo_model_name
            self.load_vo_model(self.vonet, modelname)

        self.vonet.cuda()

        self.test_count = 0
        self.pose_norm = np.array([ 0.13,  0.13,  0.13,  0.013 ,  0.013,  0.013], dtype=np.float32) # the output scale factor
        self.flow_norm = 20 # scale factor for flow

        # load the segmentation model
        self.testres = 1.2
        if is_kitti:
            maxw, maxh = [int(self.testres * 1280), int(self.testres * 384)]
        else:
            maxw, maxh = [int(self.testres * 1024), int(self.testres * 448)]
        max_h = int(maxh // 64 * 64)
        max_w = int(maxw // 64 * 64)
        if max_h < maxh: max_h += 64
        if max_w < maxw: max_w += 64
        maxh = max_h
        maxw = max_w
        self.segnet = SegNet([1, maxw, maxh], md=[4, 4, 4, 4, 4], fac=1, exp_unc=not ('kitti' in seg_model_name))
        segmodelname = 'models/' + seg_model_name
        self.segnet = self.load_seg_model(self.segnet, segmodelname)
        
        self.segnet.cuda()

        self.segnet_initialize = False

        # To resize/crop segmentation mask
        self.resizedata = ResizeData(size=(image_height,1226)) if is_kitti else None
        self.cropdata = CropCenter((image_height, image_width))

        # To transform coordinates from NED to Blender
        Ry90 = np.array([[0,0,1,0], [0,1,0,0], [-1,0,0,0], [0,0,0,1]])
        Rx90 = np.array([[1,0,0,0], [0,0,-1,0], [0,1,0,0], [0,0,0,1]])
        self.camT = Rx90.dot(Ry90)

        self.sigmoid = lambda x: 1/(1 + np.exp(-x))

    def load_vo_model(self, model, modelname):
        preTrainDict = torch.load(modelname)
        model_dict = model.state_dict()
        preTrainDictTemp = {k:v for k,v in preTrainDict.items() if k in model_dict}

        if( 0 == len(preTrainDictTemp) ):
            print("Does not find any module to load. Try DataParallel version.")
            for k, v in preTrainDict.items():
                kk = k[7:]
                if ( kk in model_dict ):
                    preTrainDictTemp[kk] = v

        if ( 0 == len(preTrainDictTemp) ):
            raise Exception("Could not load model from %s." % (modelname), "load_model")

        model_dict.update(preTrainDictTemp)
        model.load_state_dict(model_dict)
        print('VO Model %s loaded...' % modelname)
        return model

    def load_seg_model(self, model, modelname):
        model = nn.DataParallel(model, device_ids=[0])
        preTrainDict = torch.load(modelname, map_location='cpu')
        self.mean_L = preTrainDict['mean_L']
        self.mean_R = preTrainDict['mean_R']
        preTrainDict['state_dict'] = {k:v for k,v in preTrainDict['state_dict'].items()}
        model.load_state_dict(preTrainDict['state_dict'], strict=False)
        print('Segmentation Model %s loaded...' % modelname)
        return model

    def test_batch(self, sample, intrinsics, seg_thresh, iter_num):
        print("="*20)
        self.test_count += 1
        
        # import ipdb;ipdb.set_trace()
        img0   = sample['img1'].cuda()
        img1   = sample['img2'].cuda()
        intrinsic = sample['intrinsic'].cuda()  # intrinsic layer

        img0_raw = sample['img1_raw'].detach().numpy().squeeze()
        img1_raw = sample['img2_raw'].detach().numpy().squeeze()

        if not self.segnet_initialize:
            self.vonet.eval()
            self.segnet.eval()
            self.initialize_segnet_input(img0_raw, intrinsics)
            self.segnet_initialize = True
        
        with torch.no_grad():
            imgL_noaug, imgLR = self.transform_segnet_input(img0_raw, img1_raw)
            flowdc = self.segnet.module.forward_VCN(imgLR)
            
            total_time = 0
            start_time = time.time()
            flow_output, _ = self.vonet([img0, img1], only_flow=True)
            flownet_time = time.time() - start_time
            total_time += flownet_time

            print("Flownet time: %.2f" % flownet_time)

            seg_thresholds = np.linspace(seg_thresh, 0.95, iter_num - 1)[::-1]
            for iter in range(iter_num):
                flow = flow_output.clone()
                if iter == 0:
                    cow_sigma_range = (20, 60)
                    log_sigma_range = (np.log(cow_sigma_range[0]), np.log(cow_sigma_range[1]))
                    cow_prop_range = (0.3, 0.6)
                    segmask = cow_masks(flow.shape[-2:], log_sigma_range, cow_sigma_range[1], cow_prop_range).astype(np.float32)
                    segmask = segmask[None,None,...]
                    segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()

                start_time = time.time()
                _, pose_output = self.vonet([img0, img1, intrinsic, flow, segmask], only_pose=True)
                posenet_time = time.time() - start_time
                total_time += posenet_time

                print("Iter %d, Posenet time: %.2f; " % (iter, posenet_time), end='')

                # Do not pass segnet in the last iteration
                if iter == iter_num - 1:
                    break

                seg_thresh = seg_thresholds[iter] if iter < iter_num-1 else seg_thresh
                pose_input = pose_output.data.cpu().detach().numpy().squeeze()
                pose_input = pose_input * self.pose_norm
                pose_input = self.camT.T.dot(se2SE(pose_input)).dot(self.camT)
                
                start_time = time.time()
                disc_aux = [self.intr_list, imgL_noaug, pose_input[:3,:]]
                fgmask = self.segnet(imgLR, disc_aux, flowdc)
                segnet_time = time.time() - start_time
                total_time += segnet_time
                
                fgmask = cv2.resize(fgmask.cpu().numpy(), (self.input_size[1], self.input_size[0]), interpolation=cv2.INTER_LINEAR).astype(np.float32)
                fg_probs = self.sigmoid(fgmask)
                segmask = np.zeros(fgmask.shape[:2])
                segmask[fg_probs > seg_thresh] = 1.0

                # Resize/Crop segmask (Resize + Crop + Downscale 1/4)
                dummysample = {'segmask': segmask}
                if self.resizedata is not None:
                    dummysample = self.resizedata(dummysample)
                dummysample = self.cropdata(dummysample)
                segmask = dummysample['segmask']
                segmask = cv2.resize(segmask, (0,0), fx=0.25, fy=0.25, interpolation=cv2.INTER_LINEAR)
                segmask = segmask[None,None,...].astype(np.float32)
                segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()

                print("Segnet time: %.2f" % segnet_time)
            
            posenp = pose_output.data.cpu().detach().numpy().squeeze()
            posenp = posenp * self.pose_norm  # The output is normalized during training, now scale it back
            flownp = flow.data.cpu().detach().numpy().squeeze()
            flownp = flownp * self.flow_norm

        # # calculate scale from GT posefile
        # if 'motion' in sample:
        #     motions_gt = sample['motion']
        #     scale = np.linalg.norm(motions_gt[:,:3], axis=1)
        #     trans_est = posenp[:,:3]
        #     trans_est = trans_est/np.linalg.norm(trans_est,axis=1).reshape(-1,1)*scale.reshape(-1,1)
        #     posenp[:,:3] = trans_est 
        # else:
        #     print('    scale is not given, using 1 as the default scale value..')

        print("\n{} Pose inference using {}s: \n{}\n".format(self.test_count, total_time, posenp))

        return posenp, flownp

    def initialize_segnet_input(self, imgL_o, intrinsics):
        maxh = imgL_o.shape[0] * self.testres
        maxw = imgL_o.shape[1] * self.testres
        self.max_h = int(maxh // 64 * 64)
        self.max_w = int(maxw // 64 * 64)
        if self.max_h < maxh: self.max_h += 64
        if self.max_w < maxw: self.max_w += 64
        self.input_size = imgL_o.shape

        # modify module according to inputs
        for i in range(len(self.segnet.module.reg_modules)):
            self.segnet.module.reg_modules[i] = flow_reg([1, self.max_w//(2**(6-i)), self.max_h//(2**(6-i))], 
                            ent=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).ent,\
                            maxdisp=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).md,\
                            fac=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).fac).cuda()
        for i in range(len(self.segnet.module.warp_modules)):
            self.segnet.module.warp_modules[i] = WarpModule([1, self.max_w//(2**(6-i)), self.max_h//(2**(6-i))]).cuda()

        # foramt intrinsics input
        fl, cx, cy, bl = intrinsics
        fl_next = fl  # assuming focal length remains the same across frames
        self.intr_list = [torch.Tensor(inxx).cuda() for inxx in [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]]
        self.intr_list.append(torch.Tensor([self.input_size[1] / self.max_w]).cuda()) # delta fx
        self.intr_list.append(torch.Tensor([self.input_size[0] / self.max_h]).cuda()) # delta fy
        self.intr_list.append(torch.Tensor([fl_next]).cuda())

    def transform_segnet_input(self, imgL_o, imgR_o):
        imgL = cv2.resize(imgL_o, (self.max_w, self.max_h))
        imgR = cv2.resize(imgR_o, (self.max_w, self.max_h))
        imgL_noaug = torch.Tensor(imgL / 255.)[np.newaxis].float().cuda()
        
        # flip channel, subtract mean
        imgL = imgL[:,:,::-1].copy() / 255. - np.asarray(self.mean_L).mean(0)[np.newaxis,np.newaxis,:]
        imgR = imgR[:,:,::-1].copy() / 255. - np.asarray(self.mean_R).mean(0)[np.newaxis,np.newaxis,:]
        imgL = np.transpose(imgL, [2,0,1])[np.newaxis]
        imgR = np.transpose(imgR, [2,0,1])[np.newaxis]
        imgL = Variable(torch.FloatTensor(imgL).cuda())
        imgR = Variable(torch.FloatTensor(imgR).cuda())
        imgLR = torch.cat([imgL,imgR],0)

        return imgL_noaug, imgLR

================================================
FILE: LICENSE
================================================
BSD 3-Clause License

Copyright (c) 2020, Air Lab Stacks
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: Network/PWC/PWCNet.py
================================================
"""
implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018

Jinwei Gu and Zhile Ren

"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
from .correlation import FunctionCorrelation
import cv2 # debug

def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):   
    return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 
                        padding=padding, dilation=dilation, bias=True),
            nn.LeakyReLU(0.1))

def predict_flow(in_planes):
    return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)

def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True)



class PWCDCNet(nn.Module):
    """
    PWC-DC net. add dilation convolution and densenet connections

    """
    def __init__(self, md=4, flow_norm=20.0):
        """
        input: md --- maximum displacement (for correlation. default: 4), after warpping

        """
        super(PWCDCNet,self).__init__()

        self.flow_norm = flow_norm
        
        self.conv1a  = conv(3,   16, kernel_size=3, stride=2)
        self.conv1aa = conv(16,  16, kernel_size=3, stride=1)
        self.conv1b  = conv(16,  16, kernel_size=3, stride=1)
        self.conv2a  = conv(16,  32, kernel_size=3, stride=2)
        self.conv2aa = conv(32,  32, kernel_size=3, stride=1)
        self.conv2b  = conv(32,  32, kernel_size=3, stride=1)
        self.conv3a  = conv(32,  64, kernel_size=3, stride=2)
        self.conv3aa = conv(64,  64, kernel_size=3, stride=1)
        self.conv3b  = conv(64,  64, kernel_size=3, stride=1)
        self.conv4a  = conv(64,  96, kernel_size=3, stride=2)
        self.conv4aa = conv(96,  96, kernel_size=3, stride=1)
        self.conv4b  = conv(96,  96, kernel_size=3, stride=1)
        self.conv5a  = conv(96, 128, kernel_size=3, stride=2)
        self.conv5aa = conv(128,128, kernel_size=3, stride=1)
        self.conv5b  = conv(128,128, kernel_size=3, stride=1)
        self.conv6aa = conv(128,196, kernel_size=3, stride=2)
        self.conv6a  = conv(196,196, kernel_size=3, stride=1)
        self.conv6b  = conv(196,196, kernel_size=3, stride=1)

        # self.corr    = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
        self.leakyRELU = nn.LeakyReLU(0.1)
        
        nd = (2*md+1)**2
        dd = np.cumsum([128,128,96,64,32])

        od = nd
        self.conv6_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv6_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv6_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv6_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)        
        self.predict_flow6 = predict_flow(od+dd[4])
        self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 
        self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 
        
        od = nd+128+4
        self.conv5_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv5_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv5_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv5_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow5 = predict_flow(od+dd[4]) 
        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 
        self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 
        
        od = nd+96+4
        self.conv4_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv4_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv4_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv4_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow4 = predict_flow(od+dd[4]) 
        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 
        self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 
        
        od = nd+64+4
        self.conv3_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv3_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv3_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv3_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow3 = predict_flow(od+dd[4]) 
        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 
        self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 
        
        od = nd+32+4
        self.conv2_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv2_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv2_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv2_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow2 = predict_flow(od+dd[4]) 
        self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 
        
        self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv7 = predict_flow(32)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()


    def warp(self, x, flo):
        """
        warp an image/tensor (im2) back to im1, according to the optical flow

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow

        """
        B, C, H, W = x.size()
        # mesh grid 
        xx = torch.arange(0, W).view(1,-1).repeat(H,1)
        yy = torch.arange(0, H).view(-1,1).repeat(1,W)
        xx = xx.view(1,1,H,W).repeat(B,1,1,1)
        yy = yy.view(1,1,H,W).repeat(B,1,1,1)
        grid = torch.cat((xx,yy),1).float()

        if x.is_cuda:
            grid = grid.cuda()
        vgrid = grid + flo

        # scale grid to [-1,1] 
        vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:].clone() / max(W-1,1)-1.0
        vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:].clone() / max(H-1,1)-1.0

        vgrid = vgrid.permute(0,2,3,1)        
        output = nn.functional.grid_sample(x, vgrid, align_corners=True)
        mask = torch.ones(x.size()).cuda()
        mask = nn.functional.grid_sample(mask, vgrid, align_corners=True)

        # if W==128:
            # np.save('mask.npy', mask.cpu().data.numpy())
            # np.save('warp.npy', output.cpu().data.numpy())
        
        mask[mask<0.9999] = 0
        mask[mask>0] = 1
        
        return output*mask

    def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_func, conv4_func, input_feat):
        x = torch.cat((conv0_func(input_feat), input_feat),1)
        x = torch.cat((conv1_func(x), x),1)
        x = torch.cat((conv2_func(x), x),1)
        x = torch.cat((conv3_func(x), x),1)
        x = torch.cat((conv4_func(x), x),1)
        return x

    def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_high, feat_low1, feat_low2, scale):
        flow_high = pred_func(feat_high)
        up_flow_high = decon_func(flow_high)
        up_feat_high = upfeat_func(feat_high)

        warp_feat = self.warp(feat_low2, up_flow_high*scale)
        corr_low = FunctionCorrelation(tenFirst=feat_low1, tenSecond=warp_feat)
        corr_low = self.leakyRELU(corr_low)
        x = torch.cat((corr_low, feat_low1, up_flow_high, up_feat_high), 1)

        return x, flow_high

    def forward(self,x):
        im1 = x[:,0:3,...]
        im2 = x[:,3:6,...]
        
        c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
        c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
        c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
        c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
        c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
        c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
        c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
        c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
        c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
        c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
        c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
        c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))


        # corr6 = self.corr(c16, c26) 
        corr6 = FunctionCorrelation(tenFirst=c16, tenSecond=c26)
        corr6 = self.leakyRELU(corr6)   

        x = self.multi_scale_conv(self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4, corr6)
        x, flow6 = self.concate_two_layers(self.predict_flow6, self.deconv6, self.upfeat6, x, c15, c25, 0.625)

        x = self.multi_scale_conv(self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, x)
        x, flow5 = self.concate_two_layers(self.predict_flow5, self.deconv5, self.upfeat5, x, c14, c24, 1.25)

        x = self.multi_scale_conv(self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, x)
        x, flow4 = self.concate_two_layers(self.predict_flow4, self.deconv4, self.upfeat4, x, c13, c23, 2.5)

        x = self.multi_scale_conv(self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, x)
        x, flow3 = self.concate_two_layers(self.predict_flow3, self.deconv3, self.upfeat3, x, c12, c22, 5.0)

        x = self.multi_scale_conv(self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4, x)

        flow2 = self.predict_flow2(x)
 
        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
        refine = self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))
        flow2 = flow2 + refine
        
        return flow2


def pwc_dc_net(path=None):

    model = PWCDCNet()
    if path is not None:
        data = torch.load(path)
        if 'state_dict' in data.keys():
            model.load_state_dict(data['state_dict'])
        else:
            model.load_state_dict(data)
    return model






================================================
FILE: Network/PWC/__init__.py
================================================
from .PWCNet import *


================================================
FILE: Network/PWC/correlation.py
================================================
#!/usr/bin/env python

import torch

import cupy
import re

kernel_Correlation_rearrange = '''
	extern "C" __global__ void kernel_Correlation_rearrange(
		const int n,
		const float* input,
		float* output
	) {
	  int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;

	  if (intIndex >= n) {
	    return;
	  }

	  int intSample = blockIdx.z;
	  int intChannel = blockIdx.y;

	  float fltValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];

	  __syncthreads();

	  int intPaddedY = (intIndex / SIZE_3(input)) + 4;
	  int intPaddedX = (intIndex % SIZE_3(input)) + 4;
	  int intRearrange = ((SIZE_3(input) + 8) * intPaddedY) + intPaddedX;

	  output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = fltValue;
	}
'''

kernel_Correlation_updateOutput = '''
	extern "C" __global__ void kernel_Correlation_updateOutput(
	  const int n,
	  const float* rbot0,
	  const float* rbot1,
	  float* top
	) {
	  extern __shared__ char patch_data_char[];
	  
	  float *patch_data = (float *)patch_data_char;
	  
	  // First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1
	  int x1 = blockIdx.x + 4;
	  int y1 = blockIdx.y + 4;
	  int item = blockIdx.z;
	  int ch_off = threadIdx.x;
	  
	  // Load 3D patch into shared shared memory
	  for (int j = 0; j < 1; j++) { // HEIGHT
	    for (int i = 0; i < 1; i++) { // WIDTH
	      int ji_off = (j + i) * SIZE_3(rbot0);
	      for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
	        int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;
	        int idxPatchData = ji_off + ch;
	        patch_data[idxPatchData] = rbot0[idx1];
	      }
	    }
	  }
	  
	  __syncthreads();
	  
	  __shared__ float sum[32];
	  
	  // Compute correlation
	  for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {
	    sum[ch_off] = 0;
	  
	    int s2o = top_channel % 9 - 4;
	    int s2p = top_channel / 9 - 4;
	    
	    for (int j = 0; j < 1; j++) { // HEIGHT
	      for (int i = 0; i < 1; i++) { // WIDTH
	        int ji_off = (j + i) * SIZE_3(rbot0);
	        for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
	          int x2 = x1 + s2o;
	          int y2 = y1 + s2p;
	          
	          int idxPatchData = ji_off + ch;
	          int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;
	          
	          sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];
	        }
	      }
	    }
	    
	    __syncthreads();
	    
	    if (ch_off == 0) {
	      float total_sum = 0;
	      for (int idx = 0; idx < 32; idx++) {
	        total_sum += sum[idx];
	      }
	      const int sumelems = SIZE_3(rbot0);
	      const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;
	      top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;
	    }
	  }
	}
'''

kernel_Correlation_updateGradFirst = '''
	#define ROUND_OFF 50000

	extern "C" __global__ void kernel_Correlation_updateGradFirst(
	  const int n,
	  const int intSample,
	  const float* rbot0,
	  const float* rbot1,
	  const float* gradOutput,
	  float* gradFirst,
	  float* gradSecond
	) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
	  int n = intIndex % SIZE_1(gradFirst); // channels
	  int l = (intIndex / SIZE_1(gradFirst)) % SIZE_3(gradFirst) + 4; // w-pos
	  int m = (intIndex / SIZE_1(gradFirst) / SIZE_3(gradFirst)) % SIZE_2(gradFirst) + 4; // h-pos
	  
	  // round_off is a trick to enable integer division with ceil, even for negative numbers
	  // We use a large offset, for the inner part not to become negative.
	  const int round_off = ROUND_OFF;
	  const int round_off_s1 = round_off;
	  
	  // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
	  int xmin = (l - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
	  int ymin = (m - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
	  
	  // Same here:
	  int xmax = (l - 4 + round_off_s1) - round_off; // floor (l - 4)
	  int ymax = (m - 4 + round_off_s1) - round_off; // floor (m - 4)
	  
	  float sum = 0;
	  if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
	    xmin = max(0,xmin);
	    xmax = min(SIZE_3(gradOutput)-1,xmax);
	    
	    ymin = max(0,ymin);
	    ymax = min(SIZE_2(gradOutput)-1,ymax);
	    
	    for (int p = -4; p <= 4; p++) {
	      for (int o = -4; o <= 4; o++) {
	        // Get rbot1 data:
	        int s2o = o;
	        int s2p = p;
	        int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;
	        float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]
	        
	        // Index offset for gradOutput in following loops:
	        int op = (p+4) * 9 + (o+4); // index[o,p]
	        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
	        
	        for (int y = ymin; y <= ymax; y++) {
	          for (int x = xmin; x <= xmax; x++) {
	            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
	            sum += gradOutput[idxgradOutput] * bot1tmp;
	          }
	        }
	      }
	    }
	  }
	  const int sumelems = SIZE_1(gradFirst);
	  const int bot0index = ((n * SIZE_2(gradFirst)) + (m-4)) * SIZE_3(gradFirst) + (l-4);
	  gradFirst[bot0index + intSample*SIZE_1(gradFirst)*SIZE_2(gradFirst)*SIZE_3(gradFirst)] = sum / (float)sumelems;
	} }
'''

kernel_Correlation_updateGradSecond = '''
	#define ROUND_OFF 50000

	extern "C" __global__ void kernel_Correlation_updateGradSecond(
	  const int n,
	  const int intSample,
	  const float* rbot0,
	  const float* rbot1,
	  const float* gradOutput,
	  float* gradFirst,
	  float* gradSecond
	) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
	  int n = intIndex % SIZE_1(gradSecond); // channels
	  int l = (intIndex / SIZE_1(gradSecond)) % SIZE_3(gradSecond) + 4; // w-pos
	  int m = (intIndex / SIZE_1(gradSecond) / SIZE_3(gradSecond)) % SIZE_2(gradSecond) + 4; // h-pos
	  
	  // round_off is a trick to enable integer division with ceil, even for negative numbers
	  // We use a large offset, for the inner part not to become negative.
	  const int round_off = ROUND_OFF;
	  const int round_off_s1 = round_off;
	  
	  float sum = 0;
	  for (int p = -4; p <= 4; p++) {
	    for (int o = -4; o <= 4; o++) {
	      int s2o = o;
	      int s2p = p;
	      
	      //Get X,Y ranges and clamp
	      // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
	      int xmin = (l - 4 - s2o + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
	      int ymin = (m - 4 - s2p + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
	      
	      // Same here:
	      int xmax = (l - 4 - s2o + round_off_s1) - round_off; // floor (l - 4 - s2o)
	      int ymax = (m - 4 - s2p + round_off_s1) - round_off; // floor (m - 4 - s2p)
          
	      if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
	        xmin = max(0,xmin);
	        xmax = min(SIZE_3(gradOutput)-1,xmax);
	        
	        ymin = max(0,ymin);
	        ymax = min(SIZE_2(gradOutput)-1,ymax);
	        
	        // Get rbot0 data:
	        int idxbot0 = ((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;
	        float bot0tmp = rbot0[idxbot0]; // rbot1[l+s2o,m+s2p,n]
	        
	        // Index offset for gradOutput in following loops:
	        int op = (p+4) * 9 + (o+4); // index[o,p]
	        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
	        
	        for (int y = ymin; y <= ymax; y++) {
	          for (int x = xmin; x <= xmax; x++) {
	            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
	            sum += gradOutput[idxgradOutput] * bot0tmp;
	          }
	        }
	      }
	    }
	  }
	  const int sumelems = SIZE_1(gradSecond);
	  const int bot1index = ((n * SIZE_2(gradSecond)) + (m-4)) * SIZE_3(gradSecond) + (l-4);
	  gradSecond[bot1index + intSample*SIZE_1(gradSecond)*SIZE_2(gradSecond)*SIZE_3(gradSecond)] = sum / (float)sumelems;
	} }
'''

def cupy_kernel(strFunction, objVariables):
	strKernel = globals()[strFunction]

	while True:
		objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)

		if objMatch is None:
			break
		# end

		intArg = int(objMatch.group(2))

		strTensor = objMatch.group(4)
		intSizes = objVariables[strTensor].size()

		strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg]))
	# end

	while True:
		objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)

		if objMatch is None:
			break
		# end

		intArgs = int(objMatch.group(2))
		strArgs = objMatch.group(4).split(',')

		strTensor = strArgs[0]
		intStrides = objVariables[strTensor].stride()
		strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]

		strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')
	# end

	return strKernel
# end

# @cupy.util.memoize(for_each_device=True)
def cupy_launch(strFunction, strKernel):
	return cupy.cuda.compile_with_cache(strKernel).get_function(strFunction)
# end

class _FunctionCorrelation(torch.autograd.Function):
	@staticmethod
	def forward(self, first, second):
		rbot0 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])
		rbot1 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])

		self.save_for_backward(first, second, rbot0, rbot1)

		assert(first.is_contiguous() == True)
		assert(second.is_contiguous() == True)

		output = first.new_zeros([ first.shape[0], 81, first.shape[2], first.shape[3] ])

		if first.is_cuda == True:
			n = first.shape[2] * first.shape[3]
			cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
				'input': first,
				'output': rbot0
			}))(
				grid=tuple([ int((n + 16 - 1) / 16), first.shape[1], first.shape[0] ]),
				block=tuple([ 16, 1, 1 ]),
				args=[ n, first.data_ptr(), rbot0.data_ptr() ]
			)

			n = second.shape[2] * second.shape[3]
			cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
				'input': second,
				'output': rbot1
			}))(
				grid=tuple([ int((n + 16 - 1) / 16), second.shape[1], second.shape[0] ]),
				block=tuple([ 16, 1, 1 ]),
				args=[ n, second.data_ptr(), rbot1.data_ptr() ]
			)

			n = output.shape[1] * output.shape[2] * output.shape[3]
			cupy_launch('kernel_Correlation_updateOutput', cupy_kernel('kernel_Correlation_updateOutput', {
				'rbot0': rbot0,
				'rbot1': rbot1,
				'top': output
			}))(
				grid=tuple([ output.shape[3], output.shape[2], output.shape[0] ]),
				block=tuple([ 32, 1, 1 ]),
				shared_mem=first.shape[1] * 4,
				args=[ n, rbot0.data_ptr(), rbot1.data_ptr(), output.data_ptr() ]
			)

		elif first.is_cuda == False:
			raise NotImplementedError()

		# end

		return output
	# end

	@staticmethod
	def backward(self, gradOutput):
		first, second, rbot0, rbot1 = self.saved_tensors

		assert(gradOutput.is_contiguous() == True)

		gradFirst = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[0] == True else None
		gradSecond = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[1] == True else None

		if first.is_cuda == True:
			if gradFirst is not None:
				for intSample in range(first.shape[0]):
					n = first.shape[1] * first.shape[2] * first.shape[3]
					cupy_launch('kernel_Correlation_updateGradFirst', cupy_kernel('kernel_Correlation_updateGradFirst', {
						'rbot0': rbot0,
						'rbot1': rbot1,
						'gradOutput': gradOutput,
						'gradFirst': gradFirst,
						'gradSecond': None
					}))(
						grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
						block=tuple([ 512, 1, 1 ]),
						args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), gradFirst.data_ptr(), None ]
					)
				# end
			# end

			if gradSecond is not None:
				for intSample in range(first.shape[0]):
					n = first.shape[1] * first.shape[2] * first.shape[3]
					cupy_launch('kernel_Correlation_updateGradSecond', cupy_kernel('kernel_Correlation_updateGradSecond', {
						'rbot0': rbot0,
						'rbot1': rbot1,
						'gradOutput': gradOutput,
						'gradFirst': None,
						'gradSecond': gradSecond
					}))(
						grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
						block=tuple([ 512, 1, 1 ]),
						args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), None, gradSecond.data_ptr() ]
					)
				# end
			# end

		elif first.is_cuda == False:
			raise NotImplementedError()

		# end

		return gradFirst, gradSecond
	# end
# end

def FunctionCorrelation(tenFirst, tenSecond):
	return _FunctionCorrelation.apply(tenFirst, tenSecond)
# end

class ModuleCorrelation(torch.nn.Module):
	def __init__(self):
		super(ModuleCorrelation, self).__init__()
	# end

	def forward(self, tenFirst, tenSecond):
		return _FunctionCorrelation.apply(tenFirst, tenSecond)
	# end
# end

================================================
FILE: Network/VOFlowNet.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Wenshan Wang, CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#  * Neither the name of CMU nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import torch 
import torch.nn as nn
import torch.nn.functional as F
import math

def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dilation=1, bn_layer=False, bias=True):
    if bn_layer:
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation, bias=bias),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True)
        )
    else: 
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation),
            nn.ReLU(inplace=True)
        )

def linear(in_planes, out_planes):
    return nn.Sequential(
        nn.Linear(in_planes, out_planes), 
        nn.ReLU(inplace=True)
        )

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
        super(BasicBlock, self).__init__()

        self.conv1 = conv(inplanes, planes, 3, stride, pad, dilation)
        self.conv2 = nn.Conv2d(planes, planes, 3, 1, pad, dilation)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)

        if self.downsample is not None:
            x = self.downsample(x)
        out += x

        return F.relu(out, inplace=True)

class VOFlowRes(nn.Module):
    def __init__(self):
        super(VOFlowRes, self).__init__()
        inputnum = 5
        blocknums = [2,2,3,4,6,7,3]
        outputnums = [32,64,64,128,128,256,256]

        self.firstconv = nn.Sequential(conv(inputnum, 32, 3, 2, 1, 1, False),
                                       conv(32, 32, 3, 1, 1, 1),
                                       conv(32, 32, 3, 1, 1, 1))

        self.inplanes = 32

        self.layer1 = self._make_layer(BasicBlock, outputnums[2], blocknums[2], 2, 1, 1) # 40 x 28
        self.layer2 = self._make_layer(BasicBlock, outputnums[3], blocknums[3], 2, 1, 1) # 20 x 14
        self.layer3 = self._make_layer(BasicBlock, outputnums[4], blocknums[4], 2, 1, 1) # 10 x 7
        self.layer4 = self._make_layer(BasicBlock, outputnums[5], blocknums[5], 2, 1, 1) # 5 x 4
        self.layer5 = self._make_layer(BasicBlock, outputnums[6], blocknums[6], 2, 1, 1) # 3 x 2
        fcnum = outputnums[6] * 6

        fc1_trans = linear(fcnum, 128)
        fc2_trans = linear(128,32)
        fc3_trans = nn.Linear(32,3)

        fc1_rot = linear(fcnum, 128)
        fc2_rot = linear(128,32)
        fc3_rot = nn.Linear(32,3)


        self.voflow_trans = nn.Sequential(fc1_trans, fc2_trans, fc3_trans)
        self.voflow_rot = nn.Sequential(fc1_rot, fc2_rot, fc3_rot)


    def _make_layer(self, block, planes, blocks, stride, pad, dilation):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
           downsample = nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride)

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes,1,None,pad,dilation))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.firstconv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        
        x = x.view(x.shape[0], -1)
        x_trans = self.voflow_trans(x)
        x_rot = self.voflow_rot(x)
        return torch.cat((x_trans, x_rot), dim=1)

================================================
FILE: Network/VONet.py
================================================
# Software License Agreement (BSD License)
#
# Copyright (c) 2020, Wenshan Wang, Yaoyu Hu,  CMU
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#  * Neither the name of CMU nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import torch 
import torch.nn as nn
import torch.nn.functional as F
from .PWC import PWCDCNet as FlowNet
from .VOFlowNet import VOFlowRes as FlowPoseNet

class VONet(nn.Module):
    def __init__(self):
        super(VONet, self).__init__()

        self.flowNet     = FlowNet()
        self.flowPoseNet = FlowPoseNet()

    def forward(self, x, only_flow=False, only_pose=False):
        '''
        x[0]: rgb frame t-1
        x[1]: rgb frame t
        x[2]: intrinsics
        x[3]: flow t-1 -> t (optional)
        x[4]: motion segmentation mask
        '''
        # import ipdb;ipdb.set_trace()
        if not only_pose:
            flow_out = self.flowNet(torch.cat((x[0], x[1]), dim=1))

            if only_flow:
                return flow_out, None
            
            flow = flow_out[0]

        else:
            assert(len(x) > 3)
            flow_out = None

        if len(x) > 3 and x[3] is not None:
            flow_input = x[3]
        else:
            flow_input = flow

        # Mask out input flow using the segmentation result
        assert(len(x) > 4)
        mask = torch.gt(x[4], 0)
        for i in range(flow_input.shape[0]):
            zeros = torch.cat([mask[i], ]*2, dim=0)
            flow_input[i][zeros] = 0

        flow_input = torch.cat((flow_input, 1 - x[4]), dim=1)  # segmentation layer
        flow_input = torch.cat((flow_input, x[2]), dim=1)  # intrinsics layer
    
        pose = self.flowPoseNet(flow_input)

        return flow_out, pose


================================================
FILE: Network/__init__.py
================================================


================================================
FILE: Network/rigidmask/.gitignore
================================================
__pycache__


================================================
FILE: Network/rigidmask/VCNplus.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import math
import pdb
import cv2
import kornia

from .submodule import pspnet, bfmodule, bfmodule_feat, conv, compute_geo_costs, get_skew_mat, get_intrinsics
from .conv4d import sepConv4d, butterfly4D
from scipy.spatial.transform import Rotation
from .det import create_model

class flow_reg(nn.Module):
    """
    Soft winner-take-all that selects the most likely diplacement.
    Set ent=True to enable entropy output.
    Set maxdisp to adjust maximum allowed displacement towards one side.
        maxdisp=4 searches for a 9x9 region.
    Set fac to squeeze search window.
        maxdisp=4 and fac=2 gives search window of 9x5
    """
    def __init__(self, size, ent=False, maxdisp = int(4), fac=1):
        B,W,H = size
        super(flow_reg, self).__init__()
        self.ent = ent
        self.md = maxdisp
        self.fac = fac
        self.truncated = True
        self.wsize = 3  # by default using truncation 7x7

        flowrangey = range(-maxdisp,maxdisp+1)
        flowrangex = range(-int(maxdisp//self.fac),int(maxdisp//self.fac)+1)
        meshgrid = np.meshgrid(flowrangex,flowrangey)
        flowy = np.tile( np.reshape(meshgrid[0],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )
        flowx = np.tile( np.reshape(meshgrid[1],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )
        self.register_buffer('flowx',torch.Tensor(flowx))
        self.register_buffer('flowy',torch.Tensor(flowy))

        self.pool3d = nn.MaxPool3d((self.wsize*2+1,self.wsize*2+1,1),stride=1,padding=(self.wsize,self.wsize,0))

    def forward(self, x):
        b,u,v,h,w = x.shape
        oldx = x

        if self.truncated:
            # truncated softmax
            x = x.view(b,u*v,h,w)

            idx = x.argmax(1)[:,np.newaxis]
            if x.is_cuda:
                mask = Variable(torch.cuda.HalfTensor(b,u*v,h,w)).fill_(0)
            else:
                mask = Variable(torch.FloatTensor(b,u*v,h,w)).fill_(0)
            mask.scatter_(1,idx,1)
            mask = mask.view(b,1,u,v,-1)
            mask = self.pool3d(mask)[:,0].view(b,u,v,h,w)

            ninf = x.clone().fill_(-np.inf).view(b,u,v,h,w)
            x = torch.where(mask.byte(),oldx,ninf)
        else:
            self.wsize = (np.sqrt(u*v)-1)/2

        b,u,v,h,w = x.shape
        x = F.softmax(x.view(b,-1,h,w),1).view(b,u,v,h,w)
        if np.isnan(x.min().detach().cpu()):
            #pdb.set_trace()
            x[torch.isnan(x)] = F.softmax(oldx[torch.isnan(x)])
        outx = torch.sum(torch.sum(x*self.flowx,1),1,keepdim=True)
        outy = torch.sum(torch.sum(x*self.flowy,1),1,keepdim=True)

        if self.ent:
            # local
            local_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]
            if self.wsize == 0:
                local_entropy[:] = 1.
            else:
                local_entropy /= np.log((self.wsize*2+1)**2)

            # global
            x = F.softmax(oldx.view(b,-1,h,w),1).view(b,u,v,h,w)
            global_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]
            global_entropy /= np.log(x.shape[1]*x.shape[2])
            return torch.cat([outx,outy],1),torch.cat([local_entropy, global_entropy],1)
        else:
            return torch.cat([outx,outy],1),None


class WarpModule(nn.Module):
    """
    taken from https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py
    """
    def __init__(self, size):
        super(WarpModule, self).__init__()
        B,W,H = size
        # mesh grid 
        xx = torch.arange(0, W).view(1,-1).repeat(H,1)
        yy = torch.arange(0, H).view(-1,1).repeat(1,W)
        xx = xx.view(1,1,H,W).repeat(B,1,1,1)
        yy = yy.view(1,1,H,W).repeat(B,1,1,1)
        self.register_buffer('grid',torch.cat((xx,yy),1).float())

    def forward(self, x, flo):
        """
        warp an image/tensor (im2) back to im1, according to the optical flow

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow

        """
        B, C, H, W = x.size()
        vgrid = self.grid + flo

        # scale grid to [-1,1] 
        vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:]/max(W-1,1)-1.0
        vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:]/max(H-1,1)-1.0

        vgrid = vgrid.permute(0,2,3,1)        
        #output = nn.functional.grid_sample(x, vgrid)
        output = nn.functional.grid_sample(x, vgrid, align_corners=True)
        mask = ((vgrid[:,:,:,0].abs()<1) * (vgrid[:,:,:,1].abs()<1)) >0
        return output*mask.unsqueeze(1).float(), mask


def get_grid(B,H,W):
    meshgrid_base = np.meshgrid(range(0,W), range(0,H))[::-1]
    basey = np.reshape(meshgrid_base[0],[1,1,1,H,W])
    basex = np.reshape(meshgrid_base[1],[1,1,1,H,W])
    grid = torch.tensor(np.concatenate((basex.reshape((-1,H,W,1)),basey.reshape((-1,H,W,1))),-1)).cuda().float()
    return grid.view(1,1,H,W,2)


class SegNet(nn.Module):
    """
    Motion Segmentation Network
    """
    def __init__(self, size, md=[4,4,4,4,4], fac=1., exp_unc=True):
        super(SegNet,self).__init__()
        self.md = md
        self.fac = fac
        use_entropy = True
        withbn = True

        ## pspnet
        self.pspnet = pspnet(is_proj=False)

        ### Volumetric-UNet
        fdima1 = 128 # 6/5/4
        fdima2 = 64 # 3/2
        fdimb1 = 16 # 6/5/4/3
        fdimb2 = 12 # 2

        full=False
        self.f6 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)
        self.p6 = sepConv4d(fdimb1,fdimb1, with_bn=False, full=full)

        self.f5 = butterfly4D(fdima1, fdimb1,withbn=withbn, full=full)
        self.p5 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)

        self.f4 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)
        self.p4 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)

        self.f3 = butterfly4D(fdima2, fdimb1,withbn=withbn,full=full)
        self.p3 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)

        full=True
        self.f2 = butterfly4D(fdima2, fdimb2,withbn=withbn,full=full)
        self.p2 = sepConv4d(fdimb2,fdimb2, with_bn=False,full=full)
    
        self.flow_reg64 = flow_reg([fdimb1*size[0],size[1]//64,size[2]//64], ent=use_entropy, maxdisp=self.md[0], fac=self.fac)
        self.flow_reg32 = flow_reg([fdimb1*size[0],size[1]//32,size[2]//32], ent=use_entropy, maxdisp=self.md[1])
        self.flow_reg16 = flow_reg([fdimb1*size[0],size[1]//16,size[2]//16], ent=use_entropy, maxdisp=self.md[2])
        self.flow_reg8 =  flow_reg([fdimb1*size[0],size[1]//8,size[2]//8]  , ent=use_entropy, maxdisp=self.md[3])
        self.flow_reg4 =  flow_reg([fdimb2*size[0],size[1]//4,size[2]//4]  , ent=use_entropy, maxdisp=self.md[4])

        self.warp5 = WarpModule([size[0],size[1]//32,size[2]//32])
        self.warp4 = WarpModule([size[0],size[1]//16,size[2]//16])
        self.warp3 = WarpModule([size[0],size[1]//8,size[2]//8])
        self.warp2 = WarpModule([size[0],size[1]//4,size[2]//4])
        if self.training:
            self.warpx = WarpModule([size[0],size[1],size[2]])

        ## hypotheses fusion modules, adopted from the refinement module of PWCNet
        # https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py
        # c6
        self.dc6_conv1 = conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc6_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc6_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc6_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc6_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc6_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc6_conv7 = nn.Conv2d(32,2*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)

        # c5
        self.dc5_conv1 = conv(128+4*fdimb1*2, 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc5_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc5_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc5_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc5_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc5_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc5_conv7 = nn.Conv2d(32,2*fdimb1*2,kernel_size=3,stride=1,padding=1,bias=True)

        # c4
        self.dc4_conv1 = conv(128+4*fdimb1*3, 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc4_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc4_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc4_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc4_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc4_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc4_conv7 = nn.Conv2d(32,2*fdimb1*3,kernel_size=3,stride=1,padding=1,bias=True)

        # c3
        self.dc3_conv1 = conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc3_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc3_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc3_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc3_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc3_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc3_conv7 = nn.Conv2d(32,8*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)

        # c2
        self.dc2_conv1 = conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc2_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc2_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc2_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc2_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc2_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc2_conv7 = nn.Conv2d(32,4*2*fdimb1 + 2*fdimb2,kernel_size=3,stride=1,padding=1,bias=True)

        self.dc6_conv = nn.Sequential(  self.dc6_conv1,
                                        self.dc6_conv2,
                                        self.dc6_conv3,
                                        self.dc6_conv4,
                                        self.dc6_conv5,
                                        self.dc6_conv6,
                                        self.dc6_conv7)
        self.dc5_conv = nn.Sequential(  self.dc5_conv1,
                                        self.dc5_conv2,
                                        self.dc5_conv3,
                                        self.dc5_conv4,
                                        self.dc5_conv5,
                                        self.dc5_conv6,
                                        self.dc5_conv7)
        self.dc4_conv = nn.Sequential(  self.dc4_conv1,
                                        self.dc4_conv2,
                                        self.dc4_conv3,
                                        self.dc4_conv4,
                                        self.dc4_conv5,
                                        self.dc4_conv6,
                                        self.dc4_conv7)
        self.dc3_conv = nn.Sequential(  self.dc3_conv1,
                                        self.dc3_conv2,
                                        self.dc3_conv3,
                                        self.dc3_conv4,
                                        self.dc3_conv5,
                                        self.dc3_conv6,
                                        self.dc3_conv7)
        self.dc2_conv = nn.Sequential(  self.dc2_conv1,
                                        self.dc2_conv2,
                                        self.dc2_conv3,
                                        self.dc2_conv4,
                                        self.dc2_conv5,
                                        self.dc2_conv6,
                                        self.dc2_conv7)

        ## Out-of-range detection
        self.dc6_convo = nn.Sequential(conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),
                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),
                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),
                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),
                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),
                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),
                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))

        self.dc5_convo = nn.Sequential(conv(128+2*4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),
                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),
                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),
                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),
                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),
                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),
                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))

        self.dc4_convo = nn.Sequential(conv(128+3*4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),
                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),
                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),
                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),
                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),
                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),
                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))

        self.dc3_convo = nn.Sequential(conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),
                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),
                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),
                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),
                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),
                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),
                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))

        self.dc2_convo = nn.Sequential(conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1,  dilation=1),
                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),
                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),
                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),
                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),
                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),
                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))

        # affine-exp
        self.f3d2v1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2v2 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2v3 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2v4 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2v5 = conv(64,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2v6 = conv(12*81,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.f3d2 = bfmodule(128-64,1)

        # depth change net
        self.dcnetv1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.dcnetv2 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.dcnetv3 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.dcnetv4 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.dcnetv5 = conv(12*81,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.dcnetv6 = conv(4,   32, kernel_size=3, stride=1, padding=1,dilation=1) # 
        if exp_unc:
            self.dcnet = bfmodule(128,2)
        else:
            self.dcnet = bfmodule(128,1)
            
        # moseg net
        self.fgnetv1 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv2 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv3 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv4 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv5 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv6 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv7 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv8 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv9 = conv(3,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnetv10 = conv(3,   16, kernel_size=3, stride=1, padding=1,dilation=1) # 
        self.fgnet = bfmodule_feat(208-3*16,7)

        #from midas.midas_net import MidasNet
        #self.midas = MidasNet('/data/gengshay/midas.pt', non_negative=True)
        self.midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
        
        # detection branch
        self.det = create_model('dla_34', {'hm': 2, 'wh': 36}, 256,num_input=14)

        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if hasattr(m.bias,'data'):
                    m.bias.data.zero_()

        self.facs = [self.fac,1,1,1,1]
        self.warp_modules = nn.ModuleList([None, self.warp5, self.warp4, self.warp3, self.warp2])
        self.f_modules = nn.ModuleList([self.f6, self.f5, self.f4, self.f3, self.f2])
        self.p_modules = nn.ModuleList([self.p6, self.p5, self.p4, self.p3, self.p2])
        self.reg_modules = nn.ModuleList([self.flow_reg64, self.flow_reg32, self.flow_reg16, self.flow_reg8, self.flow_reg4])
        self.oor_modules = nn.ModuleList([self.dc6_convo, self.dc5_convo, self.dc4_convo, self.dc3_convo, self.dc2_convo])
        self.fuse_modules = nn.ModuleList([self.dc6_conv, self.dc5_conv, self.dc4_conv, self.dc3_conv, self.dc2_conv])

    def corrf(self, refimg_fea, targetimg_fea,maxdisp, fac=1):
        if self.training:
            #fast correlation function
            b,c,h,w = refimg_fea.shape
            targetimg_fea = F.unfold(targetimg_fea, (2*int(maxdisp)//fac+1,2*maxdisp+1), padding=(int(maxdisp)//fac,maxdisp)).view(b,c, 2*int(maxdisp)//fac+1,2*maxdisp+1,h,w).permute(0,1,3,2,4,5).contiguous()
            cost = refimg_fea.view(b,c,h,w)[:,:,np.newaxis, np.newaxis]*targetimg_fea
            cost = F.leaky_relu(cost, 0.1,inplace=True)
        else:
            #slow correlation function
            b,c,height,width = refimg_fea.shape
            if refimg_fea.is_cuda:
                cost = Variable(torch.cuda.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) # b,c,u,v,h,w
            else:
                cost = Variable(torch.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) # b,c,u,v,h,w
            for i in range(2*maxdisp+1):
                ind = i-maxdisp
                for j in range(2*int(maxdisp//fac)+1):
                    indd = j-int(maxdisp//fac)
                    feata = refimg_fea[:,:,max(0,-indd):height-indd,max(0,-ind):width-ind]
                    featb = targetimg_fea[:,:,max(0,+indd):height+indd,max(0,ind):width+ind]
                    diff = (feata*featb)
                    cost[:, :, i,j,max(0,-indd):height-indd,max(0,-ind):width-ind]   = diff  # standard
            cost = F.leaky_relu(cost, 0.1,inplace=True)
        return cost

    def cost_matching(self,up_flow, c1, c2, flowh, enth, level):
        """
        up_flow: upsample coarse flow
        c1: normalized feature of image 1
        c2: normalized feature of image 2
        flowh: flow hypotheses
        enth: entropy
        oor: out of range score for flow
        """

        # normalize
        c1n = c1 / (c1.norm(dim=1, keepdim=True)+1e-9)
        c2n = c2 / (c2.norm(dim=1, keepdim=True)+1e-9)

        # cost volume
        if level == 0:
            warp = c2n
        else:
            warp,_ = self.warp_modules[level](c2n, up_flow)

        feat = self.corrf(c1n,warp,self.md[level],fac=self.facs[level])
        feat = self.f_modules[level](feat) 
        cost = self.p_modules[level](feat) # b, 16, u,v,h,w

        # soft WTA
        b,c,u,v,h,w = cost.shape
        cost = cost.view(-1,u,v,h,w)  # bx16, 9,9,h,w, also predict uncertainty from here
        flowhh,enthh = self.reg_modules[level](cost) # bx16, 2, h, w
        flowhh = flowhh.view(b,c,2,h,w)
        if level > 0:
            flowhh = flowhh + up_flow[:,np.newaxis]
        flowhh = flowhh.view(b,-1,h,w) # b, 16*2, h, w
        enthh =  enthh.view(b,-1,h,w) # b, 16*1, h, w

        # append coarse hypotheses
        if level == 0:
            flowh = flowhh
            enth = enthh
        else:
            flowh = torch.cat((flowhh, F.upsample(flowh.detach()*2, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1) # b, k2--k2, h, w
            enth = torch.cat((enthh, F.upsample(enth, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1)

        if self.training or level==4:
            x = torch.cat((enth.detach(), flowh.detach(), c1),1)
            oor = self.oor_modules[level](x)[:,0]
        else: oor = None

        # hypotheses fusion
        x = torch.cat((enth.detach(), flowh.detach(), c1),1)
        va = self.fuse_modules[level](x)
        va = va.view(b,-1,2,h,w)
        flow = ( flowh.view(b,-1,2,h,w) * F.softmax(va,1) ).sum(1) # b, 2k, 2, h, w

        return flow, flowh, enth, oor

    def affine(self,pref,flow, pw=1):
        b,_,lh,lw=flow.shape
        ptar = pref + flow
        pw = 1
        pref = F.unfold(pref, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-pref[:,:,np.newaxis]
        ptar = F.unfold(ptar, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-ptar[:,:,np.newaxis] # b, 2,9,h,w
        pref = pref.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)
        ptar = ptar.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)

        prefprefT = pref.matmul(pref.permute(0,2,1))
        ppdet = prefprefT[:,0,0]*prefprefT[:,1,1]-prefprefT[:,1,0]*prefprefT[:,0,1]
        ppinv = torch.cat((prefprefT[:,1,1:],-prefprefT[:,0,1:], -prefprefT[:,1:,0], prefprefT[:,0:1,0]),1).view(-1,2,2)/ppdet.clamp(1e-10,np.inf)[:,np.newaxis,np.newaxis]

        Affine = ptar.matmul(pref.permute(0,2,1)).matmul(ppinv)
        Error = (Affine.matmul(pref)-ptar).norm(2,1).mean(1).view(b,1,lh,lw)

        Avol = (Affine[:,0,0]*Affine[:,1,1]-Affine[:,1,0]*Affine[:,0,1]).view(b,1,lh,lw).abs().clamp(1e-10,np.inf)
        exp = Avol.sqrt()
        mask = (exp>0.5) & (exp<2) & (Error<0.1)
        mask = mask[:,0]

        exp = exp.clamp(0.5,2)
        exp[Error>0.1]=1
        return exp, Error, mask

    def forward_VCN(self, im):
        bs = im.shape[0]//2

        ### compute optical flow
        c06,c05,c04,c03,c02 = self.pspnet(im)
        c16 = c06[:bs];  c26 = c06[bs:]
        c15 = c05[:bs];  c25 = c05[bs:]
        c14 = c04[:bs];  c24 = c04[bs:]
        c13 = c03[:bs];  c23 = c03[bs:]
        c12 = c02[:bs];  c22 = c02[bs:]

        ## matching 6
        flow6, flow6h, ent6h, oor6 = self.cost_matching(None, c16, c26, None, None,level=0)

        ## matching 5
        up_flow6 = F.upsample(flow6, [im.size()[2]//32,im.size()[3]//32], mode='bilinear')*2
        flow5, flow5h, ent5h, oor5 = self.cost_matching(up_flow6, c15, c25, flow6h, ent6h,level=1)

        ## matching 4
        up_flow5 = F.upsample(flow5, [im.size()[2]//16,im.size()[3]//16], mode='bilinear')*2
        flow4, flow4h, ent4h, oor4 = self.cost_matching(up_flow5, c14, c24, flow5h, ent5h,level=2)

        ## matching 3
        up_flow4 = F.upsample(flow4, [im.size()[2]//8,im.size()[3]//8], mode='bilinear')*2
        flow3, flow3h, ent3h, oor3 = self.cost_matching(up_flow4, c13, c23, flow4h, ent4h,level=3)

        ## matching 2
        up_flow3 = F.upsample(flow3, [im.size()[2]//4,im.size()[3]//4], mode='bilinear')*2
        flow2, flow2h, ent2h, oor2 = self.cost_matching(up_flow3, c12, c22, flow3h, ent3h,level=4)

        ### optical expansion
        b,_,h,w = flow2.shape 
        exp2,err2,_ = self.affine(get_grid(b,h,w)[:,0].permute(0,3,1,2).repeat(b,1,1,1).clone(), flow2.detach(),pw=1)
        x = torch.cat((
                        self.f3d2v2(-exp2.log()),
                        self.f3d2v3(err2),
                        ),1)
        dchange2 = -exp2.log()+1./200*self.f3d2(x)[0]

        # depth change net
        iexp2 = F.upsample(dchange2.clone(), [im.size()[2],im.size()[3]], mode='bilinear')
        x = torch.cat((self.dcnetv1(c12.detach()),
                        self.dcnetv2(dchange2.detach()),
                        self.dcnetv3(-exp2.log()),
                        self.dcnetv4(err2),
                    ),1)
        dcneto = 1./200*self.dcnet(x)[0]
        dchange2 = dchange2.detach() + dcneto[:,:1]
        dchange2 = F.upsample(dchange2, [im.size()[2],im.size()[3]], mode='bilinear')

        if dcneto.shape[1]>1:
            dc_unc = dcneto[:,1:2]
        else:
            dc_unc = torch.zeros_like(dcneto)
        dc_unc = F.upsample(dc_unc, [im.size()[2],im.size()[3]], mode='bilinear')[:,0]

        return flow2, oor2, dchange2, dc_unc

    def forward(self,im,disc_aux=None,flowdc=None):
        bs = im.shape[0]//2
        flow2, oor2, dchange2, dc_unc = flowdc

        ### rigid motion segmentation
        ## pre-processing
        Kinv, Kinv_n = get_intrinsics(disc_aux[0], noise=False)
        # get full res flow/expansion inputs
        H,W = im.size()[2:4]
        flow = 4*F.upsample(flow2, [H,W], mode='bilinear').detach()
        oor2 = F.upsample(oor2[:,np.newaxis], [H,W], mode='bilinear').detach()[:,0]
        tau = (-dchange2[:,0]).exp().detach()

        # use different number of correspondences for bg, obj segmentation and pose
        fscale=128./H; fscalex=32./H;fscaled=448./H
        hp0o = torch.cat( [torch.arange(0, W,out=torch.cuda.FloatTensor()).view(1,-1).repeat(H,1)[np.newaxis],  # 1,2,H,W
                            torch.arange(0, H,out=torch.cuda.FloatTensor()).view(-1,1).repeat(1,W)[np.newaxis]], 0)[np.newaxis]
        hp1o = hp0o + flow  # b,2,H,W
        # to deal with input resizing (TODO: move it inside intrinsics)
        hp0o[:,0] *= disc_aux[0][10]
        hp0o[:,1] *= disc_aux[0][11]
        hp1o[:,0] *= disc_aux[0][10]
        hp1o[:,1] *= disc_aux[0][11]

        # sample correspondence for object segmentation (fscaled)
        hp0d = F.interpolate(hp0o,scale_factor=fscaled,mode='nearest')
        hp1d = F.interpolate(hp1o,scale_factor=fscaled,mode='nearest')
        _,_,hd,wd=hp0d.shape
        hp0d = hp0d.view(1,2,-1).permute(0,2,1)
        hp1d = hp1d.view(bs,2,-1).permute(0,2,1)
        hp0d = torch.cat((hp0d,torch.ones(1,hp0d.shape[1],1).cuda()),-1)
        hp1d = torch.cat((hp1d,torch.ones(bs,hp0d.shape[1],1).cuda()),-1)
        uncd = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscaled,mode='nearest'),
                F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscaled,mode='nearest')),1)
        taud = F.interpolate(tau[:,np.newaxis],scale_factor=fscaled,mode='nearest').view(bs,1,-1)

        # sample correspondence for fg/bg seg (fscale)
        hp0 = F.interpolate(hp0o,scale_factor=fscale,mode='nearest')
        hp1 = F.interpolate(hp1o,scale_factor=fscale,mode='nearest')
        _,_,h,w=hp0.shape
        hp0 = hp0.view(1,2,-1).permute(0,2,1)
        hp1 = hp1.view(bs,2,-1).permute(0,2,1)
        hp0 = torch.cat((hp0,torch.ones(1,hp0.shape[1],1).cuda()),-1)
        hp1 = torch.cat((hp1,torch.ones(bs,hp0.shape[1],1).cuda()),-1)
        unc = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscale,mode='nearest'),
                F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscale,mode='nearest')),1)
        tau = F.interpolate(tau[:,np.newaxis],scale_factor=fscale,mode='nearest').view(bs,1,-1)
        
        # sample correspondence for pose estimation (fscalex)
        hp0x = F.interpolate(hp0o,scale_factor=fscalex,mode='nearest')
        hp1x = F.interpolate(hp1o,scale_factor=fscalex,mode='nearest')
        hp0x = hp0x.view(1,2,-1).permute(0,2,1)
        hp1x = hp1x.view(bs,2,-1).permute(0,2,1)
        hp0x = torch.cat((hp0x,torch.ones(1,hp0x.shape[1],1).cuda()),-1)
        hp1x = torch.cat((hp1x,torch.ones(bs,hp0x.shape[1],1).cuda()),-1)

        ## camera pose estimation
        # using input pose from VONet
        rot = torch.from_numpy(cv2.Rodrigues(disc_aux[2][:,:3])[0][:,0].astype(np.float32)).unsqueeze(0)
        trans = torch.from_numpy(disc_aux[2][:,3:].astype(np.float32)).squeeze().unsqueeze(0)
        trans = trans / trans.norm(2,1)[:,np.newaxis]
        rot = rot.cuda().detach()
        trans = trans.cuda().detach()
        Ex = get_skew_mat(trans.cpu(), rot.cpu())

        ## fg/bg segmentation
        # rigidity cost maps
        mcost00, mcost01, mcost1, mcost2, mcost3, mcost4, p3dmag,_ = compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n = Kinv_n)
        
        # depth contrast cost
        with torch.no_grad():
            self.midas.eval()
            input_im  = (disc_aux[1].permute(0,3,1,2) -\
                    torch.Tensor([0.485, 0.456, 0.406]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]) /\
                    torch.Tensor([0.229, 0.224, 0.225]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]
            wsize = int((input_im.shape[3] * 448./input_im.shape[2])//32*32)
            input_im = F.interpolate(input_im, (448, wsize), mode='bilinear')
            dispo = self.midas.forward(input_im)[None].clamp(1e-6,np.inf)

        disp = F.interpolate(dispo, [h,w], mode='bilinear')
        med_dgt = torch.median(disp.view(bs,-1),dim=-1)[0]
        med_dp3d = torch.median(p3dmag.view(bs,-1),dim=-1)[0]
        med_ratio = (med_dgt/med_dp3d)[:,np.newaxis,np.newaxis,np.newaxis]
        # disp[disp == float('inf')] = p3dmag.view(bs,1,h,w)[disp == float('inf')] * med_ratio
        log_dratio = ( med_ratio * p3dmag.view(bs,1,h,w) / disp.view(bs,1,h,w) ).log()
        #pdb.set_trace()

        # pseudo 3D point compute
        depth = (1./ disp).view(bs,1,-1)
        depth = depth.clamp(depth.median()/10, depth.median()*10)
        p03d = depth *      Kinv.matmul(hp0.permute(0,2,1))
        p13d = depth/tau*Kinv_n.matmul(hp1.permute(0,2,1))
        p13d = kornia.angle_axis_to_rotation_matrix(rot).matmul(p13d)  # remove rotation
        pts = torch.cat([p03d, p13d],-1) # bs, 3, 2*N
        # normalize it 
        for i in range(bs):
            pts[i] = pts[i] - pts[i].mean(-1,keepdims=True)  # zero mean
            pts[i] = pts[i] / pts[i].flatten().std() # unit std
        p03d = pts[:,:,:p03d.shape[-1]]
        p13d = pts[:,:,p03d.shape[-1]:]

        # fg/bg segmentation network
        # the constants are empirical values multiplied to cost maps to 
        # ensure they have similar scales
        costs = torch.cat((
                        self.fgnetv1( 0.01*(mcost00+mcost01).view(bs,1,h,w).detach()),
                        self.fgnetv2( 2e3*       mcost1.view(bs,1,h,w).detach()),
                        self.fgnetv3(            mcost2.view(bs,1,h,w).detach()),
                        self.fgnetv4(   30*      mcost3.view(bs,1,h,w).detach()),
                        self.fgnetv5(            mcost4.view(bs,1,h,w).detach()),
                        self.fgnetv6(  0.2*      unc[:,:1].view(bs,1,h,w).detach()),
                        self.fgnetv7(  0.2*      unc[:,1:].view(bs,1,h,w).detach()),
                        self.fgnetv8(   3*      log_dratio.view(bs,1,h,w).detach()),
                        self.fgnetv9( p03d.view(bs,3,h,w).detach()),
                        self.fgnetv10( p13d.view(bs,3,h,w).detach()),
                    ),1)
        x,featx = self.fgnet(costs)
        fg_va =  1./20*x[:,:-1]
        fg_res = 1./200*x[:,-1:]
        fg_hps = torch.cat( (
                                0.01*(mcost00+mcost01).view(bs,1,h,w).detach(),
                                2e3* mcost1.view(bs,1,h,w).detach(),
                                    mcost2.view(bs,1,h,w).detach(),
                                30*  mcost3.view(bs,1,h,w).detach(),
                                    mcost4.view(bs,1,h,w).detach(),
                        3*      log_dratio.view(bs,1,h,w).detach(),
                            ),1)
        # fgmask: prelogits of 0-1 probability foreground vs background
        fgmask = (fg_va * fg_hps).sum(1, keepdims=True) + fg_res
        fgmask = F.upsample(fgmask, [im.size()[2],im.size()[3]], mode='bilinear')


        return fgmask[0,0]

================================================
FILE: Network/rigidmask/__init__.py
================================================


================================================
FILE: Network/rigidmask/conv4d.py
================================================
import pdb
import torch.nn as nn
import math
import torch
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from torch.nn import Module
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _quadruple
from torch.autograd import Variable
from torch.nn import Conv2d

def conv4d(data,filters,bias=None,permute_filters=True,use_half=False):
    """
    This is done by stacking results of multiple 3D convolutions, and is very slow.
    Taken from https://github.com/ignacio-rocco/ncnet
    """
    b,c,h,w,d,t=data.size()

    data=data.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop    
        
    # Same permutation is done with filters, unless already provided with permutation
    if permute_filters:
        filters=filters.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop    

    c_out=filters.size(1)
    if use_half:
        output = Variable(torch.HalfTensor(h,b,c_out,w,d,t),requires_grad=data.requires_grad)
    else:
        output = Variable(torch.zeros(h,b,c_out,w,d,t),requires_grad=data.requires_grad)
    
    padding=filters.size(0)//2
    if use_half:
        Z=Variable(torch.zeros(padding,b,c,w,d,t).half())
    else:
        Z=Variable(torch.zeros(padding,b,c,w,d,t))
    
    if data.is_cuda:
        Z=Z.cuda(data.get_device())    
        output=output.cuda(data.get_device())
        
    data_padded = torch.cat((Z,data,Z),0)
    

    for i in range(output.size(0)): # loop on first feature dimension
        # convolve with center channel of filter (at position=padding)
        output[i,:,:,:,:,:]=F.conv3d(data_padded[i+padding,:,:,:,:,:], 
                                     filters[padding,:,:,:,:,:], bias=bias, stride=1, padding=padding)
        # convolve with upper/lower channels of filter (at postions [:padding] [padding+1:])
        for p in range(1,padding+1):
            output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding-p,:,:,:,:,:], 
                                                             filters[padding-p,:,:,:,:,:], bias=None, stride=1, padding=padding)
            output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding+p,:,:,:,:,:], 
                                                             filters[padding+p,:,:,:,:,:], bias=None, stride=1, padding=padding)

    output=output.permute(1,2,0,3,4,5).contiguous()
    return output

class Conv4d(_ConvNd):
    """Applies a 4D convolution over an input signal composed of several input
    planes.
    """

    def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True): 
        # stride, dilation and groups !=1 functionality not tested 
        stride=1
        dilation=1
        groups=1
        # zero padding is added automatically in conv4d function to preserve tensor size
        padding = 0
        kernel_size = _quadruple(kernel_size)
        stride = _quadruple(stride)
        padding = _quadruple(padding)
        dilation = _quadruple(dilation)
        super(Conv4d, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _quadruple(0), groups, bias)  
        # weights will be sliced along one dimension during convolution loop
        # make the looping dimension to be the first one in the tensor, 
        # so that we don't need to call contiguous() inside the loop
        self.pre_permuted_filters=pre_permuted_filters
        if self.pre_permuted_filters:
            self.weight.data=self.weight.data.permute(2,0,1,3,4,5).contiguous()
        self.use_half=False
    #    self.isbias = bias
    #    if not self.isbias:
    #        self.bn = torch.nn.BatchNorm1d(out_channels)


    def forward(self, input):
        out = conv4d(input, self.weight, bias=self.bias,permute_filters=not self.pre_permuted_filters,use_half=self.use_half) # filters pre-permuted in constructor
    #    if not self.isbias:
    #        b,c,u,v,h,w = out.shape
    #        out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)
        return out

class fullConv4d(torch.nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True):
        super(fullConv4d, self).__init__()
        self.conv = Conv4d(in_channels, out_channels, kernel_size, bias=bias, pre_permuted_filters=pre_permuted_filters)
        self.isbias = bias
        if not self.isbias:
            self.bn = torch.nn.BatchNorm1d(out_channels)

    def forward(self, input):
        out = self.conv(input)
        if not self.isbias:
            b,c,u,v,h,w = out.shape
            out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)
        return out

class butterfly4D(torch.nn.Module):
    '''
    butterfly 4d
    '''
    def __init__(self, fdima, fdimb, withbn=True, full=True,groups=1):
        super(butterfly4D, self).__init__()
        self.proj = nn.Sequential(projfeat4d(fdima, fdimb, 1, with_bn=withbn,groups=groups),
                                  nn.ReLU(inplace=True),)
        self.conva1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)
        self.conva2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)
        self.convb3 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)
        self.convb2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)
        self.convb1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)

    #@profile
    def forward(self,x):
        out = self.proj(x)
        b,c,u,v,h,w = out.shape # 9x9

        out1 = self.conva1(out) # 5x5, 3
        _,c1,u1,v1,h1,w1 = out1.shape

        out2 = self.conva2(out1) # 3x3, 9
        _,c2,u2,v2,h2,w2 = out2.shape

        out2 = self.convb3(out2) # 3x3, 9

        tout1 = F.upsample(out2.view(b,c,u2,v2,-1),(u1,v1,h2*w2),mode='trilinear').view(b,c,u1,v1,h2,w2) # 5x5
        tout1 = F.upsample(tout1.view(b,c,-1,h2,w2),(u1*v1,h1,w1),mode='trilinear').view(b,c,u1,v1,h1,w1) # 5x5
        out1 = tout1 + out1
        out1 = self.convb2(out1)

        tout = F.upsample(out1.view(b,c,u1,v1,-1),(u,v,h1*w1),mode='trilinear').view(b,c,u,v,h1,w1)
        tout = F.upsample(tout.view(b,c,-1,h1,w1),(u*v,h,w),mode='trilinear').view(b,c,u,v,h,w)
        out = tout + out
        out = self.convb1(out)

        return out



class projfeat4d(torch.nn.Module):
    '''
    Turn 3d projection into 2d projection
    '''
    def __init__(self, in_planes, out_planes, stride, with_bn=True,groups=1):
        super(projfeat4d, self).__init__()
        self.with_bn = with_bn
        self.stride = stride
        self.conv1 = nn.Conv3d(in_planes, out_planes, 1, (stride,stride,1), padding=0,bias=not with_bn,groups=groups)
        self.bn = nn.BatchNorm3d(out_planes)

    def forward(self,x):
        b,c,u,v,h,w = x.size()
        x = self.conv1(x.view(b,c,u,v,h*w))
        if self.with_bn:
            x = self.bn(x)
        _,c,u,v,_ = x.shape
        x = x.view(b,c,u,v,h,w)
        return x

class sepConv4d(torch.nn.Module):
    '''
    Separable 4d convolution block as 2 3D convolutions
    '''
    def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, ksize=3, full=True,groups=1):
        super(sepConv4d, self).__init__()
        bias = not with_bn
        self.isproj = False
        self.stride = stride[0]
        expand = 1

        if with_bn:
            if in_planes != out_planes:
                self.isproj = True
                self.proj = nn.Sequential(nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups),
                                          nn.BatchNorm2d(out_planes))
            if full:
                self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),
                                           nn.BatchNorm3d(in_planes))
            else:
                self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1,                           bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),
                                           nn.BatchNorm3d(in_planes))
            self.conv2 = nn.Sequential(nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups),
                                       nn.BatchNorm3d(in_planes*expand))
        else:
            if in_planes != out_planes:
                self.isproj = True
                self.proj = nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups)
            if full:
                self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)
            else:
                self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1,                           bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)
            self.conv2 = nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups)
        self.relu = nn.ReLU(inplace=True)
        
    #@profile
    def forward(self,x):
        b,c,u,v,h,w = x.shape
        x = self.conv2(x.view(b,c,u,v,-1))
        b,c,u,v,_ = x.shape
        x = self.relu(x)
        x = self.conv1(x.view(b,c,-1,h,w))
        b,c,_,h,w = x.shape

        if self.isproj:
            x = self.proj(x.view(b,c,-1,w))
        x = x.view(b,-1,u,v,h,w)
        return x


class sepConv4dBlock(torch.nn.Module):
    '''
    Separable 4d convolution block as 2 2D convolutions and a projection
    layer
    '''
    def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, full=True,groups=1):
        super(sepConv4dBlock, self).__init__()
        if in_planes == out_planes and stride==(1,1,1):
            self.downsample = None
        else:
            if full:
                self.downsample = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn,ksize=1, full=full,groups=groups)
            else:
                self.downsample = projfeat4d(in_planes, out_planes,stride[0], with_bn=with_bn,groups=groups)
        self.conv1 = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn, full=full ,groups=groups)
        self.conv2 = sepConv4d(out_planes, out_planes,(1,1,1), with_bn=with_bn, full=full,groups=groups)
        self.relu1 = nn.ReLU(inplace=True)
        self.relu2 = nn.ReLU(inplace=True)

    #@profile
    def forward(self,x):
        out = self.relu1(self.conv1(x))
        if self.downsample:
            x = self.downsample(x)
        out = self.relu2(x + self.conv2(out))
        return out


##import torch.backends.cudnn as cudnn
##cudnn.benchmark = True
#import time
##im = torch.randn(9,64,9,160,224).cuda()
##net = torch.nn.Conv3d(64, 64, 3).cuda()
##net = Conv4d(1,1,3,bias=True,pre_permuted_filters=True).cuda()
##net = sepConv4dBlock(2,2,stride=(1,1,1)).cuda()
#
##im = torch.randn(1,16,9,9,96,320).cuda()
##net = sepConv4d(16,16,with_bn=False).cuda()
#
##im = torch.randn(1,16,81,96,320).cuda()
##net = torch.nn.Conv3d(16,16,(1,3,3),padding=(0,1,1)).cuda()
#
##im = torch.randn(1,16,9,9,96*320).cuda()
##net = torch.nn.Conv3d(16,16,(3,3,1),padding=(1,1,0)).cuda()
#
##im = torch.randn(10000,10,9,9).cuda()
##net = torch.nn.Conv2d(10,10,3,padding=1).cuda()
#
##im = torch.randn(81,16,96,320).cuda()
##net = torch.nn.Conv2d(16,16,3,padding=1).cuda()
#c=   int(16 *1)
#cp = int(16 *1)
#h=int(96  *4)
#w=int(320 *4)
#k=3
#im = torch.randn(1,c,h,w).cuda()
#net = torch.nn.Conv2d(c,cp,k,padding=k//2).cuda()
#
#im2 = torch.randn(cp,k*k*c).cuda()
#im1 = F.unfold(im, (k,k), padding=k//2)[0]
# 
#
#net(im)
#net(im)
#torch.mm(im2,im1)
#torch.mm(im2,im1)
#torch.cuda.synchronize()
#beg = time.time()
#for i in range(100):
#    net(im)
#    #im1 = F.unfold(im, (k,k), padding=k//2)[0]
#    torch.mm(im2,im1)
#torch.cuda.synchronize()
#print('%f'%((time.time()-beg)*10.))


================================================
FILE: Network/rigidmask/det.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torchvision.models as models
import torch
import torch.nn as nn
import os

from .networks.msra_resnet import get_pose_net
from .networks.dlav0 import get_pose_net as get_dlav0
from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
from .networks.large_hourglass import get_large_hourglass_net

_model_factory = {
  'res': get_pose_net, # default Resnet with deconv
  'dlav0': get_dlav0, # default DLAup
  'dla': get_dla_dcn,
  'resdcn': get_pose_net_dcn,
  'hourglass': get_large_hourglass_net,
}

def create_model(arch, heads, head_conv,num_input):
  num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
  arch = arch[:arch.find('_')] if '_' in arch else arch
  get_model = _model_factory[arch]
  model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input)
  return model

def load_model(model, model_path, optimizer=None, resume=False, 
               lr=None, lr_step=None):
  start_epoch = 0
  checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
  print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
  state_dict_ = checkpoint['state_dict']
  state_dict = {}
  
  # convert data_parallal to model
  for k in state_dict_:
    if k.startswith('module') and not k.startswith('module_list'):
      state_dict[k[7:]] = state_dict_[k]
    else:
      state_dict[k] = state_dict_[k]
  model_state_dict = model.state_dict()

  # check loaded parameters and created model parameters
  msg = 'If you see this, your model does not fully load the ' + \
        'pre-trained weight. Please make sure ' + \
        'you have correctly specified --arch xxx ' + \
        'or set the correct --num_classes for your own dataset.'
  for k in state_dict:
    if k in model_state_dict:
      if state_dict[k].shape != model_state_dict[k].shape:
        print('Skip loading parameter {}, required shape{}, '\
              'loaded shape{}. {}'.format(
          k, model_state_dict[k].shape, state_dict[k].shape, msg))
        state_dict[k] = model_state_dict[k]
    else:
      print('Drop parameter {}.'.format(k) + msg)
  for k in model_state_dict:
    if not (k in state_dict):
      print('No param {}.'.format(k) + msg)
      state_dict[k] = model_state_dict[k]
  model.load_state_dict(state_dict, strict=False)

  # resume optimizer parameters
  if optimizer is not None and resume:
    if 'optimizer' in checkpoint:
      optimizer.load_state_dict(checkpoint['optimizer'])
      start_epoch = checkpoint['epoch']
      start_lr = lr
      for step in lr_step:
        if start_epoch >= step:
          start_lr *= 0.1
      for param_group in optimizer.param_groups:
        param_group['lr'] = start_lr
      print('Resumed optimizer with start lr', start_lr)
    else:
      print('No optimizer parameters in checkpoint.')
  if optimizer is not None:
    return model, optimizer, start_epoch
  else:
    return model

def save_model(path, epoch, model, optimizer=None):
  if isinstance(model, torch.nn.DataParallel):
    state_dict = model.module.state_dict()
  else:
    state_dict = model.state_dict()
  data = {'epoch': epoch,
          'state_dict': state_dict}
  if not (optimizer is None):
    data['optimizer'] = optimizer.state_dict()
  torch.save(data, path)



================================================
FILE: Network/rigidmask/det_losses.py
================================================
# ------------------------------------------------------------------------------
# Portions of this code are from
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pdb
import torch
import torch.nn as nn
from .det_utils import _transpose_and_gather_feat
import torch.nn.functional as F


def _slow_neg_loss(pred, gt):
  '''focal loss from CornerNet'''
  pos_inds = gt.eq(1)
  neg_inds = gt.lt(1)

  neg_weights = torch.pow(1 - gt[neg_inds], 4)

  loss = 0
  pos_pred = pred[pos_inds]
  neg_pred = pred[neg_inds]

  pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
  neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights

  num_pos  = pos_inds.float().sum()
  pos_loss = pos_loss.sum()
  neg_loss = neg_loss.sum()

  if pos_pred.nelement() == 0:
    loss = loss - neg_loss
  else:
    loss = loss - (pos_loss + neg_loss) / num_pos
  return loss


def _neg_loss(pred, gt, heat_logits):
  ''' Modified focal loss. Exactly the same as CornerNet.
      Runs faster and costs a little bit more memory
    Arguments:
      pred (batch x c x h x w)
      gt_regr (batch x c x h x w)
  '''
  pos_inds = gt.eq(1).float()
  neg_inds = gt.lt(1).float()

  neg_weights = torch.pow(1 - gt, 4)

  loss = 0

  logpred = torch.nn.functional.log_softmax(heat_logits,1)
  pos_loss = logpred[:,0:1] * torch.pow(1 - pred, 2) * pos_inds
  neg_loss = logpred[:,1:2] * torch.pow(pred, 2) * neg_weights * neg_inds

  num_pos  = pos_inds.float().sum()
  pos_loss = pos_loss.sum()
  neg_loss = neg_loss.sum()

  if num_pos == 0:
    loss = loss - neg_loss
  else:
    loss = loss - (pos_loss + neg_loss) / num_pos
  return loss

def _not_faster_neg_loss(pred, gt):
    pos_inds = gt.eq(1).float()
    neg_inds = gt.lt(1).float()    
    num_pos  = pos_inds.float().sum()
    neg_weights = torch.pow(1 - gt, 4)

    loss = 0
    trans_pred = pred * neg_inds + (1 - pred) * pos_inds
    weight = neg_weights * neg_inds + pos_inds
    all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
    all_loss = all_loss.sum()

    if num_pos > 0:
        all_loss /= num_pos
    loss -=  all_loss
    return loss

def _slow_reg_loss(regr, gt_regr, mask):
    num  = mask.float().sum()
    mask = mask.unsqueeze(2).expand_as(gt_regr)

    regr    = regr[mask]
    gt_regr = gt_regr[mask]
    
    regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
    regr_loss = regr_loss / (num + 1e-4)
    return regr_loss

def _reg_loss(regr, gt_regr, mask):
  ''' L1 regression loss
    Arguments:
      regr (batch x max_objects x dim)
      gt_regr (batch x max_objects x dim)
      mask (batch x max_objects)
  '''
  num = mask.float().sum()
  mask = mask.unsqueeze(2).expand_as(gt_regr).float()

  regr = regr * mask
  gt_regr = gt_regr * mask
    
  regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
  regr_loss = regr_loss / (num + 1e-4)
  return regr_loss

class FocalLoss(nn.Module):
  '''nn.Module warpper for focal loss'''
  def __init__(self):
    super(FocalLoss, self).__init__()
    self.neg_loss = _neg_loss

  def forward(self, out, target, logits):
    return self.neg_loss(out, target, logits)

class RegLoss(nn.Module):
  '''Regression loss for an output tensor
    Arguments:
      output (batch x dim x h x w)
      mask (batch x max_objects)
      ind (batch x max_objects)
      target (batch x max_objects x dim)
  '''
  def __init__(self):
    super(RegLoss, self).__init__()
  
  def forward(self, output, mask, ind, target):
    pred = _transpose_and_gather_feat(output, ind)
    loss = _reg_loss(pred, target, mask)
    return loss

class RegL1Loss(nn.Module):
  def __init__(self):
    super(RegL1Loss, self).__init__()
  
  def forward(self, output, mask, ind, target):
    pred = _transpose_and_gather_feat(output, ind)
    mask = mask.unsqueeze(2).expand_as(pred).float()
    # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
    loss = F.l1_loss(pred * mask, target * mask, size_average=False)
    loss = loss / (mask.sum() + 1e-4)
    return loss

class NormRegL1Loss(nn.Module):
  def __init__(self):
    super(NormRegL1Loss, self).__init__()
  
  def forward(self, output, mask, ind, target):
    pred = _transpose_and_gather_feat(output, ind)
    mask = mask.unsqueeze(2).expand_as(pred).float()
    # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
    pred = pred / (target + 1e-4)
    target = target * 0 + 1
    loss = F.l1_loss(pred * mask, target * mask, size_average=False)
    loss = loss / (mask.sum() + 1e-4)
    return loss

class RegWeightedL1Loss(nn.Module):
  def __init__(self):
    super(RegWeightedL1Loss, self).__init__()
  
  def forward(self, output, mask, ind, target):
    pred = _transpose_and_gather_feat(output, ind)
    mask = mask.float()
    # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
    loss = F.l1_loss(pred * mask, target * mask, size_average=False)
    loss = loss / (mask.sum() + 1e-4)
    return loss

class L1Loss(nn.Module):
  def __init__(self):
    super(L1Loss, self).__init__()
  
  def forward(self, output, mask, ind, target):
    pred = _transpose_and_gather_feat(output, ind)
    mask = mask.unsqueeze(2).expand_as(pred).float()
    loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
    return loss

class BinRotLoss(nn.Module):
  def __init__(self):
    super(BinRotLoss, self).__init__()
  
  def forward(self, output, mask, ind, rotbin, rotres):
    pred = _transpose_and_gather_feat(output, ind)
    loss = compute_rot_loss(pred, rotbin, rotres, mask)
    return loss

def compute_res_loss(output, target):
    return F.smooth_l1_loss(output, target, reduction='elementwise_mean')

# TODO: weight
def compute_bin_loss(output, target, mask):
    mask = mask.expand_as(output)
    output = output * mask.float()
    return F.cross_entropy(output, target, reduction='elementwise_mean')

def compute_rot_loss(output, target_bin, target_res, mask):
    # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 
    #                 bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
    # target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
    # target_res: (B, 128, 2) [bin1_res, bin2_res]
    # mask: (B, 128, 1)
    # import pdb; pdb.set_trace()
    output = output.view(-1, 8)
    target_bin = target_bin.view(-1, 2)
    target_res = target_res.view(-1, 2)
    mask = mask.view(-1, 1)
    loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
    loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
    loss_res = torch.zeros_like(loss_bin1)
    if target_bin[:, 0].nonzero().shape[0] > 0:
        idx1 = target_bin[:, 0].nonzero()[:, 0]
        valid_output1 = torch.index_select(output, 0, idx1.long())
        valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
        loss_sin1 = compute_res_loss(
          valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
        loss_cos1 = compute_res_loss(
          valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
        loss_res += loss_sin1 + loss_cos1
    if target_bin[:, 1].nonzero().shape[0] > 0:
        idx2 = target_bin[:, 1].nonzero()[:, 0]
        valid_output2 = torch.index_select(output, 0, idx2.long())
        valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
        loss_sin2 = compute_res_loss(
          valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
        loss_cos2 = compute_res_loss(
          valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
        loss_res += loss_sin2 + loss_cos2
    return loss_bin1 + loss_bin2 + loss_res


================================================
FILE: Network/rigidmask/det_utils.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn

def _sigmoid(x):
  y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
  return y

def _gather_feat(feat, ind, mask=None):
    dim  = feat.size(2)
    ind  = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
    feat = feat.gather(1, ind)
    if mask is not None:
        mask = mask.unsqueeze(2).expand_as(feat)
        feat = feat[mask]
        feat = feat.view(-1, dim)
    return feat

def _transpose_and_gather_feat(feat, ind):
    feat = feat.permute(0, 2, 3, 1).contiguous()
    feat = feat.view(feat.size(0), -1, feat.size(3))
    feat = _gather_feat(feat, ind)
    return feat

def flip_tensor(x):
    return torch.flip(x, [3])
    # tmp = x.detach().cpu().numpy()[..., ::-1].copy()
    # return torch.from_numpy(tmp).to(x.device)

def flip_lr(x, flip_idx):
  tmp = x.detach().cpu().numpy()[..., ::-1].copy()
  shape = tmp.shape
  for e in flip_idx:
    tmp[:, e[0], ...], tmp[:, e[1], ...] = \
      tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
  return torch.from_numpy(tmp.reshape(shape)).to(x.device)

def flip_lr_off(x, flip_idx):
  tmp = x.detach().cpu().numpy()[..., ::-1].copy()
  shape = tmp.shape
  tmp = tmp.reshape(tmp.shape[0], 17, 2, 
                    tmp.shape[2], tmp.shape[3])
  tmp[:, :, 0, :, :] *= -1
  for e in flip_idx:
    tmp[:, e[0], ...], tmp[:, e[1], ...] = \
      tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
  return torch.from_numpy(tmp.reshape(shape)).to(x.device)

================================================
FILE: Network/rigidmask/networks/DCNv2/.gitignore
================================================
.vscode
.idea
*.so
*.o
*pyc
_ext
build
DCNv2.egg-info
dist

================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/__init__.py
================================================
from .dcn_v2 import *


================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import math
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.autograd.function import once_differentiable

import _ext as _backend


class _DCNv2(Function):
    @staticmethod
    def forward(ctx, input, offset, mask, weight, bias,
                stride, padding, dilation, deformable_groups):
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.kernel_size = _pair(weight.shape[2:4])
        ctx.deformable_groups = deformable_groups
        output = _backend.dcn_v2_forward(input, weight, bias,
                                         offset, mask,
                                         ctx.kernel_size[0], ctx.kernel_size[1],
                                         ctx.stride[0], ctx.stride[1],
                                         ctx.padding[0], ctx.padding[1],
                                         ctx.dilation[0], ctx.dilation[1],
                                         ctx.deformable_groups)
        ctx.save_for_backward(input, offset, mask, weight, bias)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, offset, mask, weight, bias = ctx.saved_tensors
        grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \
            _backend.dcn_v2_backward(input, weight,
                                     bias,
                                     offset, mask,
                                     grad_output,
                                     ctx.kernel_size[0], ctx.kernel_size[1],
                                     ctx.stride[0], ctx.stride[1],
                                     ctx.padding[0], ctx.padding[1],
                                     ctx.dilation[0], ctx.dilation[1],
                                     ctx.deformable_groups)

        return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\
            None, None, None, None,


dcn_v2_conv = _DCNv2.apply


class DCNv2(nn.Module):

    def __init__(self, in_channels, out_channels,
                 kernel_size, stride, padding, dilation=1, deformable_groups=1):
        super(DCNv2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.deformable_groups = deformable_groups

        self.weight = nn.Parameter(torch.Tensor(
            out_channels, in_channels, *self.kernel_size))
        self.bias = nn.Parameter(torch.Tensor(out_channels))
        self.reset_parameters()

    def reset_parameters(self):
        n = self.in_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight.data.uniform_(-stdv, stdv)
        self.bias.data.zero_()

    def forward(self, input, offset, mask):
        assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
            offset.shape[1]
        assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
            mask.shape[1]
        return dcn_v2_conv(input, offset, mask,
                           self.weight,
                           self.bias,
                           self.stride,
                           self.padding,
                           self.dilation,
                           self.deformable_groups)


class DCN(DCNv2):

    def __init__(self, in_channels, out_channels,
                 kernel_size, stride, padding,
                 dilation=1, deformable_groups=1):
        super(DCN, self).__init__(in_channels, out_channels,
                                  kernel_size, stride, padding, dilation, deformable_groups)

        channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]
        self.conv_offset_mask = nn.Conv2d(self.in_channels,
                                          channels_,
                                          kernel_size=self.kernel_size,
                                          stride=self.stride,
                                          padding=self.padding,
                                          bias=True)
        self.init_offset()

    def init_offset(self):
        self.conv_offset_mask.weight.data.zero_()
        self.conv_offset_mask.bias.data.zero_()

    def forward(self, input):
        out = self.conv_offset_mask(input)
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return dcn_v2_conv(input, offset, mask,
                           self.weight, self.bias,
                           self.stride,
                           self.padding,
                           self.dilation,
                           self.deformable_groups)



class _DCNv2Pooling(Function):
    @staticmethod
    def forward(ctx, input, rois, offset,
                spatial_scale,
                pooled_size,
                output_dim,
                no_trans,
                group_size=1,
                part_size=None,
                sample_per_part=4,
                trans_std=.0):
        ctx.spatial_scale = spatial_scale
        ctx.no_trans = int(no_trans)
        ctx.output_dim = output_dim
        ctx.group_size = group_size
        ctx.pooled_size = pooled_size
        ctx.part_size = pooled_size if part_size is None else part_size
        ctx.sample_per_part = sample_per_part
        ctx.trans_std = trans_std

        output, output_count = \
            _backend.dcn_v2_psroi_pooling_forward(input, rois, offset,
                                                  ctx.no_trans, ctx.spatial_scale,
                                                  ctx.output_dim, ctx.group_size,
                                                  ctx.pooled_size, ctx.part_size,
                                                  ctx.sample_per_part, ctx.trans_std)
        ctx.save_for_backward(input, rois, offset, output_count)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, rois, offset, output_count = ctx.saved_tensors
        grad_input, grad_offset = \
            _backend.dcn_v2_psroi_pooling_backward(grad_output,
                                                   input,
                                                   rois,
                                                   offset,
                                                   output_count,
                                                   ctx.no_trans,
                                                   ctx.spatial_scale,
                                                   ctx.output_dim,
                                                   ctx.group_size,
                                                   ctx.pooled_size,
                                                   ctx.part_size,
                                                   ctx.sample_per_part,
                                                   ctx.trans_std)

        return grad_input, None, grad_offset, \
            None, None, None, None, None, None, None, None


dcn_v2_pooling = _DCNv2Pooling.apply


class DCNv2Pooling(nn.Module):

    def __init__(self,
                 spatial_scale,
                 pooled_size,
                 output_dim,
                 no_trans,
                 group_size=1,
                 part_size=None,
                 sample_per_part=4,
                 trans_std=.0):
        super(DCNv2Pooling, self).__init__()
        self.spatial_scale = spatial_scale
        self.pooled_size = pooled_size
        self.output_dim = output_dim
        self.no_trans = no_trans
        self.group_size = group_size
        self.part_size = pooled_size if part_size is None else part_size
        self.sample_per_part = sample_per_part
        self.trans_std = trans_std

    def forward(self, input, rois, offset):
        assert input.shape[1] == self.output_dim
        if self.no_trans:
            offset = input.new()
        return dcn_v2_pooling(input, rois, offset,
                              self.spatial_scale,
                              self.pooled_size,
                              self.output_dim,
                              self.no_trans,
                              self.group_size,
                              self.part_size,
                              self.sample_per_part,
                              self.trans_std)


class DCNPooling(DCNv2Pooling):

    def __init__(self,
                 spatial_scale,
                 pooled_size,
                 output_dim,
                 no_trans,
                 group_size=1,
                 part_size=None,
                 sample_per_part=4,
                 trans_std=.0,
                 deform_fc_dim=1024):
        super(DCNPooling, self).__init__(spatial_scale,
                                         pooled_size,
                                         output_dim,
                                         no_trans,
                                         group_size,
                                         part_size,
                                         sample_per_part,
                                         trans_std)

        self.deform_fc_dim = deform_fc_dim

        if not no_trans:
            self.offset_mask_fc = nn.Sequential(
                nn.Linear(self.pooled_size * self.pooled_size *
                          self.output_dim, self.deform_fc_dim),
                nn.ReLU(inplace=True),
                nn.Linear(self.deform_fc_dim, self.deform_fc_dim),
                nn.ReLU(inplace=True),
                nn.Linear(self.deform_fc_dim, self.pooled_size *
                          self.pooled_size * 3)
            )
            self.offset_mask_fc[4].weight.data.zero_()
            self.offset_mask_fc[4].bias.data.zero_()

    def forward(self, input, rois):
        offset = input.new()

        if not self.no_trans:

            # do roi_align first
            n = rois.shape[0]
            roi = dcn_v2_pooling(input, rois, offset,
                                 self.spatial_scale,
                                 self.pooled_size,
                                 self.output_dim,
                                 True,  # no trans
                                 self.group_size,
                                 self.part_size,
                                 self.sample_per_part,
                                 self.trans_std)

            # build mask and offset
            offset_mask = self.offset_mask_fc(roi.view(n, -1))
            offset_mask = offset_mask.view(
                n, 3, self.pooled_size, self.pooled_size)
            o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
            offset = torch.cat((o1, o2), dim=1)
            mask = torch.sigmoid(mask)

            # do pooling with offset and mask
            return dcn_v2_pooling(input, rois, offset,
                                  self.spatial_scale,
                                  self.pooled_size,
                                  self.output_dim,
                                  self.no_trans,
                                  self.group_size,
                                  self.part_size,
                                  self.sample_per_part,
                                  self.trans_std) * mask
        # only roi_align
        return dcn_v2_pooling(input, rois, offset,
                              self.spatial_scale,
                              self.pooled_size,
                              self.output_dim,
                              self.no_trans,
                              self.group_size,
                              self.part_size,
                              self.sample_per_part,
                              self.trans_std)


================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp
================================================
#include <vector>
#include "cpu/dcn_v2_im2col_cpu.h"
#include <iostream>

#include <ATen/ATen.h>
//#include <ATen/cuda/CUDAContext.h>

#include <TH/TH.h>
//#include <THC/THCAtomics.cuh>
//#include <THC/THCDeviceUtils.cuh>

//extern THCState *state;

// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu

// modified from the CUDA version for CPU use by Daniel K. Suhendro

// edit by: James Bockman and Matthew Howe
// modified for torch implementation to remove use of deprecated torch access to Blas

at::Tensor
dcn_v2_cpu_forward(const at::Tensor &input,
                    const at::Tensor &weight,
                    const at::Tensor &bias,
                    const at::Tensor &offset,
                    const at::Tensor &mask,
                    const int kernel_h,
                    const int kernel_w,
                    const int stride_h,
                    const int stride_w,
                    const int pad_h,
                    const int pad_w,
                    const int dilation_h,
                    const int dilation_w,
                    const int deformable_group)
{
    // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
    /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/

    const int batch = input.size(0);
    const int channels = input.size(1);
    const int height = input.size(2);
    const int width = input.size(3);

    const int channels_out = weight.size(0);
    const int channels_kernel = weight.size(1);
    const int kernel_h_ = weight.size(2);
    const int kernel_w_ = weight.size(3);

    // printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h);
    // printf("Channels: %d %d\n", channels, channels_kernel);
    // printf("Channels: %d %d\n", channels_out, channels_kernel);

    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
               "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_);

    AT_ASSERTM(channels == channels_kernel,
               "Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel);

    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

    // auto ones = at::ones({height_out, width_out}, input.options());
    auto ones = at::ones({bias.sizes()[0], height_out, width_out}, input.options());
    auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
    auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options());

    using scalar_t = float;
    for (int b = 0; b < batch; b++)
    {
        auto input_n = input.select(0, b);
        auto offset_n = offset.select(0, b);
        auto mask_n = mask.select(0, b);
        auto output_n = output.select(0, b);
        // std::cout << "output_n: " << output_n << "output.select(0,b): " << output.select(0,b) << "\n"; 

        // Do Bias first:
        // M,N,K are dims of matrix A and B
        // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
        // (N x 1) (1 x M)

        // torch implementation
        auto ones_T = at::transpose(ones.contiguous(), 2, 0);
        ones_T = at::mul(ones_T, bias.contiguous());
        ones_T = at::transpose(ones_T, 2, 0);
        output_n = at::add(output_n, ones_T);

        modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),
                                         offset_n.data_ptr<scalar_t>(),
                                         mask_n.data_ptr<scalar_t>(),
                                         1, channels, height, width,
                                         height_out, width_out, kernel_h, kernel_w,
                                         pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
                                         deformable_group,
                                         columns.data_ptr<scalar_t>());

        //(k * m)  x  (m * n)
        // Y = WC

        // torch implementation
        auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w});
        auto product = at::matmul(weight_flat, columns);
        output.select(0, b) = at::add(output_n, product.view({channels_out, height_out, width_out}));
    }
    return output;
}

std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,
                                             const at::Tensor &weight,
                                             const at::Tensor &bias,
                                             const at::Tensor &offset,
                                             const at::Tensor &mask,
                                             const at::Tensor &grad_output,
                                             int kernel_h, int kernel_w,
                                             int stride_h, int stride_w,
                                             int pad_h, int pad_w,
                                             int dilation_h, int dilation_w,
                                             int deformable_group)
{

    THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous");
    THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous");

    /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/

    const int batch = input.size(0);
    const int channels = input.size(1);
    const int height = input.size(2);
    const int width = input.size(3);

    const int channels_out = weight.size(0);
    const int channels_kernel = weight.size(1);
    const int kernel_h_ = weight.size(2);
    const int kernel_w_ = weight.size(3);

    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
               "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_);

    AT_ASSERTM(channels == channels_kernel,
               "Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel);

    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

    auto ones = at::ones({height_out, width_out}, input.options());
    auto columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
    auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());

    auto grad_input = at::zeros_like(input);
    auto grad_weight = at::zeros_like(weight);
    auto grad_bias = at::zeros_like(bias);
    auto grad_offset = at::zeros_like(offset);
    auto grad_mask = at::zeros_like(mask);

    using scalar_t = float;

    for (int b = 0; b < batch; b++)
    {
        auto input_n = input.select(0, b);
        auto offset_n = offset.select(0, b);
        auto mask_n = mask.select(0, b);
        auto grad_output_n = grad_output.select(0, b);
        auto grad_input_n = grad_input.select(0, b);
        auto grad_offset_n = grad_offset.select(0, b);
        auto grad_mask_n = grad_mask.select(0, b);



        // Torch implementation
        auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w});
        weight_flat = at::transpose(weight_flat, 1, 0);
        auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out});
        columns = at::matmul(weight_flat, grad_output_n_flat);

        // gradient w.r.t. input coordinate data
        modulated_deformable_col2im_coord_cpu(columns.data_ptr<scalar_t>(),
                                               input_n.data_ptr<scalar_t>(),
                                               offset_n.data_ptr<scalar_t>(),
                                               mask_n.data_ptr<scalar_t>(),
                                               1, channels, height, width,
                                               height_out, width_out, kernel_h, kernel_w,
                                               pad_h, pad_w, stride_h, stride_w,
                                               dilation_h, dilation_w, deformable_group,
                                               grad_offset_n.data_ptr<scalar_t>(),
                                               grad_mask_n.data_ptr<scalar_t>());
        // gradient w.r.t. input data
        modulated_deformable_col2im_cpu(columns.data_ptr<scalar_t>(),
                                         offset_n.data_ptr<scalar_t>(),
                                         mask_n.data_ptr<scalar_t>(),
                                         1, channels, height, width,
                                         height_out, width_out, kernel_h, kernel_w,
                                         pad_h, pad_w, stride_h, stride_w,
                                         dilation_h, dilation_w, deformable_group,
                                         grad_input_n.data_ptr<scalar_t>());

        // gradient w.r.t. weight, dWeight should accumulate across the batch and group
        modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),
                                         offset_n.data_ptr<scalar_t>(),
                                         mask_n.data_ptr<scalar_t>(),
                                         1, channels, height, width,
                                         height_out, width_out, kernel_h, kernel_w,
                                         pad_h, pad_w, stride_h, stride_w,
                                         dilation_h, dilation_w, deformable_group,
                                         columns.data_ptr<scalar_t>());

        // Torch implementation
        auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0));
        grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w}));


        // Torch implementation
        auto ones_flat = ones.view({height_out*width_out});
        product = at::matmul(grad_output_n_flat, ones_flat);
        grad_bias = at::add(grad_bias, product);
    }

    return {
        grad_input, grad_offset, grad_mask, grad_weight, grad_bias
    };
}


================================================
FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp
================================================
#include "dcn_v2_im2col_cpu.h"
#include <cstdio>
#include <algorithm>
#include <cstring>

#include <ATen/ATen.h>
//#include <ATen/cuda/CUDAContext.h>

#include <TH/TH.h>
//#include <THC/THCAtomics.cuh>
//#include <THC/THCDeviceUtils.cuh>

// modified from the CUDA version for CPU use by Daniel K. Suhendro

/*#define CUDA_KERNEL_LOOP(i, n)                          \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
      i < (n);                                          \
      i += blockDim.x * gridDim.x)

const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}*/


float dmcn_im2col_bilinear_cpu(const float *bottom_data, const int data_width,
                           const int height, const int width, float h, float w)
{
  int h_low = floor(h);
  int w_low = floor(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  float lh = h - h_low;
  float lw = w - w_low;
  float hh = 1 - lh, hw = 1 - lw;

  float v1 = 0;
  if (h_low >= 0 && w_low >= 0)
    v1 = bottom_data[h_low * data_width + w_low];
  float v2 = 0;
  if (h_low >= 0 && w_high <= width - 1)
    v2 = bottom_data[h_low * data_width + w_high];
  float v3 = 0;
  if (h_high <= height - 1 && w_low >= 0)
    v3 = bottom_data[h_high * data_width + w_low];
  float v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = bottom_data[h_high * data_width + w_high];

  float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

float dmcn_get_gradient_weight_cpu(float argmax_h, float argmax_w,
                               const int h, const int w, const int height, const int width)
{
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
  {
    //empty
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  float weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

float dmcn_get_coordinate_weight_cpu(float argmax_h, float argmax_w,
                                 const int height, const int width, const float *im_data,
                                 const int data_width, const int bp_dir)
{
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
  {
    //empty
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  float weight = 0;

  if (bp_dir == 0)
  {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
  }
  else if (bp_dir == 1)
  {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}

void modulated_deformable_im2col_cpu_kernel(const int n, const float *data_im, const float *data_offset, const float *data_mask,
                                                       const int height, const int width, const int kernel_h, const int kernel_w,
                                                       const int pad_h, const int pad_w,
                                                       const int stride_h, const int stride_w,
                                                       const int dilation_h, const int dilation_w,
                                                       const int channel_per_deformable_group,
                                                       const int batch_size, const int num_channels, const int deformable_group,
                                                       const int height_col, const int width_col,
                                                       float *data_col)
{
  // launch channels * batch_size * height_col * width_col cores
  for(int index=0; index<n; index++)
  {
    // NOTE(CharlesShang): different from Dai Jifeng's MXNet implementation, col_buffer is of shape (c*kw*kh, N, oh, ow)
    // here columns is of shape (N, c*kw*kh, oh * ow), need to adapt axis

    // index index of output matrix
    const int w_col = index % width_col;
    const int h_col = (index / width_col) % height_col;
    // const int b_col = (index / width_col / height_col) % batch_size;
    const int b_col = (index / width_col / height_col / num_channels) % batch_size;
    // const int c_im = (index / width_col / height_col) / batch_size;
    const int c_im = (index / width_col / height_col) % num_channels;
    // const int c_col = c_im * kernel_h * kernel_w;
    const int c_col = c_im * kernel_h * kernel_w;

    // compute deformable group index
    const int deformable_group_index = c_im / channel_per_deformable_group;

    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    //  float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    float *data_col_ptr = data_col + ((b_col * num_channels * kernel_w * kernel_h + c_col) * height_col + h_col) * width_col + w_col;
    //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
    const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
    const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;

    const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i)
    {
      for (int j = 0; j < kernel_w; ++j)
      {
        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
        const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
        const float offset_h = data_offset_ptr[data_offset_h_ptr];
        const float offset_w = data_offset_ptr[data_offset_w_ptr];
        const float mask = data_mask_ptr[data_mask_hw_ptr];
        float val = static_cast<float>(0);
        const float h_im = h_in + i * dilation_h + offset_h;
        const float w_im = w_in + j * dilation_w + offset_w;
        //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
        {
          //const float map_h = i * dilation_h + offset_h;
          //const float map_w = j * dilation_w + offset_w;
          //const int cur_height = height - h_in;
          //const int cur_width = width - w_in;
          //val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
          val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, height, width, h_im, w_im);
        }
        *data_col_ptr = val * mask;
        // data_col_ptr += batch_size * height_col * width_col;
        data_col_ptr += height_col * width_col;
      }
    }
  }
}

void modulated_deformable_col2im_cpu_kernel(const int n, const float *data_col, const float *data_offset, const float *data_mask,
                                                       const int channels, const int height, const int width,
                                                       const int kernel_h, const int kernel_w,
                                                       const int pad_h, const int pad_w,
                                                       const int stride_h, const int stride_w,
                                                       const int dilation_h, const int dilation_w,
                                                       const int channel_per_deformable_group,
                                                       const int batch_size, const int deformable_group,
                                                       const int height_col, const int width_col,
                                                       float *grad_im)
{
  for(int index = 0; index < n; index++)
  {
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
    const float offset_h = data_offset_ptr[data_offset_h_ptr];
    const float offset_w = data_offset_ptr[data_offset_w_ptr];
    const float mask = data_mask_ptr[data_mask_hw_ptr];
    const float cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const float cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const float cur_top_grad = data_col[index] * mask;
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    
    for (int dy = -2; dy <= 2; dy++)
    {
      for (int dx = -2; dx <= 2; dx++)
      {
        if (cur_h + dy >= 0 && cur_h + dy < height &&
            cur_w + dx >= 0 && cur_w + dx < width &&
            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1)
        {
          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          float weight = dmcn_get_gradient_weight_cpu(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
          //atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;

        }
      }
    }
  }
}

void modulated_deformable_col2im_coord_cpu_kernel(const int n, const float *data_col, const float *data_im,
                                                             const float *data_offset, const float *data_mask,
                                                             const int channels, const int height, const int width,
                                                             const int kernel_h, const int kernel_w,
                                                             const int pad_h, const int pad_w,
                                                             const int stride_h, const int stride_w,
                                                             const int dilation_h, const int dilation_w,
                                                             const int channel_per_deformable_group,
  
Download .txt
gitextract_hty7qque/

├── .gitignore
├── Datasets/
│   ├── __init__.py
│   ├── cowmask.py
│   ├── flowlib.py
│   ├── segmask_gt.py
│   ├── tartanTrajFlowDataset.py
│   ├── util_flow.py
│   └── utils.py
├── DytanVO.py
├── LICENSE
├── Network/
│   ├── PWC/
│   │   ├── PWCNet.py
│   │   ├── __init__.py
│   │   └── correlation.py
│   ├── VOFlowNet.py
│   ├── VONet.py
│   ├── __init__.py
│   └── rigidmask/
│       ├── .gitignore
│       ├── VCNplus.py
│       ├── __init__.py
│       ├── conv4d.py
│       ├── det.py
│       ├── det_losses.py
│       ├── det_utils.py
│       ├── networks/
│       │   ├── DCNv2/
│       │   │   ├── .gitignore
│       │   │   ├── DCN/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── dcn_v2.py
│       │   │   │   ├── src/
│       │   │   │   │   ├── cpu/
│       │   │   │   │   │   ├── dcn_v2_cpu.cpp
│       │   │   │   │   │   ├── dcn_v2_im2col_cpu.cpp
│       │   │   │   │   │   ├── dcn_v2_im2col_cpu.h
│       │   │   │   │   │   ├── dcn_v2_psroi_pooling_cpu.cpp
│       │   │   │   │   │   └── vision.h
│       │   │   │   │   ├── cuda/
│       │   │   │   │   │   ├── dcn_v2_cuda.cu
│       │   │   │   │   │   ├── dcn_v2_im2col_cuda.cu
│       │   │   │   │   │   ├── dcn_v2_im2col_cuda.h
│       │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
│       │   │   │   │   │   └── vision.h
│       │   │   │   │   ├── dcn_v2.h
│       │   │   │   │   └── vision.cpp
│       │   │   │   ├── testcpu.py
│       │   │   │   └── testcuda.py
│       │   │   ├── LICENSE
│       │   │   ├── README.md
│       │   │   ├── make.sh
│       │   │   └── setup.py
│       │   ├── dlav0.py
│       │   ├── large_hourglass.py
│       │   ├── msra_resnet.py
│       │   ├── pose_dla_dcn.py
│       │   └── resnet_dcn.py
│       └── submodule.py
├── README.md
├── environment.yml
├── evaluator/
│   ├── __init__.py
│   ├── evaluate_ate_scale.py
│   ├── evaluate_kitti.py
│   ├── evaluate_rpe.py
│   ├── evaluator_base.py
│   ├── tartanair_evaluator.py
│   ├── trajectory_transform.py
│   └── transformation.py
└── vo_trajectory_from_folder.py
Download .txt
SYMBOL INDEX (498 symbols across 38 files)

FILE: Datasets/cowmask.py
  function gaussian_kernels (line 34) | def gaussian_kernels(sigma, max_sigma):
  function cow_masks (line 49) | def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range):

FILE: Datasets/flowlib.py
  function show_flow (line 30) | def show_flow(filename):
  function point_vec (line 42) | def point_vec(img,flow,skip=10):
  function visualize_flow (line 63) | def visualize_flow(flow, mode='Y'):
  function read_flow (line 101) | def read_flow(filename):
  function write_flo (line 122) | def write_flo(flow, filename):
  function write_flow (line 147) | def write_flow(flow, filename):
  function save_flow_image (line 166) | def save_flow_image(flow, image_file):
  function flowfile_to_imagefile (line 178) | def flowfile_to_imagefile(flow_file, image_file):
  function segment_flow (line 189) | def segment_flow(flow):
  function flow_error (line 226) | def flow_error(tu, tv, u, v):
  function flow_to_image (line 281) | def flow_to_image(flow):
  function evaluate_flow_file (line 319) | def evaluate_flow_file(gt_file, pred_file):
  function evaluate_flow (line 334) | def evaluate_flow(gt_flow, pred_flow):
  function read_disp_png (line 350) | def read_disp_png(file_name):
  function disp_to_flowfile (line 368) | def disp_to_flowfile(disp, filename):
  function read_image (line 396) | def read_image(filename):
  function warp_image (line 407) | def warp_image(im, flow):
  function pfm_to_flo (line 452) | def pfm_to_flo(pfm_file):
  function scale_image (line 459) | def scale_image(image, new_range):
  function compute_color (line 474) | def compute_color(u, v):
  function make_color_wheel (line 518) | def make_color_wheel():
  function read_flo_file (line 568) | def read_flo_file(filename):
  function read_png_file (line 593) | def read_png_file(flow_file):
  function read_pfm_file (line 618) | def read_pfm_file(flow_file):
  function resample (line 629) | def resample(img, sz):

FILE: Datasets/segmask_gt.py
  function dataloader (line 22) | def dataloader(filepath, fpass='frames_cleanpass', level=6):
  function default_loader (line 79) | def default_loader(path):
  function flow_loader (line 82) | def flow_loader(path):
  function load_exts (line 90) | def load_exts(cam_file):
  function disparity_loader (line 103) | def disparity_loader(path):
  function triangulation (line 112) | def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy =...
  function exp_loader (line 121) | def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, c...
  function motionmask (line 214) | def motionmask(flowl0, depthflow, RT01):

FILE: Datasets/tartanTrajFlowDataset.py
  class TrajFolderDataset (line 16) | class TrajFolderDataset(Dataset):
    method __init__ (line 19) | def __init__(self, imgfolder, transform = None,
    method __len__ (line 38) | def __len__(self):
    method __getitem__ (line 41) | def __getitem__(self, idx):

FILE: Datasets/util_flow.py
  function readPFM (line 26) | def readPFM(file):
  function save_pfm (line 65) | def save_pfm(file, image, scale = 1):
  function ReadMiddleburyFloFile (line 92) | def ReadMiddleburyFloFile(path):
  function ReadKittiPngFile (line 122) | def ReadKittiPngFile(path):
  function WriteMiddleburyFloFile (line 159) | def WriteMiddleburyFloFile(path, width, height, u, v, mask=None):
  function write_flow (line 181) | def write_flow(path,flow):
  function WriteKittiPngFile (line 197) | def WriteKittiPngFile(path, width, height, u, v, mask=None):
  function ConvertMiddleburyFloToKittiPng (line 219) | def ConvertMiddleburyFloToKittiPng(src_path, dest_path):
  function ConvertKittiPngToMiddleburyFlo (line 223) | def ConvertKittiPngToMiddleburyFlo(src_path, dest_path):
  function ParseFilenameKitti (line 228) | def ParseFilenameKitti(filename):
  function read_calib_file (line 239) | def read_calib_file(filepath):
  function load_calib_cam_to_cam (line 255) | def load_calib_cam_to_cam(cam_to_cam_file):

FILE: Datasets/utils.py
  class Compose (line 29) | class Compose(object):
    method __init__ (line 42) | def __init__(self, transforms):
    method __call__ (line 45) | def __call__(self, img):
  class DownscaleFlow (line 51) | class DownscaleFlow(object):
    method __init__ (line 56) | def __init__(self, scale=4):
    method __call__ (line 62) | def __call__(self, sample):
  class CropCenter (line 76) | class CropCenter(object):
    method __init__ (line 81) | def __init__(self, size):
    method __call__ (line 87) | def __call__(self, sample):
  class ResizeData (line 119) | class ResizeData(object):
    method __init__ (line 123) | def __init__(self, size):
    method __call__ (line 129) | def __call__(self, sample):
  class ToTensor (line 149) | class ToTensor(object):
    method __call__ (line 150) | def __call__(self, sample):
  function tensor2img (line 168) | def tensor2img(tensImg,mean,std):
  function bilinear_interpolate (line 180) | def bilinear_interpolate(img, h, w):
  function calculate_angle_distance_from_du_dv (line 206) | def calculate_angle_distance_from_du_dv(du, dv, flagDegree=False):
  function visflow (line 220) | def visflow(flownp, maxF=500.0, n=8, mask=None, hueMax=179, angShift=0.0):
  function dataset_intrinsics (line 251) | def dataset_intrinsics(dataset='tartanair', is_15mm=False):
  function plot_traj (line 278) | def plot_traj(gtposes, estposes, vis=False, savefigname=None, title=''):
  function make_intrinsics_layer (line 295) | def make_intrinsics_layer(w, h, fx, fy, ox, oy):
  function load_kiiti_intrinsics (line 303) | def load_kiiti_intrinsics(filename):
  function load_sceneflow_extrinsics (line 326) | def load_sceneflow_extrinsics(filename):

FILE: DytanVO.py
  class DytanVO (line 48) | class DytanVO(object):
    method __init__ (line 49) | def __init__(self, vo_model_name, seg_model_name, image_height, image_...
    method load_vo_model (line 100) | def load_vo_model(self, model, modelname):
    method load_seg_model (line 120) | def load_seg_model(self, model, modelname):
    method test_batch (line 130) | def test_batch(self, sample, intrinsics, seg_thresh, iter_num):
    method initialize_segnet_input (line 229) | def initialize_segnet_input(self, imgL_o, intrinsics):
    method transform_segnet_input (line 255) | def transform_segnet_input(self, imgL_o, imgR_o):

FILE: Network/PWC/PWCNet.py
  function conv (line 16) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
  function predict_flow (line 22) | def predict_flow(in_planes):
  function deconv (line 25) | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
  class PWCDCNet (line 30) | class PWCDCNet(nn.Module):
    method __init__ (line 35) | def __init__(self, md=4, flow_norm=20.0):
    method warp (line 133) | def warp(self, x, flo):
    method multi_scale_conv (line 171) | def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_f...
    method concate_two_layers (line 179) | def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_...
    method forward (line 191) | def forward(self,x):
  function pwc_dc_net (line 236) | def pwc_dc_net(path=None):

FILE: Network/PWC/correlation.py
  function cupy_kernel (line 235) | def cupy_kernel(strFunction, objVariables):
  function cupy_launch (line 274) | def cupy_launch(strFunction, strKernel):
  class _FunctionCorrelation (line 278) | class _FunctionCorrelation(torch.autograd.Function):
    method forward (line 280) | def forward(self, first, second):
    method backward (line 333) | def backward(self, gradOutput):
  function FunctionCorrelation (line 385) | def FunctionCorrelation(tenFirst, tenSecond):
  class ModuleCorrelation (line 389) | class ModuleCorrelation(torch.nn.Module):
    method __init__ (line 390) | def __init__(self):
    method forward (line 394) | def forward(self, tenFirst, tenSecond):

FILE: Network/VOFlowNet.py
  function conv (line 38) | def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dila...
  function linear (line 51) | def linear(in_planes, out_planes):
  class BasicBlock (line 57) | class BasicBlock(nn.Module):
    method __init__ (line 59) | def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
    method forward (line 68) | def forward(self, x):
  class VOFlowRes (line 78) | class VOFlowRes(nn.Module):
    method __init__ (line 79) | def __init__(self):
    method _make_layer (line 111) | def _make_layer(self, block, planes, blocks, stride, pad, dilation):
    method forward (line 125) | def forward(self, x):

FILE: Network/VONet.py
  class VONet (line 39) | class VONet(nn.Module):
    method __init__ (line 40) | def __init__(self):
    method forward (line 46) | def forward(self, x, only_flow=False, only_pose=False):

FILE: Network/rigidmask/VCNplus.py
  class flow_reg (line 16) | class flow_reg(nn.Module):
    method __init__ (line 25) | def __init__(self, size, ent=False, maxdisp = int(4), fac=1):
    method forward (line 44) | def forward(self, x):
  class WarpModule (line 91) | class WarpModule(nn.Module):
    method __init__ (line 95) | def __init__(self, size):
    method forward (line 105) | def forward(self, x, flo):
  function get_grid (line 127) | def get_grid(B,H,W):
  class SegNet (line 135) | class SegNet(nn.Module):
    method __init__ (line 139) | def __init__(self, size, md=[4,4,4,4,4], fac=1., exp_unc=True):
    method corrf (line 365) | def corrf(self, refimg_fea, targetimg_fea,maxdisp, fac=1):
    method cost_matching (line 390) | def cost_matching(self,up_flow, c1, c2, flowh, enth, level):
    method affine (line 445) | def affine(self,pref,flow, pw=1):
    method forward_VCN (line 470) | def forward_VCN(self, im):
    method forward (line 528) | def forward(self,im,disc_aux=None,flowdc=None):

FILE: Network/rigidmask/conv4d.py
  function conv4d (line 13) | def conv4d(data,filters,bias=None,permute_filters=True,use_half=False):
  class Conv4d (line 59) | class Conv4d(_ConvNd):
    method __init__ (line 64) | def __init__(self, in_channels, out_channels, kernel_size, bias=True, ...
    method forward (line 90) | def forward(self, input):
  class fullConv4d (line 97) | class fullConv4d(torch.nn.Module):
    method __init__ (line 98) | def __init__(self, in_channels, out_channels, kernel_size, bias=True, ...
    method forward (line 105) | def forward(self, input):
  class butterfly4D (line 112) | class butterfly4D(torch.nn.Module):
    method __init__ (line 116) | def __init__(self, fdima, fdimb, withbn=True, full=True,groups=1):
    method forward (line 127) | def forward(self,x):
  class projfeat4d (line 153) | class projfeat4d(torch.nn.Module):
    method __init__ (line 157) | def __init__(self, in_planes, out_planes, stride, with_bn=True,groups=1):
    method forward (line 164) | def forward(self,x):
  class sepConv4d (line 173) | class sepConv4d(torch.nn.Module):
    method __init__ (line 177) | def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True...
    method forward (line 209) | def forward(self,x):
  class sepConv4dBlock (line 223) | class sepConv4dBlock(torch.nn.Module):
    method __init__ (line 228) | def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True...
    method forward (line 243) | def forward(self,x):

FILE: Network/rigidmask/det.py
  function create_model (line 24) | def create_model(arch, heads, head_conv,num_input):
  function load_model (line 31) | def load_model(model, model_path, optimizer=None, resume=False,
  function save_model (line 86) | def save_model(path, epoch, model, optimizer=None):

FILE: Network/rigidmask/det_losses.py
  function _slow_neg_loss (line 18) | def _slow_neg_loss(pred, gt):
  function _neg_loss (line 43) | def _neg_loss(pred, gt, heat_logits):
  function _not_faster_neg_loss (line 71) | def _not_faster_neg_loss(pred, gt):
  function _slow_reg_loss (line 88) | def _slow_reg_loss(regr, gt_regr, mask):
  function _reg_loss (line 99) | def _reg_loss(regr, gt_regr, mask):
  class FocalLoss (line 116) | class FocalLoss(nn.Module):
    method __init__ (line 118) | def __init__(self):
    method forward (line 122) | def forward(self, out, target, logits):
  class RegLoss (line 125) | class RegLoss(nn.Module):
    method __init__ (line 133) | def __init__(self):
    method forward (line 136) | def forward(self, output, mask, ind, target):
  class RegL1Loss (line 141) | class RegL1Loss(nn.Module):
    method __init__ (line 142) | def __init__(self):
    method forward (line 145) | def forward(self, output, mask, ind, target):
  class NormRegL1Loss (line 153) | class NormRegL1Loss(nn.Module):
    method __init__ (line 154) | def __init__(self):
    method forward (line 157) | def forward(self, output, mask, ind, target):
  class RegWeightedL1Loss (line 167) | class RegWeightedL1Loss(nn.Module):
    method __init__ (line 168) | def __init__(self):
    method forward (line 171) | def forward(self, output, mask, ind, target):
  class L1Loss (line 179) | class L1Loss(nn.Module):
    method __init__ (line 180) | def __init__(self):
    method forward (line 183) | def forward(self, output, mask, ind, target):
  class BinRotLoss (line 189) | class BinRotLoss(nn.Module):
    method __init__ (line 190) | def __init__(self):
    method forward (line 193) | def forward(self, output, mask, ind, rotbin, rotres):
  function compute_res_loss (line 198) | def compute_res_loss(output, target):
  function compute_bin_loss (line 202) | def compute_bin_loss(output, target, mask):
  function compute_rot_loss (line 207) | def compute_rot_loss(output, target_bin, target_res, mask):

FILE: Network/rigidmask/det_utils.py
  function _sigmoid (line 8) | def _sigmoid(x):
  function _gather_feat (line 12) | def _gather_feat(feat, ind, mask=None):
  function _transpose_and_gather_feat (line 22) | def _transpose_and_gather_feat(feat, ind):
  function flip_tensor (line 28) | def flip_tensor(x):
  function flip_lr (line 33) | def flip_lr(x, flip_idx):
  function flip_lr_off (line 41) | def flip_lr_off(x, flip_idx):

FILE: Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py
  class _DCNv2 (line 16) | class _DCNv2(Function):
    method forward (line 18) | def forward(ctx, input, offset, mask, weight, bias,
    method backward (line 37) | def backward(ctx, grad_output):
  class DCNv2 (line 57) | class DCNv2(nn.Module):
    method __init__ (line 59) | def __init__(self, in_channels, out_channels,
    method reset_parameters (line 75) | def reset_parameters(self):
    method forward (line 83) | def forward(self, input, offset, mask):
  class DCN (line 97) | class DCN(DCNv2):
    method __init__ (line 99) | def __init__(self, in_channels, out_channels,
    method init_offset (line 114) | def init_offset(self):
    method forward (line 118) | def forward(self, input):
  class _DCNv2Pooling (line 132) | class _DCNv2Pooling(Function):
    method forward (line 134) | def forward(ctx, input, rois, offset,
    method backward (line 163) | def backward(ctx, grad_output):
  class DCNv2Pooling (line 187) | class DCNv2Pooling(nn.Module):
    method __init__ (line 189) | def __init__(self,
    method forward (line 208) | def forward(self, input, rois, offset):
  class DCNPooling (line 223) | class DCNPooling(DCNv2Pooling):
    method __init__ (line 225) | def __init__(self,
    method forward (line 259) | def forward(self, input, rois):

FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp
  function dcn_v2_cpu_forward (line 22) | at::Tensor
  function dcn_v2_cpu_backward (line 113) | std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,

FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp
  function dmcn_im2col_bilinear_cpu (line 27) | float dmcn_im2col_bilinear_cpu(const float *bottom_data, const int data_...
  function dmcn_get_gradient_weight_cpu (line 58) | float dmcn_get_gradient_weight_cpu(float argmax_h, float argmax_w,
  function dmcn_get_coordinate_weight_cpu (line 84) | float dmcn_get_coordinate_weight_cpu(float argmax_h, float argmax_w,
  function modulated_deformable_im2col_cpu_kernel (line 127) | void modulated_deformable_im2col_cpu_kernel(const int n, const float *da...
  function modulated_deformable_col2im_cpu_kernel (line 198) | void modulated_deformable_col2im_cpu_kernel(const int n, const float *da...
  function modulated_deformable_col2im_coord_cpu_kernel (line 259) | void modulated_deformable_col2im_coord_cpu_kernel(const int n, const flo...
  function modulated_deformable_im2col_cpu (line 331) | void modulated_deformable_im2col_cpu(const float* data_im, const float* ...
  function modulated_deformable_col2im_cpu (line 353) | void modulated_deformable_col2im_cpu(const float* data_col, const float*...
  function modulated_deformable_col2im_coord_cpu (line 375) | void modulated_deformable_col2im_coord_cpu(const float* data_col, const ...

FILE: Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_psroi_pooling_cpu.cpp
  function T (line 34) | T bilinear_interp_cpu(
  function DeformablePSROIPoolForwardKernelCpu (line 59) | void DeformablePSROIPoolForwardKernelCpu(
  function DeformablePSROIPoolBackwardAccKernelCpu (line 149) | void DeformablePSROIPoolBackwardAccKernelCpu(
  function dcn_v2_psroi_pooling_cpu_forward (line 278) | std::tuple<at::Tensor, at::Tensor>
  function dcn_v2_psroi_pooling_cpu_backward (line 350) | std::tuple<at::Tensor, at::Tensor>

FILE: Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp
  function PYBIND11_MODULE (line 4) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: Network/rigidmask/networks/DCNv2/DCN/testcpu.py
  function conv_identify (line 20) | def conv_identify(weight, bias):
  function check_zero_offset (line 32) | def check_zero_offset():
  function check_gradient_dconv (line 69) | def check_gradient_dconv():
  function check_pooling_zero_offset (line 100) | def check_pooling_zero_offset():
  function check_gradient_dpooling (line 134) | def check_gradient_dpooling():
  function example_dconv (line 169) | def example_dconv():
  function example_dpooling (line 183) | def example_dpooling():
  function example_mdpooling (line 226) | def example_mdpooling():

FILE: Network/rigidmask/networks/DCNv2/DCN/testcuda.py
  function conv_identify (line 20) | def conv_identify(weight, bias):
  function check_zero_offset (line 32) | def check_zero_offset():
  function check_gradient_dconv (line 69) | def check_gradient_dconv():
  function check_pooling_zero_offset (line 100) | def check_pooling_zero_offset():
  function check_gradient_dpooling (line 134) | def check_gradient_dpooling():
  function example_dconv (line 169) | def example_dconv():
  function example_dpooling (line 183) | def example_dpooling():
  function example_mdpooling (line 226) | def example_mdpooling():

FILE: Network/rigidmask/networks/DCNv2/setup.py
  function get_extensions (line 18) | def get_extensions():

FILE: Network/rigidmask/networks/dlav0.py
  function get_model_url (line 18) | def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
  function conv3x3 (line 22) | def conv3x3(in_planes, out_planes, stride=1):
  class BasicBlock (line 28) | class BasicBlock(nn.Module):
    method __init__ (line 29) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 42) | def forward(self, x, residual=None):
  class Bottleneck (line 59) | class Bottleneck(nn.Module):
    method __init__ (line 62) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 79) | def forward(self, x, residual=None):
  class BottleneckX (line 100) | class BottleneckX(nn.Module):
    method __init__ (line 104) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 123) | def forward(self, x, residual=None):
  class Root (line 144) | class Root(nn.Module):
    method __init__ (line 145) | def __init__(self, in_channels, out_channels, kernel_size, residual):
    method forward (line 154) | def forward(self, *x):
  class Tree (line 165) | class Tree(nn.Module):
    method __init__ (line 166) | def __init__(self, levels, block, in_channels, out_channels, stride=1,
    method forward (line 205) | def forward(self, x, residual=None, children=None):
  class DLA (line 221) | class DLA(nn.Module):
    method __init__ (line 222) | def __init__(self, levels, channels, num_classes=1000,
    method _make_level (line 260) | def _make_level(self, block, inplanes, planes, blocks, stride=1):
    method _make_conv_level (line 277) | def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation...
    method forward (line 289) | def forward(self, x):
    method load_pretrained_model (line 304) | def load_pretrained_model(self,  data='imagenet', name='dla34', hash='...
  function dla34 (line 319) | def dla34(pretrained, **kwargs):  # DLA-34
  function dla46_c (line 328) | def dla46_c(pretrained=None, **kwargs):  # DLA-46-C
  function dla46x_c (line 338) | def dla46x_c(pretrained=None, **kwargs):  # DLA-X-46-C
  function dla60x_c (line 348) | def dla60x_c(pretrained, **kwargs):  # DLA-X-60-C
  function dla60 (line 358) | def dla60(pretrained=None, **kwargs):  # DLA-60
  function dla60x (line 368) | def dla60x(pretrained=None, **kwargs):  # DLA-X-60
  function dla102 (line 378) | def dla102(pretrained=None, **kwargs):  # DLA-102
  function dla102x (line 387) | def dla102x(pretrained=None, **kwargs):  # DLA-X-102
  function dla102x2 (line 396) | def dla102x2(pretrained=None, **kwargs):  # DLA-X-102 64
  function dla169 (line 405) | def dla169(pretrained=None, **kwargs):  # DLA-169
  function set_bn (line 414) | def set_bn(bn):
  class Identity (line 420) | class Identity(nn.Module):
    method __init__ (line 421) | def __init__(self):
    method forward (line 424) | def forward(self, x):
  function fill_up_weights (line 428) | def fill_up_weights(up):
  class IDAUp (line 440) | class IDAUp(nn.Module):
    method __init__ (line 441) | def __init__(self, node_kernel, out_dim, channels, up_factors):
    method forward (line 482) | def forward(self, layers):
  class DLAUp (line 499) | class DLAUp(nn.Module):
    method __init__ (line 500) | def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None):
    method forward (line 515) | def forward(self, layers):
  function fill_fc_weights (line 524) | def fill_fc_weights(layers):
  class DLASeg (line 533) | class DLASeg(nn.Module):
    method __init__ (line 534) | def __init__(self, base_name, heads,
    method forward (line 600) | def forward(self, x):
  function get_pose_net (line 642) | def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):

FILE: Network/rigidmask/networks/large_hourglass.py
  class convolution (line 17) | class convolution(nn.Module):
    method __init__ (line 18) | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
    method forward (line 26) | def forward(self, x):
  class fully_connected (line 32) | class fully_connected(nn.Module):
    method __init__ (line 33) | def __init__(self, inp_dim, out_dim, with_bn=True):
    method forward (line 42) | def forward(self, x):
  class residual (line 48) | class residual(nn.Module):
    method __init__ (line 49) | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
    method forward (line 65) | def forward(self, x):
  function make_layer (line 76) | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
  function make_layer_revr (line 82) | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **k...
  class MergeUp (line 89) | class MergeUp(nn.Module):
    method forward (line 90) | def forward(self, up1, up2):
  function make_merge_layer (line 93) | def make_merge_layer(dim):
  function make_pool_layer (line 99) | def make_pool_layer(dim):
  function make_unpool_layer (line 102) | def make_unpool_layer(dim):
  function make_kp_layer (line 105) | def make_kp_layer(cnv_dim, curr_dim, out_dim):
  function make_inter_layer (line 111) | def make_inter_layer(dim):
  function make_cnv_layer (line 114) | def make_cnv_layer(inp_dim, out_dim):
  class kp_module (line 117) | class kp_module(nn.Module):
    method __init__ (line 118) | def __init__(
    method forward (line 167) | def forward(self, x):
  class exkp (line 176) | class exkp(nn.Module):
    method __init__ (line 177) | def __init__(
    method forward (line 253) | def forward(self, image):
  function make_hg_layer (line 277) | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
  class HourglassNet (line 283) | class HourglassNet(exkp):
    method __init__ (line 284) | def __init__(self, heads, num_stacks=2):
  function get_large_hourglass_net (line 298) | def get_large_hourglass_net(num_layers, heads, head_conv):

FILE: Network/rigidmask/networks/msra_resnet.py
  function conv3x3 (line 28) | def conv3x3(in_planes, out_planes, stride=1):
  class BasicBlock (line 34) | class BasicBlock(nn.Module):
    method __init__ (line 37) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 47) | def forward(self, x):
  class Bottleneck (line 66) | class Bottleneck(nn.Module):
    method __init__ (line 69) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 84) | def forward(self, x):
  class PoseResNet (line 107) | class PoseResNet(nn.Module):
    method __init__ (line 109) | def __init__(self, block, layers, heads, head_conv, **kwargs):
    method _make_layer (line 154) | def _make_layer(self, block, planes, blocks, stride=1):
    method _get_deconv_cfg (line 171) | def _get_deconv_cfg(self, deconv_kernel, index):
    method _make_deconv_layer (line 184) | def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
    method forward (line 211) | def forward(self, x):
    method init_weights (line 228) | def init_weights(self, num_layers, pretrained=True):
  function get_pose_net (line 275) | def get_pose_net(num_layers, heads, head_conv):

FILE: Network/rigidmask/networks/pose_dla_dcn.py
  function get_model_url (line 21) | def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
  function conv3x3 (line 25) | def conv3x3(in_planes, out_planes, stride=1):
  class BasicBlock (line 31) | class BasicBlock(nn.Module):
    method __init__ (line 32) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 45) | def forward(self, x, residual=None):
  class Bottleneck (line 62) | class Bottleneck(nn.Module):
    method __init__ (line 65) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 82) | def forward(self, x, residual=None):
  class BottleneckX (line 103) | class BottleneckX(nn.Module):
    method __init__ (line 107) | def __init__(self, inplanes, planes, stride=1, dilation=1):
    method forward (line 126) | def forward(self, x, residual=None):
  class Root (line 147) | class Root(nn.Module):
    method __init__ (line 148) | def __init__(self, in_channels, out_channels, kernel_size, residual):
    method forward (line 157) | def forward(self, *x):
  class Tree (line 168) | class Tree(nn.Module):
    method __init__ (line 169) | def __init__(self, levels, block, in_channels, out_channels, stride=1,
    method forward (line 208) | def forward(self, x, residual=None, children=None):
  class DLA (line 224) | class DLA(nn.Module):
    method __init__ (line 225) | def __init__(self, levels, channels, num_classes=1000,
    method _make_level (line 257) | def _make_level(self, block, inplanes, planes, blocks, stride=1):
    method _make_conv_level (line 274) | def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation...
    method forward (line 286) | def forward(self, x):
    method load_pretrained_model (line 294) | def load_pretrained_model(self, data='imagenet', name='dla34', hash='b...
  function dla34 (line 309) | def dla34(pretrained=True, **kwargs):  # DLA-34
  class Identity (line 317) | class Identity(nn.Module):
    method __init__ (line 319) | def __init__(self):
    method forward (line 322) | def forward(self, x):
  function fill_fc_weights (line 326) | def fill_fc_weights(layers):
  function fill_up_weights (line 333) | def fill_up_weights(up):
  class DeformConv (line 345) | class DeformConv(nn.Module):
    method __init__ (line 346) | def __init__(self, chi, cho):
    method forward (line 354) | def forward(self, x):
  class IDAUp (line 360) | class IDAUp(nn.Module):
    method __init__ (line 362) | def __init__(self, o, channels, up_f):
    method forward (line 380) | def forward(self, layers, startp, endp):
  class DLAUp (line 390) | class DLAUp(nn.Module):
    method __init__ (line 391) | def __init__(self, startp, channels, scales, in_channels=None):
    method forward (line 407) | def forward(self, layers):
  class Interpolate (line 416) | class Interpolate(nn.Module):
    method __init__ (line 417) | def __init__(self, scale, mode):
    method forward (line 422) | def forward(self, x):
  class DLASeg (line 427) | class DLASeg(nn.Module):
    method __init__ (line 428) | def __init__(self, base_name, heads, pretrained, down_ratio, final_ker...
    method forward (line 470) | def forward(self, x):
  function get_pose_net (line 485) | def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4,num_inpu...

FILE: Network/rigidmask/networks/resnet_dcn.py
  function conv3x3 (line 32) | def conv3x3(in_planes, out_planes, stride=1):
  class BasicBlock (line 38) | class BasicBlock(nn.Module):
    method __init__ (line 41) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 51) | def forward(self, x):
  class Bottleneck (line 70) | class Bottleneck(nn.Module):
    method __init__ (line 73) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 88) | def forward(self, x):
  function fill_up_weights (line 110) | def fill_up_weights(up):
  function fill_fc_weights (line 121) | def fill_fc_weights(layers):
  class PoseResNet (line 130) | class PoseResNet(nn.Module):
    method __init__ (line 132) | def __init__(self, block, layers, heads, head_conv):
    method _make_layer (line 179) | def _make_layer(self, block, planes, blocks, stride=1):
    method _get_deconv_cfg (line 196) | def _get_deconv_cfg(self, deconv_kernel, index):
    method _make_deconv_layer (line 209) | def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
    method forward (line 248) | def forward(self, x):
    method init_weights (line 265) | def init_weights(self, num_layers):
  function get_pose_net (line 285) | def get_pose_net(num_layers, heads, head_conv=256):

FILE: Network/rigidmask/submodule.py
  class residualBlock (line 12) | class residualBlock(nn.Module):
    method __init__ (line 15) | def __init__(self, in_channels, n_filters, stride=1, downsample=None,d...
    method forward (line 31) | def forward(self, x):
  function conv (line 43) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
  class conv2DBatchNorm (line 51) | class conv2DBatchNorm(nn.Module):
    method __init__ (line 52) | def __init__(self, in_channels, n_filters, k_size,  stride, padding, d...
    method forward (line 71) | def forward(self, inputs):
  class conv2DBatchNormRelu (line 75) | class conv2DBatchNormRelu(nn.Module):
    method __init__ (line 76) | def __init__(self, in_channels, n_filters, k_size,  stride, padding, d...
    method forward (line 95) | def forward(self, inputs):
  class pyramidPooling (line 99) | class pyramidPooling(nn.Module):
    method __init__ (line 101) | def __init__(self, in_channels, with_bn=True, levels=4):
    method forward (line 111) | def forward(self, x):
  class pspnet (line 133) | class pspnet(nn.Module):
    method __init__ (line 137) | def __init__(self, is_proj=True,groups=1):
    method _make_layer (line 193) | def _make_layer(self, block, planes, blocks, stride=1):
    method forward (line 206) | def forward(self, x):
  class pspnet_s (line 249) | class pspnet_s(nn.Module):
    method __init__ (line 253) | def __init__(self, is_proj=True,groups=1):
    method _make_layer (line 309) | def _make_layer(self, block, planes, blocks, stride=1):
    method forward (line 322) | def forward(self, x):
  class bfmodule (line 366) | class bfmodule(nn.Module):
    method __init__ (line 367) | def __init__(self, inplanes, outplanes):
    method _make_layer (line 409) | def _make_layer(self, block, planes, blocks, stride=1):
    method forward (line 422) | def forward(self, x):
  class bfmodule_feat (line 452) | class bfmodule_feat(nn.Module):
    method __init__ (line 453) | def __init__(self, inplanes, outplanes):
    method _make_layer (line 495) | def _make_layer(self, block, planes, blocks, stride=1):
    method forward (line 508) | def forward(self, x):
  function compute_geo_costs (line 539) | def compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n=None):
  function get_skew_mat (line 568) | def get_skew_mat(transx,rotx):
  function sampson_err (line 581) | def sampson_err(x1h, x2h, F):
  function get_intrinsics (line 589) | def get_intrinsics(intr, noise=False):
  function testEss (line 661) | def testEss(K0,K1,R,T,p1,p2):

FILE: evaluator/evaluate_ate_scale.py
  function align (line 49) | def align(model,data,calc_scale=False):
  function plot_traj (line 102) | def plot_traj(ax,stamps,traj,style,color,label):

FILE: evaluator/evaluate_kitti.py
  function trajectory_distances (line 9) | def trajectory_distances(poses):
  function last_frame_from_segment_length (line 19) | def last_frame_from_segment_length(dist,first_frame,length):
  function rotation_error (line 25) | def rotation_error(pose_error):
  function translation_error (line 33) | def translation_error(pose_error):
  function calculate_sequence_error (line 45) | def calculate_sequence_error(poses_gt,poses_result,lengths=[10,20,30,40,...
  function calculate_ave_errors (line 86) | def calculate_ave_errors(errors,lengths=[10,20,30,40,50,60,70,80]):
  function evaluate (line 105) | def evaluate(gt, data,kittitype=True):
  function main (line 114) | def  main():

FILE: evaluator/evaluate_rpe.py
  function ominus (line 44) | def ominus(a,b):
  function compute_distance (line 57) | def compute_distance(transform):
  function compute_angle (line 63) | def compute_angle(transform):
  function distances_along_trajectory (line 70) | def distances_along_trajectory(traj):
  function evaluate_trajectory (line 83) | def evaluate_trajectory(traj_gt, traj_est, param_max_pairs=10000, param_...

FILE: evaluator/evaluator_base.py
  function transform_trajs (line 13) | def transform_trajs(gt_traj, est_traj, cal_scale):
  function quats2SEs (line 22) | def quats2SEs(gt_traj, est_traj):
  function per_frame_scale_alignment (line 27) | def per_frame_scale_alignment(gt_motions, est_motions):
  class ATEEvaluator (line 37) | class ATEEvaluator(object):
    method __init__ (line 38) | def __init__(self):
    method evaluate (line 42) | def evaluate(self, gt_traj, est_traj, scale):
  class RPEEvaluator (line 69) | class RPEEvaluator(object):
    method __init__ (line 70) | def __init__(self):
    method evaluate (line 74) | def evaluate(self, gt_SEs, est_SEs):
  class KittiEvaluator (line 90) | class KittiEvaluator(object):
    method __init__ (line 91) | def __init__(self):
    method evaluate (line 95) | def evaluate(self, gt_SEs, est_SEs, kittitype):

FILE: evaluator/tartanair_evaluator.py
  class TartanAirEvaluator (line 10) | class TartanAirEvaluator:
    method __init__ (line 11) | def __init__(self, scale = False, round=1):
    method evaluate_one_trajectory (line 16) | def evaluate_one_trajectory(self, gt_traj, est_traj, scale=False, kitt...

FILE: evaluator/trajectory_transform.py
  function shift0 (line 7) | def shift0(traj):
  function ned2cam (line 21) | def ned2cam(traj):
  function cam2ned (line 39) | def cam2ned(traj):
  function trajectory_transform (line 58) | def trajectory_transform(gt_traj, est_traj):
  function rescale_bk (line 71) | def rescale_bk(poses_gt, poses):
  function pose2trans (line 88) | def pose2trans(pose_data):
  function rescale (line 98) | def rescale(poses_gt, poses):
  function trajectory_scale (line 118) | def trajectory_scale(traj, scale):
  function timestamp_associate (line 123) | def timestamp_associate(first_list, second_list, max_difference):

FILE: evaluator/transformation.py
  function line2mat (line 8) | def line2mat(line_data):
  function mat2line (line 16) | def mat2line(mat_data):
  function motion2pose (line 24) | def motion2pose(data):
  function pose2motion (line 41) | def pose2motion(data, skip=0):
  function SE2se (line 56) | def SE2se(SE_data):
  function SO2so (line 62) | def SO2so(SO_data):
  function so2SO (line 65) | def so2SO(so_data):
  function se2SE (line 68) | def se2SE(se_data):
  function se_mean (line 74) | def se_mean(se_datas):
  function ses_mean (line 84) | def ses_mean(se_datas):
  function ses2poses (line 93) | def ses2poses(data):
  function ses2poses_quat (line 106) | def ses2poses_quat(data):
  function SEs2ses (line 122) | def SEs2ses(motion_data):
  function so2quat (line 131) | def so2quat(so_data):
  function quat2so (line 140) | def quat2so(quat_data):
  function sos2quats (line 151) | def sos2quats(so_datas,mean_std=[[1],[1]]):
  function SO2quat (line 163) | def SO2quat(SO_data):
  function quat2SO (line 167) | def quat2SO(quat_data):
  function pos_quat2SE (line 171) | def pos_quat2SE(quat_data):
  function pos_quats2SEs (line 180) | def pos_quats2SEs(quat_datas):
  function pos_quats2SE_matrices (line 189) | def pos_quats2SE_matrices(quat_datas):
  function SE2pos_quat (line 200) | def SE2pos_quat(SE_data):
  function SEs2ses (line 206) | def SEs2ses(data):
  function ses2SEs (line 217) | def ses2SEs(data):
  function SE2quat (line 228) | def SE2quat(SE_data):
  function quat2SE (line 238) | def quat2SE(quat_data):
  function SEs2quats (line 249) | def SEs2quats(SEs_data):
  function quats2SEs (line 261) | def quats2SEs(quat_datas):
  function motion_ses2pose_quats (line 273) | def motion_ses2pose_quats(data):
  function pose_quats2motion_ses (line 283) | def pose_quats2motion_ses(data):
  function kitti2tartan (line 293) | def kitti2tartan(traj):
  function tartan2kitti (line 313) | def tartan2kitti(traj):

FILE: vo_trajectory_from_folder.py
  function get_args (line 16) | def get_args():
Condensed preview — 61 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (491K chars).
[
  {
    "path": ".gitignore",
    "chars": 42,
    "preview": "*.pyc\nmodels/\ndata/\n__pycache__/\n.DS_Store"
  },
  {
    "path": "Datasets/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "Datasets/cowmask.py",
    "chars": 3660,
    "preview": "# pylint: disable=bad-indentation\n# coding=utf-8\n# Copyright 2022 The Google Research Authors.\n#\n# Licensed under the Ap"
  },
  {
    "path": "Datasets/flowlib.py",
    "chars": 18503,
    "preview": "\"\"\"\n# ==============================\n# flowlib.py\n# library for optical flow processing\n# Author: Ruoteng Li\n# Date: 6th"
  },
  {
    "path": "Datasets/segmask_gt.py",
    "chars": 11176,
    "preview": "\"\"\"\n# ==============================\n# segmask_gt.py\n# library to generate groundtruth \n# segmentation mask given flow a"
  },
  {
    "path": "Datasets/tartanTrajFlowDataset.py",
    "chars": 1771,
    "preview": "\"\"\"\n# ==============================\n# tartanTrajFlowDataset.py\n# library for DytanVO data I/O\n# Author: Wenshan Wang, S"
  },
  {
    "path": "Datasets/util_flow.py",
    "chars": 8042,
    "preview": "\"\"\"\n# ==============================\n# util_flow.py\n# library for optical flow processing\n# Author: Gengshan Yang\n# Date"
  },
  {
    "path": "Datasets/utils.py",
    "chars": 10928,
    "preview": "\"\"\"\n# ==============================\n# utils.py\n# misc library for DytanVO\n# Author: Wenshan Wang, Shihao Shen\n# Date: 3"
  },
  {
    "path": "DytanVO.py",
    "chars": 12577,
    "preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Shihao Shen, CMU\n# All rights reserved.\n#\n# Redistrib"
  },
  {
    "path": "LICENSE",
    "chars": 1522,
    "preview": "BSD 3-Clause License\n\nCopyright (c) 2020, Air Lab Stacks\nAll rights reserved.\n\nRedistribution and use in source and bina"
  },
  {
    "path": "Network/PWC/PWCNet.py",
    "chars": 10764,
    "preview": "\"\"\"\nimplementation of the PWC-DC network for optical flow estimation by Sun et al., 2018\n\nJinwei Gu and Zhile Ren\n\n\"\"\"\n\n"
  },
  {
    "path": "Network/PWC/__init__.py",
    "chars": 22,
    "preview": "from .PWCNet import *\n"
  },
  {
    "path": "Network/PWC/correlation.py",
    "chars": 13539,
    "preview": "#!/usr/bin/env python\n\nimport torch\n\nimport cupy\nimport re\n\nkernel_Correlation_rearrange = '''\n\textern \"C\" __global__ vo"
  },
  {
    "path": "Network/VOFlowNet.py",
    "chars": 5275,
    "preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, CMU\n# All rights reserved.\n#\n# Redistri"
  },
  {
    "path": "Network/VONet.py",
    "chars": 3092,
    "preview": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, Yaoyu Hu,  CMU\n# All rights reserved.\n#"
  },
  {
    "path": "Network/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "Network/rigidmask/.gitignore",
    "chars": 12,
    "preview": "__pycache__\n"
  },
  {
    "path": "Network/rigidmask/VCNplus.py",
    "chars": 34308,
    "preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as n"
  },
  {
    "path": "Network/rigidmask/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "Network/rigidmask/conv4d.py",
    "chars": 12200,
    "preview": "import pdb\nimport torch.nn as nn\nimport math\nimport torch\nfrom torch.nn.parameter import Parameter\nimport torch.nn.funct"
  },
  {
    "path": "Network/rigidmask/det.py",
    "chars": 3445,
    "preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tor"
  },
  {
    "path": "Network/rigidmask/det_losses.py",
    "chars": 7940,
    "preview": "# ------------------------------------------------------------------------------\n# Portions of this code are from\n# Corn"
  },
  {
    "path": "Network/rigidmask/det_utils.py",
    "chars": 1571,
    "preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tor"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/.gitignore",
    "chars": 58,
    "preview": ".vscode\n.idea\n*.so\n*.o\n*pyc\n_ext\nbuild\nDCNv2.egg-info\ndist"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/__init__.py",
    "chars": 22,
    "preview": "from .dcn_v2 import *\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py",
    "chars": 12081,
    "preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp",
    "chars": 10885,
    "preview": "#include <vector>\n#include \"cpu/dcn_v2_im2col_cpu.h\"\n#include <iostream>\n\n#include <ATen/ATen.h>\n//#include <ATen/cuda/C"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp",
    "chars": 19948,
    "preview": "#include \"dcn_v2_im2col_cpu.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n//#incl"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h",
    "chars": 5105,
    "preview": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contrib"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_psroi_pooling_cpu.cpp",
    "chars": 17007,
    "preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/vision.h",
    "chars": 2665,
    "preview": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cpu_forward(const at::Tensor &input,\n                    co"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_cuda.cu",
    "chars": 15024,
    "preview": "#include <vector>\n#include \"cuda/dcn_v2_im2col_cuda.h\"\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#incl"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.cu",
    "chars": 20335,
    "preview": "#include \"dcn_v2_im2col_cuda.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n#inclu"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.h",
    "chars": 5226,
    "preview": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contrib"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_psroi_pooling_cuda.cu",
    "chars": 16240,
    "preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/vision.h",
    "chars": 2669,
    "preview": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cuda_forward(const at::Tensor &input,\n                    c"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/dcn_v2.h",
    "chars": 7715,
    "preview": "#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/vision.h\"\n#endif\n\nat::Tensor\ndcn_v2_forward(const"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp",
    "chars": 405,
    "preview": "\n#include \"dcn_v2.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"dcn_v2_forward\", &dcn_v2_forward, \"dcn_v2_forw"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/testcpu.py",
    "chars": 8233,
    "preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/testcuda.py",
    "chars": 8514,
    "preview": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ impor"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/LICENSE",
    "chars": 1520,
    "preview": "BSD 3-Clause License\n\nCopyright (c) 2019, Charles Shang\nAll rights reserved.\n\nRedistribution and use in source and binar"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/README.md",
    "chars": 913,
    "preview": "## Deformable Convolutional Networks V2 with Pytorch 1.X\n\n### Build\n```bash\n    ./make.sh         # build\n    python tes"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/make.sh",
    "chars": 50,
    "preview": "#!/usr/bin/env bash\npython setup.py build develop\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/setup.py",
    "chars": 2091,
    "preview": "#!/usr/bin/env python\n\nimport os\nimport glob\n\nimport torch\n\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.u"
  },
  {
    "path": "Network/rigidmask/networks/dlav0.py",
    "chars": 22682,
    "preview": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nfrom __future__ import absolute_import\nfrom __future__ import division\nfro"
  },
  {
    "path": "Network/rigidmask/networks/large_hourglass.py",
    "chars": 9942,
    "preview": "# ------------------------------------------------------------------------------\n# This code is base on \n# CornerNet (ht"
  },
  {
    "path": "Network/rigidmask/networks/msra_resnet.py",
    "chars": 10167,
    "preview": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed un"
  },
  {
    "path": "Network/rigidmask/networks/pose_dla_dcn.py",
    "chars": 17721,
    "preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n"
  },
  {
    "path": "Network/rigidmask/networks/resnet_dcn.py",
    "chars": 10058,
    "preview": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed un"
  },
  {
    "path": "Network/rigidmask/submodule.py",
    "chars": 31165,
    "preview": "from __future__ import print_function\nimport torch\nimport torch.nn as nn\nimport torch.utils.data\nfrom torch.autograd imp"
  },
  {
    "path": "README.md",
    "chars": 8824,
    "preview": "# DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments\r\n\r\n<p align=\"center\">\r\n  "
  },
  {
    "path": "environment.yml",
    "chars": 5107,
    "preview": "name: dytanvo\nchannels:\n  - pytorch3d\n  - pytorch\n  - conda-forge\n  - defaults\ndependencies:\n  - _libgcc_mutex=0.1=main\n"
  },
  {
    "path": "evaluator/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "evaluator/evaluate_ate_scale.py",
    "chars": 4561,
    "preview": "#!/usr/bin/python\n\n# Modified by Wenshan Wang\n# Modified by Raul Mur-Artal\n# Automatically compute the optimal scale fac"
  },
  {
    "path": "evaluator/evaluate_kitti.py",
    "chars": 4464,
    "preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
  },
  {
    "path": "evaluator/evaluate_rpe.py",
    "chars": 5002,
    "preview": "#!/usr/bin/python\n# Software License Agreement (BSD License)\n#\n# Modified by Wenshan Wang\n# Copyright (c) 2013, Juergen "
  },
  {
    "path": "evaluator/evaluator_base.py",
    "chars": 3128,
    "preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
  },
  {
    "path": "evaluator/tartanair_evaluator.py",
    "chars": 2036,
    "preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
  },
  {
    "path": "evaluator/trajectory_transform.py",
    "chars": 5049,
    "preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
  },
  {
    "path": "evaluator/transformation.py",
    "chars": 9106,
    "preview": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please"
  },
  {
    "path": "vo_trajectory_from_folder.py",
    "chars": 6973,
    "preview": "from torch.utils.data import DataLoader\nfrom Datasets.utils import ToTensor, Compose, CropCenter, ResizeData, dataset_in"
  }
]

About this extraction

This page contains the full source code of the castacks/DytanVO GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 61 files (462.0 KB), approximately 131.0k tokens, and a symbol index with 498 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!