[
  {
    "path": ".gitignore",
    "content": "*.pyc\nmodels/\ndata/\n__pycache__/\n.DS_Store"
  },
  {
    "path": "Datasets/__init__.py",
    "content": ""
  },
  {
    "path": "Datasets/cowmask.py",
    "content": "# pylint: disable=bad-indentation\n# coding=utf-8\n# Copyright 2022 The Google Research Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"\nCow mask generation. \nhttps://github.com/google-research/google-research/blob/master/milking_cowmask/\nAdapted from LAX implementation to NumPy due to PyTorch dataloader \nbeing incompatible with JAX\nAuthor: Shihao Shen\nDate: 29th Aug 2022\n\"\"\"\nimport math\nimport numpy as np\nfrom scipy import special\nfrom scipy.signal import convolve\n\n_ROOT_2 = math.sqrt(2.0)\n_ROOT_2_PI = math.sqrt(2.0 * math.pi)\n\n\ndef gaussian_kernels(sigma, max_sigma):\n\t\"\"\"Make Gaussian kernels for Gaussian blur.\n\tArgs:\n\t\tsigma: kernel sigma\n\t\tmax_sigma: sigma upper limit as a float (this is used to determine\n\t\t\tthe size of kernel required to fit all kernels)\n  \tReturns:\n\t  \ta (1, kernel_width) numpy array\n  \t\"\"\"\n\tsize = round(max_sigma * 3) * 2 + 1\n\tx = np.arange(-size, size + 1)[None, :].astype(np.float32)\n\ty = np.exp(-0.5 * x ** 2 / sigma ** 2)\n\treturn y / (sigma * _ROOT_2_PI)\n\n\ndef cow_masks(mask_size, log_sigma_range, max_sigma, prop_range):\n\t\"\"\"Generate Cow Mask.\n\tArgs:\n      n_masks: number of masks to generate as an int\n      mask_size: image size as a `(height, width)` tuple\n      log_sigma_range: the range of the sigma (smoothing kernel)\n          parameter in log-space`(log(sigma_min), log(sigma_max))`\n      max_sigma: smoothing sigma upper limit\n 
     prop_range: range from which to draw the proportion `p` that\n        controls the proportion of pixels in a mask that are 1 vs 0\n  Returns:\n      Cow mask as a boolean numpy array of shape `mask_size`, i.e. (height, width)\n\t\"\"\"\n\n\t# Draw the per-mask proportion p\n\tp = np.random.uniform(prop_range[0], prop_range[1])\n\t# Compute threshold factors\n\tthreshold_factor = special.erfinv(2 * p - 1) * _ROOT_2\n\n\tsigma = np.exp(np.random.uniform(log_sigma_range[0], log_sigma_range[1]))\n\n\tnoise = np.random.normal(size=mask_size)\n\n\t# Generate a kernel for each sigma\n\tkernel = gaussian_kernels(sigma, max_sigma)\n\tkernel = kernel.squeeze()\n\t# kernels in y and x\n\tkrn_y = kernel[None, :]\n\tkrn_x = kernel[:, None]\n\n\t# Apply kernels in y and x separately\n\tsmooth_noise = convolve(noise, krn_y, mode='same')\n\tsmooth_noise = convolve(smooth_noise, krn_x, mode='same')\n\n\t# Compute mean and std-dev\n\tnoise_mu = smooth_noise.mean(axis=(0,1))\n\tnoise_sigma = smooth_noise.std(axis=(0,1))\n\t# Compute thresholds\n\tthreshold = threshold_factor * noise_sigma + noise_mu\n\t# Apply threshold\n\tmask = (smooth_noise <= threshold).astype(bool)\n\n\treturn mask\n\n\nif __name__==\"__main__\":\n    import time\n    import matplotlib.pyplot as plt\n\n    cow_sigma_range = (20, 60)\n    log_sigma_range = (math.log(cow_sigma_range[0]), math.log(cow_sigma_range[1]))\n    cow_prop_range = (0.1, 0.5)\n    s = time.time()\n    max_iou = 0\n    # for _ in range(1000):\n    #     mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)\n    #     max_iou = max(max_iou, np.sum(mask) / (240*360))\n    # print(time.time() - s)\n    # print(max_iou)\n\n    mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range)\n    print(np.sum(mask) / (240*360))\n    plt.imshow(mask * 255)\n    plt.savefig('mask.png')"
  },
  {
    "path": "Datasets/flowlib.py",
    "content": "\"\"\"\n# ==============================\n# flowlib.py\n# library for optical flow processing\n# Author: Ruoteng Li\n# Date: 6th Aug 2016\n# ==============================\n\"\"\"\nimport png\nfrom util_flow import readPFM\nimport numpy as np\nimport matplotlib.colors as cl\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport cv2\nimport pdb\n\n\nUNKNOWN_FLOW_THRESH = 1e7\nSMALLFLOW = 0.0\nLARGEFLOW = 1e8\n\n\"\"\"\n=============\nFlow Section\n=============\n\"\"\"\n\n\ndef show_flow(filename):\n    \"\"\"\n    visualize optical flow map using matplotlib\n    :param filename: optical flow file\n    :return: None\n    \"\"\"\n    flow = read_flow(filename)\n    img = flow_to_image(flow)\n    plt.imshow(img)\n    plt.show()\n\n\ndef point_vec(img,flow,skip=10):\n    skip=20\n    maxsize=1000.\n    extendfac=2.\n    resize_factor = max(1,int(max(maxsize/img.shape[0], maxsize/img.shape[1])))\n    meshgrid = np.meshgrid(range(img.shape[1]),range(img.shape[0]))\n    dispimg = cv2.resize(img[:,:,::-1].copy(), None,fx=resize_factor,fy=resize_factor)\n    colorflow = flow_to_image(flow).astype(int)\n    for i in range(img.shape[1]): # x \n        for j in range(img.shape[0]): # y\n            if flow[j,i,2] != 1: continue\n            if j%skip!=0 or i%skip!=0: continue\n            xend = int((meshgrid[0][j,i]+extendfac*flow[j,i,0])*resize_factor)\n            yend = int((meshgrid[1][j,i]+extendfac*flow[j,i,1])*resize_factor)\n            leng = np.linalg.norm(flow[j,i,:2]*extendfac)\n            if leng<1:continue\n            dispimg = cv2.arrowedLine(dispimg, (meshgrid[0][j,i]*resize_factor,meshgrid[1][j,i]*resize_factor),\\\n                                      (xend,yend),\n                                      (int(colorflow[j,i,2]),int(colorflow[j,i,1]),int(colorflow[j,i,0])),4,tipLength=2/leng,line_type=cv2.LINE_AA)\n    return dispimg\n\ndef visualize_flow(flow, mode='Y'):\n    \"\"\"\n    this function visualize the input flow\n    
:param flow: input flow in array\n    :param mode: choose which color mode to visualize the flow (Y: Ccbcr, RGB: RGB color)\n    :return: None\n    \"\"\"\n    if mode == 'Y':\n        # Ccbcr color wheel\n        img = flow_to_image(flow)\n    elif mode == 'RGB':\n        (h, w) = flow.shape[0:2]\n        du = flow[:, :, 0]\n        dv = flow[:, :, 1]\n        valid = flow[:, :, 2]\n        max_flow = np.sqrt(du**2+dv**2).max()\n        img = np.zeros((h, w, 3), dtype=np.float64)\n        # angle layer\n        img[:, :, 0] = np.fmod(np.arctan2(dv, du) / (2 * np.pi)+1.,1.)\n        # magnitude layer, normalized to 1\n        img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow\n        # phase layer\n        img[:, :, 2] = 8 - img[:, :, 1]\n        # clip to [0,1]\n        small_idx = img[:, :, 0:3] < 0\n        large_idx = img[:, :, 0:3] > 1\n        img[small_idx] = 0\n        img[large_idx] = 1\n        # convert to rgb\n        img = cl.hsv_to_rgb(img)\n        # remove invalid point\n        img[:, :, 0] = img[:, :, 0] * valid\n        img[:, :, 1] = img[:, :, 1] * valid\n        img[:, :, 2] = img[:, :, 2] * valid\n\n    return img\n\n\ndef read_flow(filename):\n    \"\"\"\n    read optical flow data from flow file\n    :param filename: name of the flow file\n    :return: optical flow data in numpy array\n    \"\"\"\n    if filename.endswith('.flo'):\n        flow = read_flo_file(filename)\n    elif filename.endswith('.png'):\n        flow = read_png_file(filename)\n    elif filename.endswith('.pfm'):\n        flow = read_pfm_file(filename)\n    else:\n        raise Exception('Invalid flow file format!')\n\n    return flow\n\nimport numpy as np\nimport os\n\n\ndef write_flo(flow, filename):\n\n    TAG_STRING = b'PIEH'\n    assert type(filename) is str, \"file is not str %r\" % str(filename)\n    assert filename[-4:] == '.flo', \"file ending is not .flo %r\" % file[-4:]\n\n    height, width, nBands = flow.shape\n    assert nBands == 2, \"Number of bands = 
%r != 2\" % nBands\n    u = flow[: , : , 0]\n    v = flow[: , : , 1] \n    assert u.shape == v.shape, \"Invalid flow shape\"\n    height, width = u.shape\n\n    f = open(filename,'wb')\n    f.write(TAG_STRING)\n    np.array(width).astype(np.int32).tofile(f)\n    np.array(height).astype(np.int32).tofile(f)\n    tmp = np.zeros((height, width*nBands))\n    tmp[:,np.arange(width)*2] = u\n    tmp[:,np.arange(width)*2 + 1] = v\n    tmp.astype(np.float32).tofile(f)\n\n    f.close()\n\n\ndef write_flow(flow, filename):\n    \"\"\"\n    write optical flow in Middlebury .flo format\n    :param flow: optical flow map\n    :param filename: optical flow file path to be saved\n    :return: None\n    \"\"\"\n    f = open(filename, 'wb')\n    magic = np.array([202021.25], dtype=np.float32)\n    (height, width) = flow.shape[0:2]\n    w = np.array([width], dtype=np.int32)\n    h = np.array([height], dtype=np.int32)\n    magic.tofile(f)\n    w.tofile(f)\n    h.tofile(f)\n    flow.tofile(f)\n    f.close()\n\n\ndef save_flow_image(flow, image_file):\n    \"\"\"\n    save flow visualization into image file\n    :param flow: optical flow data\n    :param flow_fil\n    :return: None\n    \"\"\"\n    flow_img = flow_to_image(flow)\n    img_out = Image.fromarray(flow_img)\n    img_out.save(image_file)\n\n\ndef flowfile_to_imagefile(flow_file, image_file):\n    \"\"\"\n    convert flowfile into image file\n    :param flow: optical flow data\n    :param flow_fil\n    :return: None\n    \"\"\"\n    flow = read_flow(flow_file)\n    save_flow_image(flow, image_file)\n\n\ndef segment_flow(flow):\n    h = flow.shape[0]\n    w = flow.shape[1]\n    u = flow[:, :, 0]\n    v = flow[:, :, 1]\n\n    idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW))\n    idx2 = (abs(u) == SMALLFLOW)\n    class0 = (v == 0) & (u == 0)\n    u[idx2] = 0.00001\n    tan_value = v / u\n\n    class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0)\n    class2 = (tan_value >= 1) & (u >= 0) & (v >= 0)\n    class3 = 
(tan_value < -1) & (u <= 0) & (v >= 0)\n    class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0)\n    class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0)\n    class7 = (tan_value < -1) & (u >= 0) & (v <= 0)\n    class6 = (tan_value >= 1) & (u <= 0) & (v <= 0)\n    class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0)\n\n    seg = np.zeros((h, w))\n\n    seg[class1] = 1\n    seg[class2] = 2\n    seg[class3] = 3\n    seg[class4] = 4\n    seg[class5] = 5\n    seg[class6] = 6\n    seg[class7] = 7\n    seg[class8] = 8\n    seg[class0] = 0\n    seg[idx] = 0\n\n    return seg\n\n\ndef flow_error(tu, tv, u, v):\n    \"\"\"\n    Calculate average end point error\n    :param tu: ground-truth horizontal flow map\n    :param tv: ground-truth vertical flow map\n    :param u:  estimated horizontal flow map\n    :param v:  estimated vertical flow map\n    :return: End point error of the estimated flow\n    \"\"\"\n    smallflow = 0.0\n    '''\n    stu = tu[bord+1:end-bord,bord+1:end-bord]\n    stv = tv[bord+1:end-bord,bord+1:end-bord]\n    su = u[bord+1:end-bord,bord+1:end-bord]\n    sv = v[bord+1:end-bord,bord+1:end-bord]\n    '''\n    stu = tu[:]\n    stv = tv[:]\n    su = u[:]\n    sv = v[:]\n\n    idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH)\n    stu[idxUnknow] = 0\n    stv[idxUnknow] = 0\n    su[idxUnknow] = 0\n    sv[idxUnknow] = 0\n\n    ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)]\n    index_su = su[ind2]\n    index_sv = sv[ind2]\n    an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1)\n    un = index_su * an\n    vn = index_sv * an\n\n    index_stu = stu[ind2]\n    index_stv = stv[ind2]\n    tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1)\n    tun = index_stu * tn\n    tvn = index_stv * tn\n\n    '''\n    angle = un * tun + vn * tvn + (an * tn)\n    index = [angle == 1.0]\n    angle[index] = 0.999\n    ang = np.arccos(angle)\n    mang = np.mean(ang)\n    mang = 
mang * 180 / np.pi\n    '''\n\n    epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)\n    epe = epe[ind2]\n    mepe = np.mean(epe)\n    return mepe\n\n\ndef flow_to_image(flow):\n    \"\"\"\n    Convert flow into middlebury color code image\n    :param flow: optical flow map\n    :return: optical flow image in middlebury color\n    \"\"\"\n    u = flow[:, :, 0]\n    v = flow[:, :, 1]\n\n    maxu = -999.\n    maxv = -999.\n    minu = 999.\n    minv = 999.\n\n    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)\n    u[idxUnknow] = 0\n    v[idxUnknow] = 0\n\n    maxu = max(maxu, np.max(u))\n    minu = min(minu, np.min(u))\n\n    maxv = max(maxv, np.max(v))\n    minv = min(minv, np.min(v))\n\n    rad = np.sqrt(u ** 2 + v ** 2)\n    maxrad = max(-1, np.max(rad))\n\n    u = u/(maxrad + np.finfo(float).eps)\n    v = v/(maxrad + np.finfo(float).eps)\n\n    img = compute_color(u, v)\n\n    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)\n    img[idx] = 0\n\n    return np.uint8(img)\n\n\ndef evaluate_flow_file(gt_file, pred_file):\n    \"\"\"\n    evaluate the estimated optical flow end point error according to ground truth provided\n    :param gt_file: ground truth file path\n    :param pred_file: estimated optical flow file path\n    :return: end point error, float32\n    \"\"\"\n    # Read flow files and calculate the errors\n    gt_flow = read_flow(gt_file)        # ground truth flow\n    eva_flow = read_flow(pred_file)     # predicted flow\n    # Calculate errors\n    average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])\n    return average_pe\n\n\ndef evaluate_flow(gt_flow, pred_flow):\n    \"\"\"\n    gt: ground-truth flow\n    pred: estimated flow\n    \"\"\"\n    average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1])\n    return average_pe\n\n\n\"\"\"\n==============\nDisparity Section\n==============\n\"\"\"\n\n\ndef read_disp_png(file_name):\n 
   \"\"\"\n    Read optical flow from KITTI .png file\n    :param file_name: name of the flow file\n    :return: optical flow data in matrix\n    \"\"\"\n    image_object = png.Reader(filename=file_name)\n    image_direct = image_object.asDirect()\n    image_data = list(image_direct[2])\n    (w, h) = image_direct[3]['size']\n    channel = len(image_data[0]) / w\n    flow = np.zeros((h, w, channel), dtype=np.uint16)\n    for i in range(len(image_data)):\n        for j in range(channel):\n            flow[i, :, j] = image_data[i][j::channel]\n    return flow[:, :, 0] / 256\n\n\ndef disp_to_flowfile(disp, filename):\n    \"\"\"\n    Read KITTI disparity file in png format\n    :param disp: disparity matrix\n    :param filename: the flow file name to save\n    :return: None\n    \"\"\"\n    f = open(filename, 'wb')\n    magic = np.array([202021.25], dtype=np.float32)\n    (height, width) = disp.shape[0:2]\n    w = np.array([width], dtype=np.int32)\n    h = np.array([height], dtype=np.int32)\n    empty_map = np.zeros((height, width), dtype=np.float32)\n    data = np.dstack((disp, empty_map))\n    magic.tofile(f)\n    w.tofile(f)\n    h.tofile(f)\n    data.tofile(f)\n    f.close()\n\n\n\"\"\"\n==============\nImage Section\n==============\n\"\"\"\n\n\ndef read_image(filename):\n    \"\"\"\n    Read normal image of any format\n    :param filename: name of the image file\n    :return: image data in matrix uint8 type\n    \"\"\"\n    img = Image.open(filename)\n    im = np.array(img)\n    return im\n\n\ndef warp_image(im, flow):\n    \"\"\"\n    Use optical flow to warp image to the next\n    :param im: image to warp\n    :param flow: optical flow\n    :return: warped image\n    \"\"\"\n    from scipy import interpolate\n    image_height = im.shape[0]\n    image_width = im.shape[1]\n    flow_height = flow.shape[0]\n    flow_width = flow.shape[1]\n    n = image_height * image_width\n    (iy, ix) = np.mgrid[0:image_height, 0:image_width]\n    (fy, fx) = 
np.mgrid[0:flow_height, 0:flow_width]\n    fx = fx.astype(np.float64)\n    fy = fy.astype(np.float64)\n    fx += flow[:,:,0]\n    fy += flow[:,:,1]\n    mask = np.logical_or(fx <0 , fx > flow_width)\n    mask = np.logical_or(mask, fy < 0)\n    mask = np.logical_or(mask, fy > flow_height)\n    fx = np.minimum(np.maximum(fx, 0), flow_width)\n    fy = np.minimum(np.maximum(fy, 0), flow_height)\n    points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1)\n    xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1)\n    warp = np.zeros((image_height, image_width, im.shape[2]))\n    for i in range(im.shape[2]):\n        channel = im[:, :, i]\n        plt.imshow(channel, cmap='gray')\n        values = channel.reshape(n, 1)\n        new_channel = interpolate.griddata(points, values, xi, method='cubic')\n        new_channel = np.reshape(new_channel, [flow_height, flow_width])\n        new_channel[mask] = 1\n        warp[:, :, i] = new_channel.astype(np.uint8)\n\n    return warp.astype(np.uint8)\n\n\n\"\"\"\n==============\nOthers\n==============\n\"\"\"\n\ndef pfm_to_flo(pfm_file):\n    flow_filename = pfm_file[0:pfm_file.find('.pfm')] + '.flo'\n    (data, scale) = readPFM(pfm_file)\n    flow = data[:, :, 0:2]\n    write_flow(flow, flow_filename)\n\n\ndef scale_image(image, new_range):\n    \"\"\"\n    Linearly scale the image into desired range\n    :param image: input image\n    :param new_range: the new range to be aligned\n    :return: image normalized in new range\n    \"\"\"\n    min_val = np.min(image).astype(np.float32)\n    max_val = np.max(image).astype(np.float32)\n    min_val_new = np.array(min(new_range), dtype=np.float32)\n    max_val_new = np.array(max(new_range), dtype=np.float32)\n    scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new\n    return scaled_image.astype(np.uint8)\n\n\ndef compute_color(u, v):\n    \"\"\"\n    compute optical flow color map\n    :param u: optical flow 
horizontal map\n    :param v: optical flow vertical map\n    :return: optical flow in color code\n    \"\"\"\n    [h, w] = u.shape\n    img = np.zeros([h, w, 3])\n    nanIdx = np.isnan(u) | np.isnan(v)\n    u[nanIdx] = 0\n    v[nanIdx] = 0\n\n    colorwheel = make_color_wheel()\n    ncols = np.size(colorwheel, 0)\n\n    rad = np.sqrt(u**2+v**2)\n\n    a = np.arctan2(-v, -u) / np.pi\n\n    fk = (a+1) / 2 * (ncols - 1) + 1\n\n    k0 = np.floor(fk).astype(int)\n\n    k1 = k0 + 1\n    k1[k1 == ncols+1] = 1\n    f = fk - k0\n\n    for i in range(0, np.size(colorwheel,1)):\n        tmp = colorwheel[:, i]\n        col0 = tmp[k0-1] / 255\n        col1 = tmp[k1-1] / 255\n        col = (1-f) * col0 + f * col1\n\n        idx = rad <= 1\n        col[idx] = 1-rad[idx]*(1-col[idx])\n        notidx = np.logical_not(idx)\n\n        col[notidx] *= 0.75\n        img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))\n\n    return img\n\n\ndef make_color_wheel():\n    \"\"\"\n    Generate color wheel according Middlebury color code\n    :return: Color wheel\n    \"\"\"\n    RY = 15\n    YG = 6\n    GC = 4\n    CB = 11\n    BM = 13\n    MR = 6\n\n    ncols = RY + YG + GC + CB + BM + MR\n\n    colorwheel = np.zeros([ncols, 3])\n\n    col = 0\n\n    # RY\n    colorwheel[0:RY, 0] = 255\n    colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))\n    col += RY\n\n    # YG\n    colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))\n    colorwheel[col:col+YG, 1] = 255\n    col += YG\n\n    # GC\n    colorwheel[col:col+GC, 1] = 255\n    colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))\n    col += GC\n\n    # CB\n    colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))\n    colorwheel[col:col+CB, 2] = 255\n    col += CB\n\n    # BM\n    colorwheel[col:col+BM, 2] = 255\n    colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))\n    col += + BM\n\n    # MR\n    
colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))\n    colorwheel[col:col+MR, 0] = 255\n\n    return colorwheel\n\n\ndef read_flo_file(filename):\n    \"\"\"\n    Read from Middlebury .flo file\n    :param flow_file: name of the flow file\n    :return: optical flow data in matrix\n    \"\"\"\n    f = open(filename, 'rb')\n    magic = np.fromfile(f, np.float32, count=1)\n    data2d = None\n\n    if 202021.25 != magic:\n        print('Magic number incorrect. Invalid .flo file')\n    else:\n        w = np.fromfile(f, np.int32, count=1)\n        h = np.fromfile(f, np.int32, count=1)\n        #print(\"Reading %d x %d flow file in .flo format\" % (h, w))\n        flow = np.ones((h[0],w[0],3))\n        data2d = np.fromfile(f, np.float32, count=2 * w[0] * h[0])\n        # reshape data into 3D array (columns, rows, channels)\n        data2d = np.resize(data2d, (h[0], w[0], 2))\n        flow[:,:,:2] = data2d\n    f.close()\n    return flow\n\n\ndef read_png_file(flow_file):\n    \"\"\"\n    Read from KITTI .png file\n    :param flow_file: name of the flow file\n    :return: optical flow data in matrix\n    \"\"\"\n    flow = cv2.imread(flow_file,-1)[:,:,::-1].astype(np.float64)\n #   flow_object = png.Reader(filename=flow_file)\n #   flow_direct = flow_object.asDirect()\n #   flow_data = list(flow_direct[2])\n #   (w, h) = flow_direct[3]['size']\n #   #print(\"Reading %d x %d flow file in .png format\" % (h, w))\n #   flow = np.zeros((h, w, 3), dtype=np.float64)\n #   for i in range(len(flow_data)):\n #       flow[i, :, 0] = flow_data[i][0::3]\n #       flow[i, :, 1] = flow_data[i][1::3]\n #       flow[i, :, 2] = flow_data[i][2::3]\n\n    invalid_idx = (flow[:, :, 2] == 0)\n    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0\n    flow[invalid_idx, 0] = 0\n    flow[invalid_idx, 1] = 0\n    return flow\n\n\ndef read_pfm_file(flow_file):\n    \"\"\"\n    Read from .pfm file\n    :param flow_file: name of the flow file\n    :return: 
optical flow data in matrix\n    \"\"\"\n    (data, scale) = readPFM(flow_file)\n    return data \n\n\n# fast resample layer\ndef resample(img, sz):\n    \"\"\"\n    img: flow map to be resampled\n    sz: new flow map size. Must be [height,width]\n    \"\"\"\n    original_image_size = img.shape\n    in_height = img.shape[0]\n    in_width = img.shape[1]\n    out_height = sz[0]\n    out_width = sz[1]\n    out_flow = np.zeros((out_height, out_width, 2))\n    # find scale\n    height_scale =  float(in_height) / float(out_height)\n    width_scale =  float(in_width) / float(out_width)\n\n    [x,y] = np.meshgrid(range(out_width), range(out_height))\n    xx = x * width_scale\n    yy = y * height_scale\n    x0 = np.floor(xx).astype(np.int32)\n    x1 = x0 + 1\n    y0 = np.floor(yy).astype(np.int32)\n    y1 = y0 + 1\n\n    x0 = np.clip(x0,0,in_width-1)\n    x1 = np.clip(x1,0,in_width-1)\n    y0 = np.clip(y0,0,in_height-1)\n    y1 = np.clip(y1,0,in_height-1)\n\n    Ia = img[y0,x0,:]\n    Ib = img[y1,x0,:]\n    Ic = img[y0,x1,:]\n    Id = img[y1,x1,:]\n\n    wa = (y1-yy) * (x1-xx)\n    wb = (yy-y0) * (x1-xx)\n    wc = (y1-yy) * (xx-x0)\n    wd = (yy-y0) * (xx-x0)\n    out_flow[:,:,0] = (Ia[:,:,0]*wa + Ib[:,:,0]*wb + Ic[:,:,0]*wc + Id[:,:,0]*wd) * out_width / in_width\n    out_flow[:,:,1] = (Ia[:,:,1]*wa + Ib[:,:,1]*wb + Ic[:,:,1]*wc + Id[:,:,1]*wd) * out_height / in_height\n\n    return out_flow\n\n"
  },
  {
    "path": "Datasets/segmask_gt.py",
    "content": "\"\"\"\n# ==============================\n# segmask_gt.py\n# library to generate groundtruth \n# segmentation mask given flow and\n# disparity change\n# (Adapted from code for rigidmask)\n# Author: Shihao Shen\n# Date: 14th Sep 2022\n# ==============================\n\"\"\"\n\nimport argparse\nimport os\nimport os.path\nimport glob\nimport numpy as np\nimport cv2\nfrom PIL import Image\nfrom flowlib import read_flow, readPFM, flow_to_image\n\ndef dataloader(filepath, fpass='frames_cleanpass', level=6):\n    iml0 = []\n    iml1 = []\n    flowl0 = []\n    disp0 = []\n    dispc = []\n    calib = []\n    level_stars = '/*'*level\n    candidate_pool = glob.glob('%s/optical_flow%s'%(filepath,level_stars))\n    for flow_path in sorted(candidate_pool):\n        # if 'TEST' in flow_path: continue\n        if 'flower_storm_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:\n            print('Skipping %s' % flow_path)\n            continue\n        if 'flower_storm_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:\n            print('Skipping %s' % flow_path)\n            continue\n        if 'flower_storm_augmented0_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path:\n            print('Skipping %s' % flow_path)\n            continue\n        if 'flower_storm_augmented0_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path:\n            print('Skipping %s' % flow_path)\n            continue\n        # if 'FlyingThings' in flow_path and '_0014_' in flow_path:\n        #     print('Skipping %s' % flow_path)\n        #     continue\n        # if 'FlyingThings' in flow_path and '_0015_' in flow_path:\n        #     print('Skipping %s' % flow_path)\n        #     continue\n        idd = flow_path.split('/')[-1].split('_')[-2]\n        if 'into_future' in flow_path:\n            idd_p1 = '%04d'%(int(idd)+1)\n        else:\n            idd_p1 = '%04d'%(int(idd)-1)\n        if 
os.path.exists(flow_path.replace(idd,idd_p1)): \n            d0_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow','disparity')\n            d0_path = '%s/%s.pfm'%(d0_path.rsplit('/',1)[0],idd)\n            dc_path = flow_path.replace('optical_flow','disparity_change')\n            dc_path = '%s/%s.pfm'%(dc_path.rsplit('/',1)[0],idd)\n            im_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow',fpass)\n            im0_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd)\n            im1_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd_p1)\n\n            # This will skip any sequence that contains less than 10 poses in camera_data.txt\n            with open('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]),'r') as f:\n               if 'FlyingThings' in flow_path and len(f.readlines())!=40: \n                   print('Skipping %s' % flow_path)\n                   continue\n\n            iml0.append(im0_path)\n            iml1.append(im1_path)\n            flowl0.append(flow_path)\n            disp0.append(d0_path)\n            dispc.append(dc_path)\n            calib.append('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]))\n    return iml0, iml1, flowl0, disp0, dispc, calib\n\ndef default_loader(path):\n    return Image.open(path).convert('RGB')\n\ndef flow_loader(path):\n    if '.pfm' in path:\n        data =  readPFM(path)[0]\n        data[:,:,2] = 1\n        return data\n    else:\n        return read_flow(path)\n\ndef load_exts(cam_file):\n    with open(cam_file, 'r') as f:\n        lines = f.readlines()\n\n    l_exts = []\n    r_exts = []\n    for l in lines:\n        if 'L ' in l:\n            l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))\n        if 'R ' in l:\n            r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))\n    return 
l_exts,r_exts        \n\ndef disparity_loader(path):\n    if '.png' in path:\n        data = Image.open(path)\n        data = np.ascontiguousarray(data,dtype=np.float32)/256\n        return data\n    else:    \n        return readPFM(path)[0]\n\n# triangulation\ndef triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5):\n    depth = bl*fl / disp # 450px->15mm focal length\n    X = (xcoord - cx) * depth / fl\n    Y = (ycoord - cy) * depth / fl\n    Z = depth\n    P = np.concatenate((X[np.newaxis],Y[np.newaxis],Z[np.newaxis]),0).reshape(3,-1)\n    P = np.concatenate((P,np.ones((1,P.shape[-1]))),0)\n    return P\n\ndef exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, calibs=None):\n    '''\n    index: index of the frame in the file lists below\n    iml0s: a file list of the first frames\n    iml1s: a file list of the second frames\n    flowl0s: a file list of the optical w.r.t. iml0s\n    disp0s: a file list of the disparity w.r.t. iml0s\n    dispcs: a file list of the disparity change w.r.t. 
disp0s\n    calibs: a file list of the camera extrinsics\n    '''\n    iml0 = iml0s[index]\n    iml1 = iml1s[index]\n    flowl0 = flowl0s[index]\n    \n    iml0 = default_loader(iml0)\n    iml1 = default_loader(iml1)\n\n    flowl0 = flow_loader(flowl0)\n    flowl0[:,:,-1][flowl0[:,:,0] == np.inf] = 0 \n    flowl0[:,:,0][~flowl0[:,:,2].astype(bool)] = 0\n    flowl0[:,:,1][~flowl0[:,:,2].astype(bool)] = 0\n    flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)\n    flowl0[np.isnan(flowl0)] = 1e6\n    \n    bl = 1\n    if '15mm_' in calibs[index]: \n        fl = 450\n    else:\n        fl = 1050\n    cx = 479.5\n    cy = 269.5\n    intr = [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]\n\n    d1 = np.abs(disparity_loader(disp0s[index]))\n    d2 = np.abs(disparity_loader(dispcs[index]) + d1)\n    \n    flowl0[:,:,2] = np.logical_and(np.logical_and(flowl0[:,:,2] == 1, d1 != 0), d2 != 0).astype(float)\n    \n    shape = d1.shape\n    mesh = np.meshgrid(range(shape[1]), range(shape[0]))\n    xcoord = mesh[0].astype(float)\n    ycoord = mesh[1].astype(float)\n\n    # triangulation in two frames\n    P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy)\n    P1 = triangulation(d2, xcoord + flowl0[:,:,0], ycoord + flowl0[:,:,1], bl=bl, fl=fl, cx=cx, cy=cy)\n    depth0 = P0[2]\n    depth1 = P1[2]\n\n    depth0 = depth0.reshape(shape).astype(np.float32)\n    flow3d = (P1-P0)[:3].reshape((3,)+shape).transpose((1,2,0))\n\n    fid = int(flowl0s[index].split('/')[-1].split('_')[1])\n    with open(calibs[index], 'r') as f:\n        fid = fid - int(f.readline().split(' ')[-1])\n    l_exts, r_exts= load_exts(calibs[index])\n    if '/right/' in iml0s[index]:\n        exts = r_exts\n    else:\n        exts = l_exts\n\n    if '/into_future/' in flowl0s[index]:\n        if (fid + 1) > len(exts) - 1: print(flowl0s[index])\n        if (fid) > len(exts) - 1: print(flowl0s[index])\n        ext1 = exts[fid+1]\n        ext0 = exts[fid]\n    else:\n        if (fid - 1) > 
len(exts) - 1: print(flowl0s[index])\n        if (fid) > len(exts) - 1: print(flowl0s[index])\n        ext1 = exts[fid-1]\n        ext0 = exts[fid]\n    camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1  # Sceneflow uses Blender's coordinate system\n    RT01 = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT)  # ext is from camera space to world space\n    \n    rect_flow3d = (RT01[:3,:3].dot(P1[:3])-P0[:3]).reshape((3,)+shape).transpose((1,2,0))  # rectified scene flow\n\n    depthflow = np.concatenate((depth0[:,:,np.newaxis], rect_flow3d, flow3d), 2)\n    RT01 = np.concatenate((cv2.Rodrigues(RT01[:3,:3])[0][:,0], RT01[:3,-1])).astype(np.float32)\n\n    # object mask\n    fnum = int(iml0s[index].split('/')[-1].split('.png')[0])\n    obj_fname = '%s/%04d.pfm'%(flowl0s[index].replace('/optical_flow','object_index').replace('into_past/','/').replace('into_future/','/').rsplit('/',1)[0],fnum)\n    obj_idx = disparity_loader(obj_fname)\n    \n    depthflow = np.concatenate((depthflow, obj_idx[:,:,np.newaxis]), 2)\n    # depthflow dimension: H x W x 8 (depth=1 + rectified_flow3d=3 + flow3d=3 + object_segmentation=1)\n\n    iml1 = np.asarray(iml1)\n    iml0 = np.asarray(iml0)\n    \n    return iml0, iml1, flowl0, depthflow, intr, RT01\n\n\ndef motionmask(flowl0, depthflow, RT01):\n    '''\n    flowl0: optical flow. [H, W, 3]\n    depthflow: a concatenation of depth, rectified scene flow, scene flow, and object segmentation. [H, W, 8]\n    RT01: camera motion from the future frame to the current frame. [6, ]\n    '''\n    valid_mask = (flowl0[:,:,2] == 1) & (depthflow[:,:,0] < 100) & (depthflow[:,:,0] > 0.01)  # valid flow & valid depth\n    Tglobal_gt = -RT01[3:, np.newaxis, np.newaxis]  # background translation\n    Tlocal_gt = depthflow[:,:,1:4].transpose(2, 0, 1)   # point translation (after removing rotation)\n    m3d_gt = np.linalg.norm(Tlocal_gt - Tglobal_gt, 2, 0)       # abs. 
motion\n    fgmask_gt = m3d_gt * 100 > 1\n    fgmask_gt[~valid_mask] = False\n\n    return fgmask_gt\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='segmask_gt_generation')\n    parser.add_argument('--database',\n                    help='path to the database (required)')\n    parser.add_argument('--debug', action='store_true', default=False,\n                    help='generate visualization')\n    parser.add_argument('--frames_pass', default='frames_cleanpass', \n                    help='which pass to use, either clean or final')\n    parser.add_argument('--dataset', \n                    help='choose from FlyingThings3D, Driving, Monkaa')\n    args = parser.parse_args()\n\n    if args.debug:\n        os.makedirs('%s/%s/results_viz' % (args.database, args.dataset), exist_ok=True)\n    \n    if args.dataset == 'Monkaa':\n        level = 4\n    else:\n        level = 6\n    iml0s, iml1s, flowl0s, disp0s, dispcs, calibs = dataloader('%s/%s/' % (args.database, args.dataset), \n                                                                level=level, fpass=args.frames_pass)\n    \n    print(\"Generating %s masks...\" % len(flowl0s))\n    for i in range(len(iml0s)):\n        idd = flowl0s[i].split('/')[-1].split('_')[-2]\n        mask_fn = '%s/%s.npy' % (os.path.dirname(flowl0s[i]).replace('optical_flow', 'rigidmask'), idd)\n        if os.path.exists(mask_fn):\n            print(i)\n            continue\n        os.makedirs(os.path.dirname(mask_fn), exist_ok=True)\n\n        iml0, iml1, flowl0, depthflow, intr, RT01 = exp_loader(i, iml0s, iml1s, flowl0s, disp0s, dispcs, calibs)\n        fgmask = motionmask(flowl0, depthflow, RT01)\n        np.save(mask_fn, fgmask)\n\n        if args.debug:\n            if args.dataset == 'Driving' and 'rigidmask/15mm_focallength/scene_forwards/fast/left' not in mask_fn:\n                continue\n            elif args.dataset == 'Monkaa' and 'rigidmask/eating_camera2_x2/left' not in mask_fn:\n      
          continue\n            elif args.dataset == 'FlyingThings3D' and not ('rigidmask/TEST/A' in mask_fn and 'into_future/left' in mask_fn):\n                continue\n            print(\"Visualizing %s\" % mask_fn)\n            flowl0viz = flow_to_image(flowl0)\n            maskviz = np.stack((fgmask * 255.0, )*3, axis=-1).astype(np.uint8)\n            inputs = np.concatenate([iml0, flowl0viz, maskviz], axis=1)\n            cv2.imwrite('%s/%s/results_viz/%s.png' % (args.database, args.dataset, str(i).zfill(5)), cv2.cvtColor(inputs, cv2.COLOR_RGB2BGR))"
  },
  {
    "path": "Datasets/tartanTrajFlowDataset.py",
    "content": "\"\"\"\n# ==============================\n# tartanTrajFlowDataset.py\n# library for DytanVO data I/O\n# Author: Wenshan Wang, Shihao Shen\n# Date: 3rd Jan 2023\n# ==============================\n\"\"\"\nimport numpy as np\nimport cv2\nfrom torch.utils.data import Dataset, DataLoader\nfrom os import listdir\nfrom evaluator.transformation import pos_quats2SEs, pose2motion, SEs2ses\nfrom .utils import make_intrinsics_layer\n\nclass TrajFolderDataset(Dataset):\n    \"\"\"scene flow synthetic dataset. \"\"\"\n\n    def __init__(self, imgfolder, transform = None, \n                    focalx = 320.0, focaly = 320.0, centerx = 320.0, centery = 240.0):\n        \n        files = listdir(imgfolder)\n        self.rgbfiles = [(imgfolder +'/'+ ff) for ff in files if (ff.endswith('.png') or ff.endswith('.jpg'))]\n        self.rgbfiles.sort()\n        self.imgfolder = imgfolder\n\n        print('Find {} image files in {}'.format(len(self.rgbfiles), imgfolder))\n\n        self.N = len(self.rgbfiles) - 1\n\n        # self.N = len(self.lines)\n        self.transform = transform\n        self.focalx = focalx\n        self.focaly = focaly\n        self.centerx = centerx\n        self.centery = centery\n\n    def __len__(self):\n        return self.N\n\n    def __getitem__(self, idx):\n        imgfile1 = self.rgbfiles[idx].strip()\n        imgfile2 = self.rgbfiles[idx+1].strip()\n        img1 = cv2.imread(imgfile1)\n        img2 = cv2.imread(imgfile2)\n\n        res = {'img1': img1, 'img2': img2}\n\n        h, w, _ = img1.shape\n        intrinsicLayer = make_intrinsics_layer(w, h, self.focalx, self.focaly, self.centerx, self.centery)\n        res['intrinsic'] = intrinsicLayer\n\n        if self.transform:\n            res = self.transform(res)\n\n        res['img1_raw'] = img1\n        res['img2_raw'] = img2\n\n        return res"
  },
  {
    "path": "Datasets/util_flow.py",
    "content": "\"\"\"\n# ==============================\n# util_flow.py\n# library for optical flow processing\n# Author: Gengshan Yang\n# Date: 10th Feb 2021\n# ==============================\n\"\"\"\nimport math\nimport png\nimport struct\nimport array\nimport numpy as np\nimport cv2\nimport pdb\n\nfrom io import *\n\nUNKNOWN_FLOW_THRESH = 1e9;\nUNKNOWN_FLOW = 1e10;\n\n# Middlebury checks\nTAG_STRING = 'PIEH'    # use this when WRITING the file\nTAG_FLOAT = 202021.25  # check for this when READING the file\n\ndef readPFM(file):\n    import re\n    file = open(file, 'rb')\n\n    color = None\n    width = None\n    height = None\n    scale = None\n    endian = None\n\n    header = file.readline().rstrip()\n    if header == b'PF':\n        color = True\n    elif header == b'Pf':\n        color = False\n    else:\n        raise Exception('Not a PFM file.')\n\n    dim_match = re.match(b'^(\\d+)\\s(\\d+)\\s$', file.readline())\n    if dim_match:\n        width, height = map(int, dim_match.groups())\n    else:\n        raise Exception('Malformed PFM header.')\n\n    scale = float(file.readline().rstrip())\n    if scale < 0: # little-endian\n        endian = '<'\n        scale = -scale\n    else:\n        endian = '>' # big-endian\n\n    data = np.fromfile(file, endian + 'f')\n    shape = (height, width, 3) if color else (height, width)\n\n    data = np.reshape(data, shape)\n    data = np.flipud(data)\n    return data, scale\n\n\ndef save_pfm(file, image, scale = 1):\n  import sys\n  color = None\n\n  if image.dtype.name != 'float32':\n    raise Exception('Image dtype must be float32.')\n\n  if len(image.shape) == 3 and image.shape[2] == 3: # color image\n    color = True\n  elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale\n    color = False\n  else:\n    raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')\n\n  file.write('PF\\n' if color else 'Pf\\n')\n  file.write('%d %d\\n' % (image.shape[1], 
image.shape[0]))\n\n  endian = image.dtype.byteorder\n\n  if endian == '<' or endian == '=' and sys.byteorder == 'little':\n    scale = -scale\n\n  file.write('%f\\n' % scale)\n\n  image.tofile(file)\n\n\ndef ReadMiddleburyFloFile(path):\n    \"\"\" Read .FLO file as specified by Middlebury.\n\n    Returns tuple (width, height, u, v, mask), where u, v, mask are flat\n    arrays of values.\n    \"\"\"\n\n    with open(path, 'rb') as fil:\n        tag = struct.unpack('f', fil.read(4))[0]\n        width = struct.unpack('i', fil.read(4))[0]\n        height = struct.unpack('i', fil.read(4))[0]\n\n        assert tag == TAG_FLOAT\n        \n        #data = np.fromfile(path, dtype=np.float, count=-1)\n        #data = data[3:]\n\n        fmt = 'f' * width*height*2\n        data = struct.unpack(fmt, fil.read(4*width*height*2))\n\n        u = data[::2]\n        v = data[1::2]\n\n        mask = map(lambda x,y: abs(x)<UNKNOWN_FLOW_THRESH and abs(y) < UNKNOWN_FLOW_THRESH, u, v)\n        mask = list(mask)\n        u_masked = map(lambda x,y: x if y else 0, u, mask)\n        v_masked = map(lambda x,y: x if y else 0, v, mask)\n\n    return width, height, list(u_masked), list(v_masked), list(mask)\n\ndef ReadKittiPngFile(path):\n    \"\"\" Read 16-bit .PNG file as specified by KITTI-2015 (flow).\n\n    Returns a tuple, (width, height, u, v, mask), where u, v, mask\n    are flat arrays of values.\n    \"\"\"\n    # Read .png file.\n    png_reader = png.Reader(path)\n    data = png_reader.read()\n    if data[3]['bitdepth'] != 16:\n        raise Exception('bitdepth of ' + path + ' is not 16')\n\n    width = data[0]\n    height = data[1]\n\n    # Get list of rows.\n    rows = list(data[2])\n\n    u = array.array('f', [0]) * width*height\n    v = array.array('f', [0]) * width*height\n    mask = array.array('f', [0]) * width*height\n\n    for y, row in enumerate(rows):\n        for x in range(width):\n            ind = width*y+x\n            u[ind] = (row[3*x] - 2**15) / 64.0\n            
v[ind] = (row[3*x+1] - 2**15) / 64.0\n            mask[ind] = row[3*x+2]\n\n            # if mask[ind] > 0:\n            #     print(u[ind], v[ind], mask[ind], row[3*x], row[3*x+1], row[3*x+2])\n\n    #png_reader.close()\n\n    return (width, height, u, v, mask)\n\n\ndef WriteMiddleburyFloFile(path, width, height, u, v, mask=None):\n    \"\"\" Write .FLO file as specified by Middlebury.\n    \"\"\"\n\n    if mask is not None:\n        u_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, u, mask)\n        v_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, v, mask)\n    else:\n        u_masked = u\n        v_masked = v\n\n    fmt = 'f' * width*height*2\n    # Interleave lists\n    data = [x for t in zip(u_masked,v_masked) for x in t]\n\n    with open(path, 'wb') as fil:\n        fil.write(str.encode(TAG_STRING))\n        fil.write(struct.pack('i', width))\n        fil.write(struct.pack('i', height))\n        fil.write(struct.pack(fmt, *data))\n\n\ndef write_flow(path,flow):\n    \n    invalid_idx = (flow[:, :, 2] == 0)\n    flow[:, :, 0:2] = flow[:, :, 0:2]*64.+ 2 ** 15\n    flow[invalid_idx, 0] = 0\n    flow[invalid_idx, 1] = 0\n\n    flow = flow.astype(np.uint16)\n    flow = cv2.imwrite(path, flow[:,:,::-1])\n\n    #WriteKittiPngFile(path,\n    #     flow.shape[1], flow.shape[0], flow[:,:,0].flatten(), \n    #    flow[:,:,1].flatten(), flow[:,:,2].flatten())\n    \n\n\ndef WriteKittiPngFile(path, width, height, u, v, mask=None):\n    \"\"\" Write 16-bit .PNG file as specified by KITTI-2015 (flow).\n\n    u, v are lists of float values\n    mask is a list of floats, denoting the *valid* pixels.\n    \"\"\"\n\n    data = array.array('H',[0])*width*height*3\n\n    for i,(u_,v_,mask_) in enumerate(zip(u,v,mask)):\n        data[3*i] = int(u_*64.0+2**15)\n        data[3*i+1] = int(v_*64.0+2**15)\n        data[3*i+2] = int(mask_)\n\n        # if mask_ > 0:\n        #     print(data[3*i], data[3*i+1],data[3*i+2])\n\n    with open(path, 'wb') as png_file:\n        
png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False)\n        png_writer.write_array(png_file, data)\n\n\ndef ConvertMiddleburyFloToKittiPng(src_path, dest_path):\n    width, height, u, v, mask = ReadMiddleburyFloFile(src_path)\n    WriteKittiPngFile(dest_path, width, height, u, v, mask=mask)\n\ndef ConvertKittiPngToMiddleburyFlo(src_path, dest_path):\n    width, height, u, v, mask = ReadKittiPngFile(src_path)\n    WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=mask)\n\n\ndef ParseFilenameKitti(filename):\n    # Parse kitti filename (seq_frameno.xx),\n    # return seq, frameno, ext.\n    # Be aware that seq might contain the dataset name (if contained as prefix)\n    ext = filename[filename.rfind('.'):]\n    frameno = filename[filename.rfind('_')+1:filename.rfind('.')]\n    frameno = int(frameno)\n    seq = filename[:filename.rfind('_')]\n    return seq, frameno, ext\n\n\ndef read_calib_file(filepath):\n    \"\"\"Read in a calibration file and parse into a dictionary.\"\"\"\n    data = {}\n\n    with open(filepath, 'r') as f:\n        for line in f.readlines():\n            key, value = line.split(':', 1)\n            # The only non-float values in these files are dates, which\n            # we don't care about anyway\n            try:\n                data[key] = np.array([float(x) for x in value.split()])\n            except ValueError:\n                pass\n\n    return data\n\ndef load_calib_cam_to_cam(cam_to_cam_file):\n    # We'll return the camera calibration as a dictionary\n    data = {}\n\n    # Load and parse the cam-to-cam calibration data\n    filedata = read_calib_file(cam_to_cam_file)\n\n    # Create 3x4 projection matrices\n    P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4))\n    P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4))\n    P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4))\n    P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4))\n\n    # Compute the camera 
intrinsics\n    data['K_cam0'] = P_rect_00[0:3, 0:3]\n    data['K_cam1'] = P_rect_10[0:3, 0:3]\n    data['K_cam2'] = P_rect_20[0:3, 0:3]\n    data['K_cam3'] = P_rect_30[0:3, 0:3]\n\n    data['b00'] = P_rect_00[0, 3] / P_rect_00[0, 0]\n    data['b10'] = P_rect_10[0, 3] / P_rect_10[0, 0]\n    data['b20'] = P_rect_20[0, 3] / P_rect_20[0, 0]\n    data['b30'] = P_rect_30[0, 3] / P_rect_30[0, 0]\n\n    return data"
  },
  {
    "path": "Datasets/utils.py",
    "content": "\"\"\"\n# ==============================\n# utils.py\n# misc library for DytanVO\n# Author: Wenshan Wang, Shihao Shen\n# Date: 3rd Jan 2023\n# ==============================\n\"\"\"\n\nfrom __future__ import division\nimport torch\nimport math\nimport random\nimport numpy as np\nimport numbers\nimport cv2\nimport matplotlib.pyplot as plt\nimport os\nfrom scipy.spatial.transform import Rotation as R\n\nif ( not ( \"DISPLAY\" in os.environ ) ):\n    plt.switch_backend('agg')\n    print(\"Environment variable DISPLAY is not present in the system.\")\n    print(\"Switch the backend of matplotlib to agg.\")\n\nimport time\n# ===== general functions =====\n\nclass Compose(object):\n    \"\"\"Composes several transforms together.\n\n    Args:\n        transforms (List[Transform]): list of transforms to compose.\n\n    Example:\n        >>> transforms.Compose([\n        >>>     transforms.CenterCrop(10),\n        >>>     transforms.ToTensor(),\n        >>> ])\n    \"\"\"\n\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, img):\n        for t in self.transforms:\n            img = t(img)\n        return img\n\n\nclass DownscaleFlow(object):\n    \"\"\"\n    Scale the flow and mask to a fixed size\n\n    \"\"\"\n    def __init__(self, scale=4):\n        '''\n        size: output frame size, this should be NO LARGER than the input frame size! 
\n        '''\n        self.downscale = 1.0/scale\n\n    def __call__(self, sample): \n        if self.downscale!=1 and 'flow' in sample :\n            sample['flow'] = cv2.resize(sample['flow'], \n                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)\n\n        if self.downscale!=1 and 'intrinsic' in sample :\n            sample['intrinsic'] = cv2.resize(sample['intrinsic'], \n                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)\n\n        if self.downscale!=1 and 'fmask' in sample :\n            sample['fmask'] = cv2.resize(sample['fmask'],\n                (0, 0), fx=self.downscale, fy=self.downscale, interpolation=cv2.INTER_LINEAR)\n        return sample\n\nclass CropCenter(object):\n    \"\"\"Crops a sample of data (tuple) at center\n    if the image size is not large enough, it will be first resized with fixed ratio\n    \"\"\"\n\n    def __init__(self, size):\n        if isinstance(size, numbers.Number):\n            self.size = (int(size), int(size))\n        else:\n            self.size = size\n\n    def __call__(self, sample):\n        kks = list(sample.keys())\n        th, tw = self.size\n        hh, ww = sample[kks[0]].shape[0], sample[kks[0]].shape[1]\n        if ww == tw and hh == th:\n            return sample\n\n        # resize the image if the image size is smaller than the target size\n        scale_h = max(1, float(th)/hh)\n        scale_w = max(1, float(tw)/ww)\n        \n        if scale_h>1 or scale_w>1:\n            w = int(round(ww * scale_w)) # w after resize\n            h = int(round(hh * scale_h)) # h after resize\n        else:\n            w, h = ww, hh\n\n        if scale_h != 1. 
or scale_w != 1.: # resize the data\n            resizedata = ResizeData(size=(h, w))\n            sample = resizedata(sample)\n\n        x1 = int((w-tw)/2)\n        y1 = int((h-th)/2)\n\n        for kk in kks:\n            if sample[kk] is None:\n                continue\n            img = sample[kk]\n            sample[kk] = img[y1:y1+th,x1:x1+tw,...]\n\n        return sample\n\nclass ResizeData(object):\n    \"\"\"Resize the data in a dict\n    \"\"\"\n\n    def __init__(self, size):\n        if isinstance(size, numbers.Number):\n            self.size = (int(size), int(size))\n        else:\n            self.size = size\n\n    def __call__(self, sample):\n        kks = list(sample.keys())\n        th, tw = self.size\n        h, w = sample[kks[0]].shape[0], sample[kks[0]].shape[1]\n        if w == tw and h == th:\n            return sample\n        scale_w = float(tw)/w\n        scale_h = float(th)/h\n\n        for kk in kks:\n            if sample[kk] is None:\n                continue\n            sample[kk] = cv2.resize(sample[kk], (tw,th), interpolation=cv2.INTER_LINEAR)\n\n        if 'flow' in sample:\n            sample['flow'][...,0] = sample['flow'][...,0] * scale_w\n            sample['flow'][...,1] = sample['flow'][...,1] * scale_h\n\n        return sample\n\nclass ToTensor(object):\n    def __call__(self, sample):\n        kks = list(sample)\n\n        for kk in kks:\n            data = sample[kk]\n            data = data.astype(np.float32) \n            if len(data.shape) == 3: # transpose image-like data\n                data = data.transpose(2,0,1)\n            elif len(data.shape) == 2:\n                data = data.reshape((1,)+data.shape)  # add a dummy channel\n                \n            if len(data.shape) == 3 and data.shape[0]==3: # normalization of rgb images\n                data = data/255.0\n\n            sample[kk] = torch.from_numpy(data.copy()) # copy to make memory continuous\n\n        return sample\n\ndef 
tensor2img(tensImg,mean,std):\n    \"\"\"\n    convert a tensor to a numpy array, for visualization\n    \"\"\"\n    # undo normalize\n    for t, m, s in zip(tensImg, mean, std):\n        t.mul_(s).add_(m) \n    tensImg = tensImg * float(255)\n    # undo transpose\n    tensImg = (tensImg.numpy().transpose(1,2,0)).astype(np.uint8)\n    return tensImg\n\ndef bilinear_interpolate(img, h, w):\n    # assert round(h)>=0 and round(h)<img.shape[0]\n    # assert round(w)>=0 and round(w)<img.shape[1]\n\n    h0 = int(math.floor(h))\n    h1 = h0 + 1\n    w0 = int(math.floor(w))\n    w1 = w0 + 1\n\n    a = h - h0 \n    b = w - w0\n\n    h0 = max(h0, 0)\n    w0 = max(w0, 0)\n    h1 = min(h1, img.shape[0]-1)\n    w1 = min(w1, img.shape[1]-1)\n\n    A = img[h0,w0,:]\n    B = img[h1,w0,:]\n    C = img[h0,w1,:]\n    D = img[h1,w1,:]\n\n    res = (1-a)*(1-b)*A + a*(1-b)*B + (1-a)*b*C + a*b*D\n\n    return res \n\ndef calculate_angle_distance_from_du_dv(du, dv, flagDegree=False):\n    a = np.arctan2( dv, du )\n\n    angleShift = np.pi\n\n    if ( True == flagDegree ):\n        a = a / np.pi * 180\n        angleShift = 180\n        # print(\"Convert angle from radian to degree as demanded by the input file.\")\n\n    d = np.sqrt( du * du + dv * dv )\n\n    return a, d, angleShift\n\ndef visflow(flownp, maxF=500.0, n=8, mask=None, hueMax=179, angShift=0.0): \n    \"\"\"\n    Show an optical flow field as the KITTI dataset does.\n    Some parts of this function are translated from the original MATLAB code flow_to_color.m.\n    \"\"\"\n\n    ang, mag, _ = calculate_angle_distance_from_du_dv( flownp[:, :, 0], flownp[:, :, 1], flagDegree=False )\n\n    # Use Hue, Saturation, Value colour model \n    hsv = np.zeros( ( ang.shape[0], ang.shape[1], 3 ) , dtype=np.float32)\n\n    am = ang < 0\n    ang[am] = ang[am] + np.pi * 2\n\n    hsv[ :, :, 0 ] = np.remainder( ( ang + angShift ) / (2*np.pi), 1 )\n    hsv[ :, :, 1 ] = mag / maxF * n\n    hsv[ :, :, 2 ] = (n - hsv[:, :, 1])/n\n\n    hsv[:, :, 0] = 
np.clip( hsv[:, :, 0], 0, 1 ) * hueMax\n    hsv[:, :, 1:3] = np.clip( hsv[:, :, 1:3], 0, 1 ) * 255\n    hsv = hsv.astype(np.uint8)\n\n    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)\n\n    if ( mask is not None ):\n        mask = mask != 255\n        bgr[mask] = np.array([0, 0 ,0], dtype=np.uint8)\n\n    return bgr\n\n\ndef dataset_intrinsics(dataset='tartanair', is_15mm=False):\n    if dataset == 'kitti':\n        focalx, focaly, centerx, centery = 707.0912, 707.0912, 601.8873, 183.1104\n        baseline = None  # to be determined using load_kitti_intrinsics\n    elif dataset == 'airdos':\n        focalx, focaly, centerx, centery = 772.54834, 772.54834, 320.0, 180.0\n        baseline = 1\n    elif dataset == 'rs_d435':\n        focalx, focaly, centerx, centery = 384.5080871582031, 384.5080871582031, 316.88897705078125, 240.05723571777344\n        baseline = 0.05\n    elif dataset == 'sceneflow':\n        focalx, focaly, centerx, centery = 1050.0, 1050.0, 479.5, 269.5\n        if is_15mm:\n            focalx = focaly = 450.0\n        baseline = 0.5\n    elif dataset == 'tartanair':\n        focalx, focaly, centerx, centery = 320.0, 320.0, 320.0, 240.0\n        baseline = 1\n    elif dataset == 'commaai':\n        focalx, focaly, centerx, centery = 910.0, 910.0, 582.0, 437.0\n        baseline = 1\n    else:\n        return None\n    return focalx, focaly, centerx, centery, baseline\n\n\n\ndef plot_traj(gtposes, estposes, vis=False, savefigname=None, title=''):\n    fig = plt.figure(figsize=(4,4))\n    cm = plt.cm.get_cmap('Spectral')\n\n    plt.subplot(111)\n    plt.plot(gtposes[:,0],gtposes[:,1], linestyle='dashed',c='k')\n    plt.plot(estposes[:, 0], estposes[:, 1],c='#ff7f0e')\n    plt.xlabel('x (m)')\n    plt.ylabel('y (m)')\n    plt.legend(['Ground Truth','TartanVO'])\n    plt.title(title)\n    if savefigname is not None:\n        plt.savefig(savefigname)\n    if vis:\n        plt.show()\n    plt.close(fig)\n\ndef make_intrinsics_layer(w, h, fx, fy, ox, oy):\n   
 ww, hh = np.meshgrid(range(w), range(h))\n    ww = (ww.astype(np.float32) - ox + 0.5 )/fx\n    hh = (hh.astype(np.float32) - oy + 0.5 )/fy\n    intrinsicLayer = np.stack((ww,hh)).transpose(1,2,0)\n\n    return intrinsicLayer\n\ndef load_kiiti_intrinsics(filename):\n    '''\n    load intrinsics from kitti intrinsics file\n    '''\n    data = {}\n\n    with open(filename, 'r') as f:\n        for line in f.readlines():\n            key, value = line.split(':', 1)\n            # The only non-float values in these files are dates, which\n            # we don't care about anyway\n            try:\n                data[key] = np.array([float(x) for x in value.split()])\n            except ValueError:\n                pass\n\n    P2 = np.reshape(data['P2'], (3,4))\n    P3 = np.reshape(data['P3'], (3,4))\n    focalx, focaly, centerx, centery = float(P2[0,0]), float(P2[1,1]), float(P2[0,2]), float(P2[1,2])\n    baseline = P2[0,3] / P2[0,0] - P3[0,3] / P3[0,0]\n\n    return focalx, focaly, centerx, centery, baseline\n\ndef load_sceneflow_extrinsics(filename):\n    with open(filename, 'r') as f:\n        lines = f.readlines()\n\n    l_exts = []\n    r_exts = []\n    for l in lines:\n        if 'L ' in l:\n            l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))\n        if 'R ' in l:\n            r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4))\n\n    if 'into_future' in filename:\n        fids = np.arange(0, len(l_exts))\n    else:\n        fids = np.arange(len(l_exts) - 1, -1, -1)\n    \n    # assuming left camera is used by default\n    camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1  # Sceneflow uses Blender's coordinate system\n    pose_quats = []\n    pose = np.eye(4)\n    for fid in fids:\n        ext0 = l_exts[fid]\n        ext1 = l_exts[fid+1] if 'into_future' in filename else l_exts[fid-1]\n        motion = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT)  # ext is from camera space to 
world space\n        pose = pose @ motion\n        pose_quat = np.zeros(7)\n        pose_quat[3:] = R.from_matrix(pose[:3,:3]).as_quat()\n        pose_quat[:3] = pose[:3,3]\n        pose_quats.append(pose_quat)\n    \n    return pose_quats"
  },
  {
    "path": "DytanVO.py",
    "content": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Shihao Shen, CMU\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions\n# are met:\n#\n#  * Redistributions of source code must retain the above copyright\n#    notice, this list of conditions and the following disclaimer.\n#  * Redistributions in binary form must reproduce the above\n#    copyright notice, this list of conditions and the following\n#    disclaimer in the documentation and/or other materials provided\n#    with the distribution.\n#  * Neither the name of CMU nor the names of its\n#    contributors may be used to endorse or promote products derived\n#    from this software without specific prior written permission.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE\n# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n# POSSIBILITY OF SUCH DAMAGE.\n\nimport cv2\nimport torch\nimport torch.nn as nn\nimport numpy as np\nimport time\n\nnp.set_printoptions(precision=4, suppress=True, threshold=10000)\n\nfrom torch.autograd import Variable\nfrom Network.VONet import VONet\nfrom Network.rigidmask.VCNplus import SegNet, WarpModule, flow_reg\nfrom Datasets.utils import CropCenter, ResizeData\nfrom Datasets.cowmask import cow_masks\nfrom evaluator.transformation import se2SE\n\nclass DytanVO(object):\n    def __init__(self, vo_model_name, seg_model_name, image_height, image_width, is_kitti=False, flow_model_name=None, pose_model_name=None):\n        # import ipdb;ipdb.set_trace()\n        self.vonet = VONet() \n\n        # load VO model separately (flow + pose) or at once\n        if flow_model_name.endswith('.pkl') and pose_model_name.endswith('.pkl'):\n            modelname = 'models/' + flow_model_name\n            self.load_vo_model(self.vonet.flowNet, modelname)\n            modelname = 'models/' + pose_model_name\n            self.load_vo_model(self.vonet.flowPoseNet, modelname)\n        else:\n            modelname = 'models/' + vo_model_name\n            self.load_vo_model(self.vonet, modelname)\n\n        self.vonet.cuda()\n\n        self.test_count = 0\n        self.pose_norm = np.array([ 0.13,  0.13,  0.13,  0.013 ,  0.013,  0.013], dtype=np.float32) # the output scale factor\n        self.flow_norm = 20 # scale factor for flow\n\n        # load the segmentation model\n        
self.testres = 1.2\n        if is_kitti:\n            maxw, maxh = [int(self.testres * 1280), int(self.testres * 384)]\n        else:\n            maxw, maxh = [int(self.testres * 1024), int(self.testres * 448)]\n        max_h = int(maxh // 64 * 64)\n        max_w = int(maxw // 64 * 64)\n        if max_h < maxh: max_h += 64\n        if max_w < maxw: max_w += 64\n        maxh = max_h\n        maxw = max_w\n        self.segnet = SegNet([1, maxw, maxh], md=[4, 4, 4, 4, 4], fac=1, exp_unc=not ('kitti' in seg_model_name))\n        segmodelname = 'models/' + seg_model_name\n        self.segnet = self.load_seg_model(self.segnet, segmodelname)\n        \n        self.segnet.cuda()\n\n        self.segnet_initialize = False\n\n        # To resize/crop segmentation mask\n        self.resizedata = ResizeData(size=(image_height,1226)) if is_kitti else None\n        self.cropdata = CropCenter((image_height, image_width))\n\n        # To transform coordinates from NED to Blender\n        Ry90 = np.array([[0,0,1,0], [0,1,0,0], [-1,0,0,0], [0,0,0,1]])\n        Rx90 = np.array([[1,0,0,0], [0,0,-1,0], [0,1,0,0], [0,0,0,1]])\n        self.camT = Rx90.dot(Ry90)\n\n        self.sigmoid = lambda x: 1/(1 + np.exp(-x))\n\n    def load_vo_model(self, model, modelname):\n        preTrainDict = torch.load(modelname)\n        model_dict = model.state_dict()\n        preTrainDictTemp = {k:v for k,v in preTrainDict.items() if k in model_dict}\n\n        if( 0 == len(preTrainDictTemp) ):\n            print(\"Does not find any module to load. Try DataParallel version.\")\n            for k, v in preTrainDict.items():\n                kk = k[7:]\n                if ( kk in model_dict ):\n                    preTrainDictTemp[kk] = v\n\n        if ( 0 == len(preTrainDictTemp) ):\n            raise Exception(\"Could not load model from %s.\" % (modelname), \"load_model\")\n\n        model_dict.update(preTrainDictTemp)\n        model.load_state_dict(model_dict)\n        print('VO Model %s loaded...' 
% modelname)\n        return model\n\n    def load_seg_model(self, model, modelname):\n        model = nn.DataParallel(model, device_ids=[0])\n        preTrainDict = torch.load(modelname, map_location='cpu')\n        self.mean_L = preTrainDict['mean_L']\n        self.mean_R = preTrainDict['mean_R']\n        preTrainDict['state_dict'] = {k:v for k,v in preTrainDict['state_dict'].items()}\n        model.load_state_dict(preTrainDict['state_dict'], strict=False)\n        print('Segmentation Model %s loaded...' % modelname)\n        return model\n\n    def test_batch(self, sample, intrinsics, seg_thresh, iter_num):\n        print(\"=\"*20)\n        self.test_count += 1\n        \n        # import ipdb;ipdb.set_trace()\n        img0   = sample['img1'].cuda()\n        img1   = sample['img2'].cuda()\n        intrinsic = sample['intrinsic'].cuda()  # intrinsic layer\n\n        img0_raw = sample['img1_raw'].detach().numpy().squeeze()\n        img1_raw = sample['img2_raw'].detach().numpy().squeeze()\n\n        if not self.segnet_initialize:\n            self.vonet.eval()\n            self.segnet.eval()\n            self.initialize_segnet_input(img0_raw, intrinsics)\n            self.segnet_initialize = True\n        \n        with torch.no_grad():\n            imgL_noaug, imgLR = self.transform_segnet_input(img0_raw, img1_raw)\n            flowdc = self.segnet.module.forward_VCN(imgLR)\n            \n            total_time = 0\n            start_time = time.time()\n            flow_output, _ = self.vonet([img0, img1], only_flow=True)\n            flownet_time = time.time() - start_time\n            total_time += flownet_time\n\n            print(\"Flownet time: %.2f\" % flownet_time)\n\n            seg_thresholds = np.linspace(seg_thresh, 0.95, iter_num - 1)[::-1]\n            for iter in range(iter_num):\n                flow = flow_output.clone()\n                if iter == 0:\n                    cow_sigma_range = (20, 60)\n                    log_sigma_range = 
(np.log(cow_sigma_range[0]), np.log(cow_sigma_range[1]))\n                    cow_prop_range = (0.3, 0.6)\n                    segmask = cow_masks(flow.shape[-2:], log_sigma_range, cow_sigma_range[1], cow_prop_range).astype(np.float32)\n                    segmask = segmask[None,None,...]\n                    segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()\n\n                start_time = time.time()\n                _, pose_output = self.vonet([img0, img1, intrinsic, flow, segmask], only_pose=True)\n                posenet_time = time.time() - start_time\n                total_time += posenet_time\n\n                print(\"Iter %d, Posenet time: %.2f; \" % (iter, posenet_time), end='')\n\n                # Do not pass segnet in the last iteration\n                if iter == iter_num - 1:\n                    break\n\n                seg_thresh = seg_thresholds[iter] if iter < iter_num-1 else seg_thresh\n                pose_input = pose_output.data.cpu().detach().numpy().squeeze()\n                pose_input = pose_input * self.pose_norm\n                pose_input = self.camT.T.dot(se2SE(pose_input)).dot(self.camT)\n                \n                start_time = time.time()\n                disc_aux = [self.intr_list, imgL_noaug, pose_input[:3,:]]\n                fgmask = self.segnet(imgLR, disc_aux, flowdc)\n                segnet_time = time.time() - start_time\n                total_time += segnet_time\n                \n                fgmask = cv2.resize(fgmask.cpu().numpy(), (self.input_size[1], self.input_size[0]), interpolation=cv2.INTER_LINEAR).astype(np.float32)\n                fg_probs = self.sigmoid(fgmask)\n                segmask = np.zeros(fgmask.shape[:2])\n                segmask[fg_probs > seg_thresh] = 1.0\n\n                # Resize/Crop segmask (Resize + Crop + Downscale 1/4)\n                dummysample = {'segmask': segmask}\n                if self.resizedata is not None:\n                    
dummysample = self.resizedata(dummysample)\n                dummysample = self.cropdata(dummysample)\n                segmask = dummysample['segmask']\n                segmask = cv2.resize(segmask, (0,0), fx=0.25, fy=0.25, interpolation=cv2.INTER_LINEAR)\n                segmask = segmask[None,None,...].astype(np.float32)\n                segmask = torch.from_numpy(np.concatenate((segmask,) * img0.shape[0], axis=0)).cuda()\n\n                print(\"Segnet time: %.2f\" % segnet_time)\n            \n            posenp = pose_output.data.cpu().detach().numpy().squeeze()\n            posenp = posenp * self.pose_norm  # The output is normalized during training, now scale it back\n            flownp = flow.data.cpu().detach().numpy().squeeze()\n            flownp = flownp * self.flow_norm\n\n        # # calculate scale from GT posefile\n        # if 'motion' in sample:\n        #     motions_gt = sample['motion']\n        #     scale = np.linalg.norm(motions_gt[:,:3], axis=1)\n        #     trans_est = posenp[:,:3]\n        #     trans_est = trans_est/np.linalg.norm(trans_est,axis=1).reshape(-1,1)*scale.reshape(-1,1)\n        #     posenp[:,:3] = trans_est \n        # else:\n        #     print('    scale is not given, using 1 as the default scale value..')\n\n        print(\"\\n{} Pose inference using {}s: \\n{}\\n\".format(self.test_count, total_time, posenp))\n\n        return posenp, flownp\n\n    def initialize_segnet_input(self, imgL_o, intrinsics):\n        maxh = imgL_o.shape[0] * self.testres\n        maxw = imgL_o.shape[1] * self.testres\n        self.max_h = int(maxh // 64 * 64)\n        self.max_w = int(maxw // 64 * 64)\n        if self.max_h < maxh: self.max_h += 64\n        if self.max_w < maxw: self.max_w += 64\n        self.input_size = imgL_o.shape\n\n        # modify module according to inputs\n        for i in range(len(self.segnet.module.reg_modules)):\n            self.segnet.module.reg_modules[i] = flow_reg([1, self.max_w//(2**(6-i)), 
self.max_h//(2**(6-i))], \n                            ent=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).ent,\\\n                            maxdisp=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).md,\\\n                            fac=getattr(self.segnet.module, 'flow_reg%d'%2**(6-i)).fac).cuda()\n        for i in range(len(self.segnet.module.warp_modules)):\n            self.segnet.module.warp_modules[i] = WarpModule([1, self.max_w//(2**(6-i)), self.max_h//(2**(6-i))]).cuda()\n\n        # format intrinsics input\n        fl, cx, cy, bl = intrinsics\n        fl_next = fl  # assuming focal length remains the same across frames\n        self.intr_list = [torch.Tensor(inxx).cuda() for inxx in [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]]]\n        self.intr_list.append(torch.Tensor([self.input_size[1] / self.max_w]).cuda()) # delta fx\n        self.intr_list.append(torch.Tensor([self.input_size[0] / self.max_h]).cuda()) # delta fy\n        self.intr_list.append(torch.Tensor([fl_next]).cuda())\n\n    def transform_segnet_input(self, imgL_o, imgR_o):\n        imgL = cv2.resize(imgL_o, (self.max_w, self.max_h))\n        imgR = cv2.resize(imgR_o, (self.max_w, self.max_h))\n        imgL_noaug = torch.Tensor(imgL / 255.)[np.newaxis].float().cuda()\n        \n        # flip channel, subtract mean\n        imgL = imgL[:,:,::-1].copy() / 255. - np.asarray(self.mean_L).mean(0)[np.newaxis,np.newaxis,:]\n        imgR = imgR[:,:,::-1].copy() / 255. - np.asarray(self.mean_R).mean(0)[np.newaxis,np.newaxis,:]\n        imgL = np.transpose(imgL, [2,0,1])[np.newaxis]\n        imgR = np.transpose(imgR, [2,0,1])[np.newaxis]\n        imgL = Variable(torch.FloatTensor(imgL).cuda())\n        imgR = Variable(torch.FloatTensor(imgR).cuda())\n        imgLR = torch.cat([imgL,imgR],0)\n\n        return imgL_noaug, imgLR"
  },
  {
    "path": "LICENSE",
    "content": "BSD 3-Clause License\n\nCopyright (c) 2020, Air Lab Stacks\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n3. Neither the name of the copyright holder nor the names of its\n   contributors may be used to endorse or promote products derived from\n   this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "Network/PWC/PWCNet.py",
    "content": "\"\"\"\nimplementation of the PWC-DC network for optical flow estimation by Sun et al., 2018\n\nJinwei Gu and Zhile Ren\n\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport os\nimport numpy as np\nfrom .correlation import FunctionCorrelation\nimport cv2 # debug\n\ndef conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):   \n    return nn.Sequential(\n            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, \n                        padding=padding, dilation=dilation, bias=True),\n            nn.LeakyReLU(0.1))\n\ndef predict_flow(in_planes):\n    return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)\n\ndef deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):\n    return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True)\n\n\n\nclass PWCDCNet(nn.Module):\n    \"\"\"\n    PWC-DC net. add dilation convolution and densenet connections\n\n    \"\"\"\n    def __init__(self, md=4, flow_norm=20.0):\n        \"\"\"\n        input: md --- maximum displacement (for correlation. 
default: 4), after warpping\n\n        \"\"\"\n        super(PWCDCNet,self).__init__()\n\n        self.flow_norm = flow_norm\n        \n        self.conv1a  = conv(3,   16, kernel_size=3, stride=2)\n        self.conv1aa = conv(16,  16, kernel_size=3, stride=1)\n        self.conv1b  = conv(16,  16, kernel_size=3, stride=1)\n        self.conv2a  = conv(16,  32, kernel_size=3, stride=2)\n        self.conv2aa = conv(32,  32, kernel_size=3, stride=1)\n        self.conv2b  = conv(32,  32, kernel_size=3, stride=1)\n        self.conv3a  = conv(32,  64, kernel_size=3, stride=2)\n        self.conv3aa = conv(64,  64, kernel_size=3, stride=1)\n        self.conv3b  = conv(64,  64, kernel_size=3, stride=1)\n        self.conv4a  = conv(64,  96, kernel_size=3, stride=2)\n        self.conv4aa = conv(96,  96, kernel_size=3, stride=1)\n        self.conv4b  = conv(96,  96, kernel_size=3, stride=1)\n        self.conv5a  = conv(96, 128, kernel_size=3, stride=2)\n        self.conv5aa = conv(128,128, kernel_size=3, stride=1)\n        self.conv5b  = conv(128,128, kernel_size=3, stride=1)\n        self.conv6aa = conv(128,196, kernel_size=3, stride=2)\n        self.conv6a  = conv(196,196, kernel_size=3, stride=1)\n        self.conv6b  = conv(196,196, kernel_size=3, stride=1)\n\n        # self.corr    = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)\n        self.leakyRELU = nn.LeakyReLU(0.1)\n        \n        nd = (2*md+1)**2\n        dd = np.cumsum([128,128,96,64,32])\n\n        od = nd\n        self.conv6_0 = conv(od,      128, kernel_size=3, stride=1)\n        self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)\n        self.conv6_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)\n        self.conv6_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)\n        self.conv6_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)        \n        self.predict_flow6 = predict_flow(od+dd[4])\n        self.deconv6 = deconv(2, 2, kernel_size=4, 
stride=2, padding=1) \n        self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) \n        \n        od = nd+128+4\n        self.conv5_0 = conv(od,      128, kernel_size=3, stride=1)\n        self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)\n        self.conv5_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)\n        self.conv5_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)\n        self.conv5_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)\n        self.predict_flow5 = predict_flow(od+dd[4]) \n        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) \n        self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) \n        \n        od = nd+96+4\n        self.conv4_0 = conv(od,      128, kernel_size=3, stride=1)\n        self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)\n        self.conv4_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)\n        self.conv4_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)\n        self.conv4_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)\n        self.predict_flow4 = predict_flow(od+dd[4]) \n        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) \n        self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) \n        \n        od = nd+64+4\n        self.conv3_0 = conv(od,      128, kernel_size=3, stride=1)\n        self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)\n        self.conv3_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)\n        self.conv3_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)\n        self.conv3_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)\n        self.predict_flow3 = predict_flow(od+dd[4]) \n        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) \n        self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) \n        \n        od = nd+32+4\n        self.conv2_0 = conv(od,      128, kernel_size=3, stride=1)\n        self.conv2_1 = conv(od+dd[0],128, 
kernel_size=3, stride=1)\n        self.conv2_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)\n        self.conv2_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)\n        self.conv2_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)\n        self.predict_flow2 = predict_flow(od+dd[4]) \n        self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) \n        \n        self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)\n        self.dc_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc_conv7 = predict_flow(32)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):\n                nn.init.kaiming_normal(m.weight.data, mode='fan_in')\n                if m.bias is not None:\n                    m.bias.data.zero_()\n\n\n    def warp(self, x, flo):\n        \"\"\"\n        warp an image/tensor (im2) back to im1, according to the optical flow\n\n        x: [B, C, H, W] (im2)\n        flo: [B, 2, H, W] flow\n\n        \"\"\"\n        B, C, H, W = x.size()\n        # mesh grid \n        xx = torch.arange(0, W).view(1,-1).repeat(H,1)\n        yy = torch.arange(0, H).view(-1,1).repeat(1,W)\n        xx = xx.view(1,1,H,W).repeat(B,1,1,1)\n        yy = yy.view(1,1,H,W).repeat(B,1,1,1)\n        grid = torch.cat((xx,yy),1).float()\n\n        if x.is_cuda:\n            grid = grid.cuda()\n        vgrid = grid + flo\n\n        # scale grid to [-1,1] \n        vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:].clone() / max(W-1,1)-1.0\n        vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:].clone() / 
max(H-1,1)-1.0\n\n        vgrid = vgrid.permute(0,2,3,1)        \n        output = nn.functional.grid_sample(x, vgrid, align_corners=True)\n        mask = torch.ones(x.size()).cuda()\n        mask = nn.functional.grid_sample(mask, vgrid, align_corners=True)\n\n        # if W==128:\n            # np.save('mask.npy', mask.cpu().data.numpy())\n            # np.save('warp.npy', output.cpu().data.numpy())\n        \n        mask[mask<0.9999] = 0\n        mask[mask>0] = 1\n        \n        return output*mask\n\n    def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_func, conv4_func, input_feat):\n        x = torch.cat((conv0_func(input_feat), input_feat),1)\n        x = torch.cat((conv1_func(x), x),1)\n        x = torch.cat((conv2_func(x), x),1)\n        x = torch.cat((conv3_func(x), x),1)\n        x = torch.cat((conv4_func(x), x),1)\n        return x\n\n    def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_high, feat_low1, feat_low2, scale):\n        flow_high = pred_func(feat_high)\n        up_flow_high = decon_func(flow_high)\n        up_feat_high = upfeat_func(feat_high)\n\n        warp_feat = self.warp(feat_low2, up_flow_high*scale)\n        corr_low = FunctionCorrelation(tenFirst=feat_low1, tenSecond=warp_feat)\n        corr_low = self.leakyRELU(corr_low)\n        x = torch.cat((corr_low, feat_low1, up_flow_high, up_feat_high), 1)\n\n        return x, flow_high\n\n    def forward(self,x):\n        im1 = x[:,0:3,...]\n        im2 = x[:,3:6,...]\n        \n        c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))\n        c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))\n        c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))\n        c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))\n        c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))\n        c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))\n        c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))\n        c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))\n        
c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))\n        c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))\n        c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))\n        c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))\n\n\n        # corr6 = self.corr(c16, c26) \n        corr6 = FunctionCorrelation(tenFirst=c16, tenSecond=c26)\n        corr6 = self.leakyRELU(corr6)   \n\n        x = self.multi_scale_conv(self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4, corr6)\n        x, flow6 = self.concate_two_layers(self.predict_flow6, self.deconv6, self.upfeat6, x, c15, c25, 0.625)\n\n        x = self.multi_scale_conv(self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, x)\n        x, flow5 = self.concate_two_layers(self.predict_flow5, self.deconv5, self.upfeat5, x, c14, c24, 1.25)\n\n        x = self.multi_scale_conv(self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, x)\n        x, flow4 = self.concate_two_layers(self.predict_flow4, self.deconv4, self.upfeat4, x, c13, c23, 2.5)\n\n        x = self.multi_scale_conv(self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, x)\n        x, flow3 = self.concate_two_layers(self.predict_flow3, self.deconv3, self.upfeat3, x, c12, c22, 5.0)\n\n        x = self.multi_scale_conv(self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4, x)\n\n        flow2 = self.predict_flow2(x)\n \n        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))\n        refine = self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))\n        flow2 = flow2 + refine\n        \n        return flow2\n\n\ndef pwc_dc_net(path=None):\n\n    model = PWCDCNet()\n    if path is not None:\n        data = torch.load(path)\n        if 'state_dict' in data.keys():\n            model.load_state_dict(data['state_dict'])\n        else:\n            model.load_state_dict(data)\n    return model\n\n\n\n\n"
  },
  {
    "path": "Network/PWC/__init__.py",
    "content": "from .PWCNet import *\n"
  },
  {
    "path": "Network/PWC/correlation.py",
    "content": "#!/usr/bin/env python\n\nimport torch\n\nimport cupy\nimport re\n\nkernel_Correlation_rearrange = '''\n\textern \"C\" __global__ void kernel_Correlation_rearrange(\n\t\tconst int n,\n\t\tconst float* input,\n\t\tfloat* output\n\t) {\n\t  int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;\n\n\t  if (intIndex >= n) {\n\t    return;\n\t  }\n\n\t  int intSample = blockIdx.z;\n\t  int intChannel = blockIdx.y;\n\n\t  float fltValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];\n\n\t  __syncthreads();\n\n\t  int intPaddedY = (intIndex / SIZE_3(input)) + 4;\n\t  int intPaddedX = (intIndex % SIZE_3(input)) + 4;\n\t  int intRearrange = ((SIZE_3(input) + 8) * intPaddedY) + intPaddedX;\n\n\t  output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = fltValue;\n\t}\n'''\n\nkernel_Correlation_updateOutput = '''\n\textern \"C\" __global__ void kernel_Correlation_updateOutput(\n\t  const int n,\n\t  const float* rbot0,\n\t  const float* rbot1,\n\t  float* top\n\t) {\n\t  extern __shared__ char patch_data_char[];\n\t  \n\t  float *patch_data = (float *)patch_data_char;\n\t  \n\t  // First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1\n\t  int x1 = blockIdx.x + 4;\n\t  int y1 = blockIdx.y + 4;\n\t  int item = blockIdx.z;\n\t  int ch_off = threadIdx.x;\n\t  \n\t  // Load 3D patch into shared shared memory\n\t  for (int j = 0; j < 1; j++) { // HEIGHT\n\t    for (int i = 0; i < 1; i++) { // WIDTH\n\t      int ji_off = (j + i) * SIZE_3(rbot0);\n\t      for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS\n\t        int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;\n\t        int idxPatchData = ji_off + ch;\n\t        patch_data[idxPatchData] = rbot0[idx1];\n\t      }\n\t    }\n\t  }\n\t  \n\t  __syncthreads();\n\t  \n\t  __shared__ float sum[32];\n\t  \n\t  // 
Compute correlation\n\t  for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {\n\t    sum[ch_off] = 0;\n\t  \n\t    int s2o = top_channel % 9 - 4;\n\t    int s2p = top_channel / 9 - 4;\n\t    \n\t    for (int j = 0; j < 1; j++) { // HEIGHT\n\t      for (int i = 0; i < 1; i++) { // WIDTH\n\t        int ji_off = (j + i) * SIZE_3(rbot0);\n\t        for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS\n\t          int x2 = x1 + s2o;\n\t          int y2 = y1 + s2p;\n\t          \n\t          int idxPatchData = ji_off + ch;\n\t          int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;\n\t          \n\t          sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];\n\t        }\n\t      }\n\t    }\n\t    \n\t    __syncthreads();\n\t    \n\t    if (ch_off == 0) {\n\t      float total_sum = 0;\n\t      for (int idx = 0; idx < 32; idx++) {\n\t        total_sum += sum[idx];\n\t      }\n\t      const int sumelems = SIZE_3(rbot0);\n\t      const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;\n\t      top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;\n\t    }\n\t  }\n\t}\n'''\n\nkernel_Correlation_updateGradFirst = '''\n\t#define ROUND_OFF 50000\n\n\textern \"C\" __global__ void kernel_Correlation_updateGradFirst(\n\t  const int n,\n\t  const int intSample,\n\t  const float* rbot0,\n\t  const float* rbot1,\n\t  const float* gradOutput,\n\t  float* gradFirst,\n\t  float* gradSecond\n\t) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {\n\t  int n = intIndex % SIZE_1(gradFirst); // channels\n\t  int l = (intIndex / SIZE_1(gradFirst)) % SIZE_3(gradFirst) + 4; // w-pos\n\t  int m = (intIndex / SIZE_1(gradFirst) / SIZE_3(gradFirst)) % SIZE_2(gradFirst) + 4; // h-pos\n\t  \n\t  // round_off is a trick to enable integer division with ceil, even for negative numbers\n\t  // We use a large 
offset, for the inner part not to become negative.\n\t  const int round_off = ROUND_OFF;\n\t  const int round_off_s1 = round_off;\n\t  \n\t  // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:\n\t  int xmin = (l - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)\n\t  int ymin = (m - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)\n\t  \n\t  // Same here:\n\t  int xmax = (l - 4 + round_off_s1) - round_off; // floor (l - 4)\n\t  int ymax = (m - 4 + round_off_s1) - round_off; // floor (m - 4)\n\t  \n\t  float sum = 0;\n\t  if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {\n\t    xmin = max(0,xmin);\n\t    xmax = min(SIZE_3(gradOutput)-1,xmax);\n\t    \n\t    ymin = max(0,ymin);\n\t    ymax = min(SIZE_2(gradOutput)-1,ymax);\n\t    \n\t    for (int p = -4; p <= 4; p++) {\n\t      for (int o = -4; o <= 4; o++) {\n\t        // Get rbot1 data:\n\t        int s2o = o;\n\t        int s2p = p;\n\t        int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;\n\t        float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]\n\t        \n\t        // Index offset for gradOutput in following loops:\n\t        int op = (p+4) * 9 + (o+4); // index[o,p]\n\t        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);\n\t        \n\t        for (int y = ymin; y <= ymax; y++) {\n\t          for (int x = xmin; x <= xmax; x++) {\n\t            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]\n\t            sum += gradOutput[idxgradOutput] * bot1tmp;\n\t          }\n\t        }\n\t      }\n\t    }\n\t  }\n\t  const int sumelems = SIZE_1(gradFirst);\n\t  const int bot0index = ((n * SIZE_2(gradFirst)) + (m-4)) * SIZE_3(gradFirst) + (l-4);\n\t  gradFirst[bot0index + intSample*SIZE_1(gradFirst)*SIZE_2(gradFirst)*SIZE_3(gradFirst)] = sum / (float)sumelems;\n\t} 
}\n'''\n\nkernel_Correlation_updateGradSecond = '''\n\t#define ROUND_OFF 50000\n\n\textern \"C\" __global__ void kernel_Correlation_updateGradSecond(\n\t  const int n,\n\t  const int intSample,\n\t  const float* rbot0,\n\t  const float* rbot1,\n\t  const float* gradOutput,\n\t  float* gradFirst,\n\t  float* gradSecond\n\t) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {\n\t  int n = intIndex % SIZE_1(gradSecond); // channels\n\t  int l = (intIndex / SIZE_1(gradSecond)) % SIZE_3(gradSecond) + 4; // w-pos\n\t  int m = (intIndex / SIZE_1(gradSecond) / SIZE_3(gradSecond)) % SIZE_2(gradSecond) + 4; // h-pos\n\t  \n\t  // round_off is a trick to enable integer division with ceil, even for negative numbers\n\t  // We use a large offset, for the inner part not to become negative.\n\t  const int round_off = ROUND_OFF;\n\t  const int round_off_s1 = round_off;\n\t  \n\t  float sum = 0;\n\t  for (int p = -4; p <= 4; p++) {\n\t    for (int o = -4; o <= 4; o++) {\n\t      int s2o = o;\n\t      int s2p = p;\n\t      \n\t      //Get X,Y ranges and clamp\n\t      // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:\n\t      int xmin = (l - 4 - s2o + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)\n\t      int ymin = (m - 4 - s2p + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)\n\t      \n\t      // Same here:\n\t      int xmax = (l - 4 - s2o + round_off_s1) - round_off; // floor (l - 4 - s2o)\n\t      int ymax = (m - 4 - s2p + round_off_s1) - round_off; // floor (m - 4 - s2p)\n          \n\t      if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {\n\t        xmin = max(0,xmin);\n\t        xmax = min(SIZE_3(gradOutput)-1,xmax);\n\t        \n\t        ymin = max(0,ymin);\n\t        ymax = min(SIZE_2(gradOutput)-1,ymax);\n\t        \n\t        // Get rbot0 data:\n\t        int idxbot0 = 
((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;\n\t        float bot0tmp = rbot0[idxbot0]; // rbot1[l+s2o,m+s2p,n]\n\t        \n\t        // Index offset for gradOutput in following loops:\n\t        int op = (p+4) * 9 + (o+4); // index[o,p]\n\t        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);\n\t        \n\t        for (int y = ymin; y <= ymax; y++) {\n\t          for (int x = xmin; x <= xmax; x++) {\n\t            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]\n\t            sum += gradOutput[idxgradOutput] * bot0tmp;\n\t          }\n\t        }\n\t      }\n\t    }\n\t  }\n\t  const int sumelems = SIZE_1(gradSecond);\n\t  const int bot1index = ((n * SIZE_2(gradSecond)) + (m-4)) * SIZE_3(gradSecond) + (l-4);\n\t  gradSecond[bot1index + intSample*SIZE_1(gradSecond)*SIZE_2(gradSecond)*SIZE_3(gradSecond)] = sum / (float)sumelems;\n\t} }\n'''\n\ndef cupy_kernel(strFunction, objVariables):\n\tstrKernel = globals()[strFunction]\n\n\twhile True:\n\t\tobjMatch = re.search('(SIZE_)([0-4])(\\()([^\\)]*)(\\))', strKernel)\n\n\t\tif objMatch is None:\n\t\t\tbreak\n\t\t# end\n\n\t\tintArg = int(objMatch.group(2))\n\n\t\tstrTensor = objMatch.group(4)\n\t\tintSizes = objVariables[strTensor].size()\n\n\t\tstrKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg]))\n\t# end\n\n\twhile True:\n\t\tobjMatch = re.search('(VALUE_)([0-4])(\\()([^\\)]+)(\\))', strKernel)\n\n\t\tif objMatch is None:\n\t\t\tbreak\n\t\t# end\n\n\t\tintArgs = int(objMatch.group(2))\n\t\tstrArgs = objMatch.group(4).split(',')\n\n\t\tstrTensor = strArgs[0]\n\t\tintStrides = objVariables[strTensor].stride()\n\t\tstrIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]\n\n\t\tstrKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')\n\t# end\n\n\treturn 
strKernel\n# end\n\n# @cupy.util.memoize(for_each_device=True)\ndef cupy_launch(strFunction, strKernel):\n\treturn cupy.cuda.compile_with_cache(strKernel).get_function(strFunction)\n# end\n\nclass _FunctionCorrelation(torch.autograd.Function):\n\t@staticmethod\n\tdef forward(self, first, second):\n\t\trbot0 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])\n\t\trbot1 = first.new_zeros([ first.shape[0], first.shape[2] + 8, first.shape[3] + 8, first.shape[1] ])\n\n\t\tself.save_for_backward(first, second, rbot0, rbot1)\n\n\t\tassert(first.is_contiguous() == True)\n\t\tassert(second.is_contiguous() == True)\n\n\t\toutput = first.new_zeros([ first.shape[0], 81, first.shape[2], first.shape[3] ])\n\n\t\tif first.is_cuda == True:\n\t\t\tn = first.shape[2] * first.shape[3]\n\t\t\tcupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {\n\t\t\t\t'input': first,\n\t\t\t\t'output': rbot0\n\t\t\t}))(\n\t\t\t\tgrid=tuple([ int((n + 16 - 1) / 16), first.shape[1], first.shape[0] ]),\n\t\t\t\tblock=tuple([ 16, 1, 1 ]),\n\t\t\t\targs=[ n, first.data_ptr(), rbot0.data_ptr() ]\n\t\t\t)\n\n\t\t\tn = second.shape[2] * second.shape[3]\n\t\t\tcupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {\n\t\t\t\t'input': second,\n\t\t\t\t'output': rbot1\n\t\t\t}))(\n\t\t\t\tgrid=tuple([ int((n + 16 - 1) / 16), second.shape[1], second.shape[0] ]),\n\t\t\t\tblock=tuple([ 16, 1, 1 ]),\n\t\t\t\targs=[ n, second.data_ptr(), rbot1.data_ptr() ]\n\t\t\t)\n\n\t\t\tn = output.shape[1] * output.shape[2] * output.shape[3]\n\t\t\tcupy_launch('kernel_Correlation_updateOutput', cupy_kernel('kernel_Correlation_updateOutput', {\n\t\t\t\t'rbot0': rbot0,\n\t\t\t\t'rbot1': rbot1,\n\t\t\t\t'top': output\n\t\t\t}))(\n\t\t\t\tgrid=tuple([ output.shape[3], output.shape[2], output.shape[0] ]),\n\t\t\t\tblock=tuple([ 32, 1, 1 ]),\n\t\t\t\tshared_mem=first.shape[1] * 4,\n\t\t\t\targs=[ n, rbot0.data_ptr(), 
rbot1.data_ptr(), output.data_ptr() ]\n\t\t\t)\n\n\t\telif first.is_cuda == False:\n\t\t\traise NotImplementedError()\n\n\t\t# end\n\n\t\treturn output\n\t# end\n\n\t@staticmethod\n\tdef backward(self, gradOutput):\n\t\tfirst, second, rbot0, rbot1 = self.saved_tensors\n\n\t\tassert(gradOutput.is_contiguous() == True)\n\n\t\tgradFirst = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[0] == True else None\n\t\tgradSecond = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[1] == True else None\n\n\t\tif first.is_cuda == True:\n\t\t\tif gradFirst is not None:\n\t\t\t\tfor intSample in range(first.shape[0]):\n\t\t\t\t\tn = first.shape[1] * first.shape[2] * first.shape[3]\n\t\t\t\t\tcupy_launch('kernel_Correlation_updateGradFirst', cupy_kernel('kernel_Correlation_updateGradFirst', {\n\t\t\t\t\t\t'rbot0': rbot0,\n\t\t\t\t\t\t'rbot1': rbot1,\n\t\t\t\t\t\t'gradOutput': gradOutput,\n\t\t\t\t\t\t'gradFirst': gradFirst,\n\t\t\t\t\t\t'gradSecond': None\n\t\t\t\t\t}))(\n\t\t\t\t\t\tgrid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),\n\t\t\t\t\t\tblock=tuple([ 512, 1, 1 ]),\n\t\t\t\t\t\targs=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), gradFirst.data_ptr(), None ]\n\t\t\t\t\t)\n\t\t\t\t# end\n\t\t\t# end\n\n\t\t\tif gradSecond is not None:\n\t\t\t\tfor intSample in range(first.shape[0]):\n\t\t\t\t\tn = first.shape[1] * first.shape[2] * first.shape[3]\n\t\t\t\t\tcupy_launch('kernel_Correlation_updateGradSecond', cupy_kernel('kernel_Correlation_updateGradSecond', {\n\t\t\t\t\t\t'rbot0': rbot0,\n\t\t\t\t\t\t'rbot1': rbot1,\n\t\t\t\t\t\t'gradOutput': gradOutput,\n\t\t\t\t\t\t'gradFirst': None,\n\t\t\t\t\t\t'gradSecond': gradSecond\n\t\t\t\t\t}))(\n\t\t\t\t\t\tgrid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),\n\t\t\t\t\t\tblock=tuple([ 512, 1, 1 ]),\n\t\t\t\t\t\targs=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), 
None, gradSecond.data_ptr() ]\n\t\t\t\t\t)\n\t\t\t\t# end\n\t\t\t# end\n\n\t\telif first.is_cuda == False:\n\t\t\traise NotImplementedError()\n\n\t\t# end\n\n\t\treturn gradFirst, gradSecond\n\t# end\n# end\n\ndef FunctionCorrelation(tenFirst, tenSecond):\n\treturn _FunctionCorrelation.apply(tenFirst, tenSecond)\n# end\n\nclass ModuleCorrelation(torch.nn.Module):\n\tdef __init__(self):\n\t\tsuper(ModuleCorrelation, self).__init__()\n\t# end\n\n\tdef forward(self, tenFirst, tenSecond):\n\t\treturn _FunctionCorrelation.apply(tenFirst, tenSecond)\n\t# end\n# end"
  },
  {
    "path": "Network/VOFlowNet.py",
    "content": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, CMU\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions\n# are met:\n#\n#  * Redistributions of source code must retain the above copyright\n#    notice, this list of conditions and the following disclaimer.\n#  * Redistributions in binary form must reproduce the above\n#    copyright notice, this list of conditions and the following\n#    disclaimer in the documentation and/or other materials provided\n#    with the distribution.\n#  * Neither the name of CMU nor the names of its\n#    contributors may be used to endorse or promote products derived\n#    from this software without specific prior written permission.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE\n# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n# POSSIBILITY OF SUCH DAMAGE.\n\nimport torch \nimport torch.nn as nn\nimport torch.nn.functional as F\nimport math\n\ndef conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dilation=1, bn_layer=False, bias=True):\n    if bn_layer:\n        return nn.Sequential(\n            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation, bias=bias),\n            nn.BatchNorm2d(out_planes),\n            nn.ReLU(inplace=True)\n        )\n    else: \n        return nn.Sequential(\n            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation),\n            nn.ReLU(inplace=True)\n        )\n\ndef linear(in_planes, out_planes):\n    return nn.Sequential(\n        nn.Linear(in_planes, out_planes), \n        nn.ReLU(inplace=True)\n        )\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n    def __init__(self, inplanes, planes, stride, downsample, pad, dilation):\n        super(BasicBlock, self).__init__()\n\n        self.conv1 = conv(inplanes, planes, 3, stride, pad, dilation)\n        self.conv2 = nn.Conv2d(planes, planes, 3, 1, pad, dilation)\n\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        out = self.conv1(x)\n        out = self.conv2(out)\n\n        if self.downsample is not None:\n            x = self.downsample(x)\n        out += x\n\n        return F.relu(out, inplace=True)\n\nclass 
VOFlowRes(nn.Module):\n    def __init__(self):\n        super(VOFlowRes, self).__init__()\n        inputnum = 5\n        blocknums = [2,2,3,4,6,7,3]\n        outputnums = [32,64,64,128,128,256,256]\n\n        self.firstconv = nn.Sequential(conv(inputnum, 32, 3, 2, 1, 1, False),\n                                       conv(32, 32, 3, 1, 1, 1),\n                                       conv(32, 32, 3, 1, 1, 1))\n\n        self.inplanes = 32\n\n        self.layer1 = self._make_layer(BasicBlock, outputnums[2], blocknums[2], 2, 1, 1) # 40 x 28\n        self.layer2 = self._make_layer(BasicBlock, outputnums[3], blocknums[3], 2, 1, 1) # 20 x 14\n        self.layer3 = self._make_layer(BasicBlock, outputnums[4], blocknums[4], 2, 1, 1) # 10 x 7\n        self.layer4 = self._make_layer(BasicBlock, outputnums[5], blocknums[5], 2, 1, 1) # 5 x 4\n        self.layer5 = self._make_layer(BasicBlock, outputnums[6], blocknums[6], 2, 1, 1) # 3 x 2\n        fcnum = outputnums[6] * 6\n\n        fc1_trans = linear(fcnum, 128)\n        fc2_trans = linear(128,32)\n        fc3_trans = nn.Linear(32,3)\n\n        fc1_rot = linear(fcnum, 128)\n        fc2_rot = linear(128,32)\n        fc3_rot = nn.Linear(32,3)\n\n\n        self.voflow_trans = nn.Sequential(fc1_trans, fc2_trans, fc3_trans)\n        self.voflow_rot = nn.Sequential(fc1_rot, fc2_rot, fc3_rot)\n\n\n    def _make_layer(self, block, planes, blocks, stride, pad, dilation):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n           downsample = nn.Conv2d(self.inplanes, planes * block.expansion,\n                          kernel_size=1, stride=stride)\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes,1,None,pad,dilation))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n  
      x = self.firstconv(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        x = self.layer5(x)\n        \n        x = x.view(x.shape[0], -1)\n        x_trans = self.voflow_trans(x)\n        x_rot = self.voflow_rot(x)\n        return torch.cat((x_trans, x_rot), dim=1)"
  },
  {
    "path": "Network/VONet.py",
    "content": "# Software License Agreement (BSD License)\n#\n# Copyright (c) 2020, Wenshan Wang, Yaoyu Hu,  CMU\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions\n# are met:\n#\n#  * Redistributions of source code must retain the above copyright\n#    notice, this list of conditions and the following disclaimer.\n#  * Redistributions in binary form must reproduce the above\n#    copyright notice, this list of conditions and the following\n#    disclaimer in the documentation and/or other materials provided\n#    with the distribution.\n#  * Neither the name of CMU nor the names of its\n#    contributors may be used to endorse or promote products derived\n#    from this software without specific prior written permission.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE\n# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n# POSSIBILITY OF SUCH DAMAGE.\n\nimport torch \nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom .PWC import PWCDCNet as FlowNet\nfrom .VOFlowNet import VOFlowRes as FlowPoseNet\n\nclass VONet(nn.Module):\n    def __init__(self):\n        super(VONet, self).__init__()\n\n        self.flowNet     = FlowNet()\n        self.flowPoseNet = FlowPoseNet()\n\n    def forward(self, x, only_flow=False, only_pose=False):\n        '''\n        x[0]: rgb frame t-1\n        x[1]: rgb frame t\n        x[2]: intrinsics\n        x[3]: flow t-1 -> t (optional)\n        x[4]: motion segmentation mask\n        '''\n        # import ipdb;ipdb.set_trace()\n        if not only_pose:\n            flow_out = self.flowNet(torch.cat((x[0], x[1]), dim=1))\n\n            if only_flow:\n                return flow_out, None\n            \n            flow = flow_out[0]\n\n        else:\n            assert(len(x) > 3)\n            flow_out = None\n\n        if len(x) > 3 and x[3] is not None:\n            flow_input = x[3]\n        else:\n            flow_input = flow\n\n        # Mask out input flow using the segmentation result\n        assert(len(x) > 4)\n        mask = torch.gt(x[4], 0)\n        for i in range(flow_input.shape[0]):\n            zeros = torch.cat([mask[i], ]*2, dim=0)\n            flow_input[i][zeros] = 0\n\n        flow_input = torch.cat((flow_input, 1 - x[4]), dim=1)  # segmentation layer\n        flow_input = torch.cat((flow_input, x[2]), dim=1)  # intrinsics 
layer\n    \n        pose = self.flowPoseNet(flow_input)\n\n        return flow_out, pose\n"
  },
  {
    "path": "Network/__init__.py",
    "content": ""
  },
  {
    "path": "Network/rigidmask/.gitignore",
    "content": "__pycache__\n"
  },
  {
    "path": "Network/rigidmask/VCNplus.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as np\nimport math\nimport pdb\nimport cv2\nimport kornia\n\nfrom .submodule import pspnet, bfmodule, bfmodule_feat, conv, compute_geo_costs, get_skew_mat, get_intrinsics\nfrom .conv4d import sepConv4d, butterfly4D\nfrom scipy.spatial.transform import Rotation\nfrom .det import create_model\n\nclass flow_reg(nn.Module):\n    \"\"\"\n    Soft winner-take-all that selects the most likely diplacement.\n    Set ent=True to enable entropy output.\n    Set maxdisp to adjust maximum allowed displacement towards one side.\n        maxdisp=4 searches for a 9x9 region.\n    Set fac to squeeze search window.\n        maxdisp=4 and fac=2 gives search window of 9x5\n    \"\"\"\n    def __init__(self, size, ent=False, maxdisp = int(4), fac=1):\n        B,W,H = size\n        super(flow_reg, self).__init__()\n        self.ent = ent\n        self.md = maxdisp\n        self.fac = fac\n        self.truncated = True\n        self.wsize = 3  # by default using truncation 7x7\n\n        flowrangey = range(-maxdisp,maxdisp+1)\n        flowrangex = range(-int(maxdisp//self.fac),int(maxdisp//self.fac)+1)\n        meshgrid = np.meshgrid(flowrangex,flowrangey)\n        flowy = np.tile( np.reshape(meshgrid[0],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )\n        flowx = np.tile( np.reshape(meshgrid[1],[1,2*maxdisp+1,2*int(maxdisp//self.fac)+1,1,1]), (B,1,1,H,W) )\n        self.register_buffer('flowx',torch.Tensor(flowx))\n        self.register_buffer('flowy',torch.Tensor(flowy))\n\n        self.pool3d = nn.MaxPool3d((self.wsize*2+1,self.wsize*2+1,1),stride=1,padding=(self.wsize,self.wsize,0))\n\n    def forward(self, x):\n        b,u,v,h,w = x.shape\n        oldx = x\n\n        if self.truncated:\n            # truncated softmax\n            x = x.view(b,u*v,h,w)\n\n            idx = x.argmax(1)[:,np.newaxis]\n            if x.is_cuda:\n     
           mask = Variable(torch.cuda.HalfTensor(b,u*v,h,w)).fill_(0)\n            else:\n                mask = Variable(torch.FloatTensor(b,u*v,h,w)).fill_(0)\n            mask.scatter_(1,idx,1)\n            mask = mask.view(b,1,u,v,-1)\n            mask = self.pool3d(mask)[:,0].view(b,u,v,h,w)\n\n            ninf = x.clone().fill_(-np.inf).view(b,u,v,h,w)\n            x = torch.where(mask.byte(),oldx,ninf)\n        else:\n            self.wsize = (np.sqrt(u*v)-1)/2\n\n        b,u,v,h,w = x.shape\n        x = F.softmax(x.view(b,-1,h,w),1).view(b,u,v,h,w)\n        if np.isnan(x.min().detach().cpu()):\n            #pdb.set_trace()\n            x[torch.isnan(x)] = F.softmax(oldx[torch.isnan(x)])\n        outx = torch.sum(torch.sum(x*self.flowx,1),1,keepdim=True)\n        outy = torch.sum(torch.sum(x*self.flowy,1),1,keepdim=True)\n\n        if self.ent:\n            # local\n            local_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]\n            if self.wsize == 0:\n                local_entropy[:] = 1.\n            else:\n                local_entropy /= np.log((self.wsize*2+1)**2)\n\n            # global\n            x = F.softmax(oldx.view(b,-1,h,w),1).view(b,u,v,h,w)\n            global_entropy = (-x*torch.clamp(x,1e-9,1-1e-9).log()).sum(1).sum(1)[:,np.newaxis]\n            global_entropy /= np.log(x.shape[1]*x.shape[2])\n            return torch.cat([outx,outy],1),torch.cat([local_entropy, global_entropy],1)\n        else:\n            return torch.cat([outx,outy],1),None\n\n\nclass WarpModule(nn.Module):\n    \"\"\"\n    taken from https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py\n    \"\"\"\n    def __init__(self, size):\n        super(WarpModule, self).__init__()\n        B,W,H = size\n        # mesh grid \n        xx = torch.arange(0, W).view(1,-1).repeat(H,1)\n        yy = torch.arange(0, H).view(-1,1).repeat(1,W)\n        xx = xx.view(1,1,H,W).repeat(B,1,1,1)\n        yy = 
yy.view(1,1,H,W).repeat(B,1,1,1)\n        self.register_buffer('grid',torch.cat((xx,yy),1).float())\n\n    def forward(self, x, flo):\n        \"\"\"\n        warp an image/tensor (im2) back to im1, according to the optical flow\n\n        x: [B, C, H, W] (im2)\n        flo: [B, 2, H, W] flow\n\n        \"\"\"\n        B, C, H, W = x.size()\n        vgrid = self.grid + flo\n\n        # scale grid to [-1,1] \n        vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:]/max(W-1,1)-1.0\n        vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:]/max(H-1,1)-1.0\n\n        vgrid = vgrid.permute(0,2,3,1)        \n        #output = nn.functional.grid_sample(x, vgrid)\n        output = nn.functional.grid_sample(x, vgrid, align_corners=True)\n        mask = ((vgrid[:,:,:,0].abs()<1) * (vgrid[:,:,:,1].abs()<1)) >0\n        return output*mask.unsqueeze(1).float(), mask\n\n\ndef get_grid(B,H,W):\n    meshgrid_base = np.meshgrid(range(0,W), range(0,H))[::-1]\n    basey = np.reshape(meshgrid_base[0],[1,1,1,H,W])\n    basex = np.reshape(meshgrid_base[1],[1,1,1,H,W])\n    grid = torch.tensor(np.concatenate((basex.reshape((-1,H,W,1)),basey.reshape((-1,H,W,1))),-1)).cuda().float()\n    return grid.view(1,1,H,W,2)\n\n\nclass SegNet(nn.Module):\n    \"\"\"\n    Motion Segmentation Network\n    \"\"\"\n    def __init__(self, size, md=[4,4,4,4,4], fac=1., exp_unc=True):\n        super(SegNet,self).__init__()\n        self.md = md\n        self.fac = fac\n        use_entropy = True\n        withbn = True\n\n        ## pspnet\n        self.pspnet = pspnet(is_proj=False)\n\n        ### Volumetric-UNet\n        fdima1 = 128 # 6/5/4\n        fdima2 = 64 # 3/2\n        fdimb1 = 16 # 6/5/4/3\n        fdimb2 = 12 # 2\n\n        full=False\n        self.f6 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)\n        self.p6 = sepConv4d(fdimb1,fdimb1, with_bn=False, full=full)\n\n        self.f5 = butterfly4D(fdima1, fdimb1,withbn=withbn, full=full)\n        self.p5 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)\n\n       
 self.f4 = butterfly4D(fdima1, fdimb1,withbn=withbn,full=full)\n        self.p4 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)\n\n        self.f3 = butterfly4D(fdima2, fdimb1,withbn=withbn,full=full)\n        self.p3 = sepConv4d(fdimb1,fdimb1, with_bn=False,full=full)\n\n        full=True\n        self.f2 = butterfly4D(fdima2, fdimb2,withbn=withbn,full=full)\n        self.p2 = sepConv4d(fdimb2,fdimb2, with_bn=False,full=full)\n    \n        self.flow_reg64 = flow_reg([fdimb1*size[0],size[1]//64,size[2]//64], ent=use_entropy, maxdisp=self.md[0], fac=self.fac)\n        self.flow_reg32 = flow_reg([fdimb1*size[0],size[1]//32,size[2]//32], ent=use_entropy, maxdisp=self.md[1])\n        self.flow_reg16 = flow_reg([fdimb1*size[0],size[1]//16,size[2]//16], ent=use_entropy, maxdisp=self.md[2])\n        self.flow_reg8 =  flow_reg([fdimb1*size[0],size[1]//8,size[2]//8]  , ent=use_entropy, maxdisp=self.md[3])\n        self.flow_reg4 =  flow_reg([fdimb2*size[0],size[1]//4,size[2]//4]  , ent=use_entropy, maxdisp=self.md[4])\n\n        self.warp5 = WarpModule([size[0],size[1]//32,size[2]//32])\n        self.warp4 = WarpModule([size[0],size[1]//16,size[2]//16])\n        self.warp3 = WarpModule([size[0],size[1]//8,size[2]//8])\n        self.warp2 = WarpModule([size[0],size[1]//4,size[2]//4])\n        if self.training:\n            self.warpx = WarpModule([size[0],size[1],size[2]])\n\n        ## hypotheses fusion modules, adopted from the refinement module of PWCNet\n        # https://github.com/NVlabs/PWC-Net/blob/master/PyTorch/models/PWCNet.py\n        # c6\n        self.dc6_conv1 = conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc6_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc6_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc6_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)\n        self.dc6_conv5 = conv(96,       
64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc6_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc6_conv7 = nn.Conv2d(32,2*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)\n\n        # c5\n        self.dc5_conv1 = conv(128+4*fdimb1*2, 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc5_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc5_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc5_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)\n        self.dc5_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc5_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc5_conv7 = nn.Conv2d(32,2*fdimb1*2,kernel_size=3,stride=1,padding=1,bias=True)\n\n        # c4\n        self.dc4_conv1 = conv(128+4*fdimb1*3, 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc4_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc4_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc4_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)\n        self.dc4_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc4_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc4_conv7 = nn.Conv2d(32,2*fdimb1*3,kernel_size=3,stride=1,padding=1,bias=True)\n\n        # c3\n        self.dc3_conv1 = conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc3_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc3_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc3_conv4 = conv(128,      96,  kernel_size=3, stride=1, 
padding=8,  dilation=8)\n        self.dc3_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc3_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc3_conv7 = nn.Conv2d(32,8*fdimb1,kernel_size=3,stride=1,padding=1,bias=True)\n\n        # c2\n        self.dc2_conv1 = conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc2_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)\n        self.dc2_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)\n        self.dc2_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)\n        self.dc2_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)\n        self.dc2_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)\n        self.dc2_conv7 = nn.Conv2d(32,4*2*fdimb1 + 2*fdimb2,kernel_size=3,stride=1,padding=1,bias=True)\n\n        self.dc6_conv = nn.Sequential(  self.dc6_conv1,\n                                        self.dc6_conv2,\n                                        self.dc6_conv3,\n                                        self.dc6_conv4,\n                                        self.dc6_conv5,\n                                        self.dc6_conv6,\n                                        self.dc6_conv7)\n        self.dc5_conv = nn.Sequential(  self.dc5_conv1,\n                                        self.dc5_conv2,\n                                        self.dc5_conv3,\n                                        self.dc5_conv4,\n                                        self.dc5_conv5,\n                                        self.dc5_conv6,\n                                        self.dc5_conv7)\n        self.dc4_conv = nn.Sequential(  self.dc4_conv1,\n                                        self.dc4_conv2,\n                                        self.dc4_conv3,\n                
                        self.dc4_conv4,\n                                        self.dc4_conv5,\n                                        self.dc4_conv6,\n                                        self.dc4_conv7)\n        self.dc3_conv = nn.Sequential(  self.dc3_conv1,\n                                        self.dc3_conv2,\n                                        self.dc3_conv3,\n                                        self.dc3_conv4,\n                                        self.dc3_conv5,\n                                        self.dc3_conv6,\n                                        self.dc3_conv7)\n        self.dc2_conv = nn.Sequential(  self.dc2_conv1,\n                                        self.dc2_conv2,\n                                        self.dc2_conv3,\n                                        self.dc2_conv4,\n                                        self.dc2_conv5,\n                                        self.dc2_conv6,\n                                        self.dc2_conv7)\n\n        ## Out-of-range detection\n        self.dc6_convo = nn.Sequential(conv(128+4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),\n                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),\n                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),\n                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),\n                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),\n                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),\n                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))\n\n        self.dc5_convo = nn.Sequential(conv(128+2*4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),\n                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),\n                            
conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),\n                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),\n                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),\n                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),\n                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))\n\n        self.dc4_convo = nn.Sequential(conv(128+3*4*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),\n                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),\n                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),\n                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),\n                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),\n                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),\n                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))\n\n        self.dc3_convo = nn.Sequential(conv(64+16*fdimb1, 128, kernel_size=3, stride=1, padding=1,  dilation=1),\n                            conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),\n                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),\n                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),\n                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),\n                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),\n                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))\n\n        self.dc2_convo = nn.Sequential(conv(64+16*fdimb1+4*fdimb2, 128, kernel_size=3, stride=1, padding=1,  dilation=1),\n             
               conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2),\n                            conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4),\n                            conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8),\n                            conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16),\n                            conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1),\n                            nn.Conv2d(32,1,kernel_size=3,stride=1,padding=1,bias=True))\n\n        # affine-exp\n        self.f3d2v1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2v2 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2v3 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2v4 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2v5 = conv(64,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2v6 = conv(12*81,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.f3d2 = bfmodule(128-64,1)\n\n        # depth change net\n        self.dcnetv1 = conv(64, 32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.dcnetv2 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.dcnetv3 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.dcnetv4 = conv(1,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.dcnetv5 = conv(12*81,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.dcnetv6 = conv(4,   32, kernel_size=3, stride=1, padding=1,dilation=1) # \n        if exp_unc:\n            self.dcnet = bfmodule(128,2)\n        else:\n            self.dcnet = bfmodule(128,1)\n            \n        # moseg net\n        self.fgnetv1 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv2 = conv(1,   
16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv3 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv4 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv5 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv6 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv7 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv8 = conv(1,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv9 = conv(3,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnetv10 = conv(3,   16, kernel_size=3, stride=1, padding=1,dilation=1) # \n        self.fgnet = bfmodule_feat(208-3*16,7)\n\n        #from midas.midas_net import MidasNet\n        #self.midas = MidasNet('/data/gengshay/midas.pt', non_negative=True)\n        self.midas = torch.hub.load(\"intel-isl/MiDaS\", \"MiDaS\")\n        \n        # detection branch\n        self.det = create_model('dla_34', {'hm': 2, 'wh': 36}, 256,num_input=14)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv3d):\n                n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n                if hasattr(m.bias,'data'):\n                    m.bias.data.zero_()\n\n        self.facs = [self.fac,1,1,1,1]\n        self.warp_modules = nn.ModuleList([None, self.warp5, self.warp4, self.warp3, self.warp2])\n        self.f_modules = nn.ModuleList([self.f6, self.f5, self.f4, self.f3, self.f2])\n        self.p_modules = nn.ModuleList([self.p6, self.p5, self.p4, self.p3, self.p2])\n        self.reg_modules = nn.ModuleList([self.flow_reg64, self.flow_reg32, self.flow_reg16, self.flow_reg8, self.flow_reg4])\n        self.oor_modules = nn.ModuleList([self.dc6_convo, self.dc5_convo, self.dc4_convo, self.dc3_convo, self.dc2_convo])\n        self.fuse_modules = nn.ModuleList([self.dc6_conv, self.dc5_conv, self.dc4_conv, self.dc3_conv, self.dc2_conv])\n\n    def corrf(self, refimg_fea, targetimg_fea,maxdisp, fac=1):\n        if self.training:\n            #fast correlation function\n            b,c,h,w = refimg_fea.shape\n            targetimg_fea = F.unfold(targetimg_fea, (2*int(maxdisp)//fac+1,2*maxdisp+1), padding=(int(maxdisp)//fac,maxdisp)).view(b,c, 2*int(maxdisp)//fac+1,2*maxdisp+1,h,w).permute(0,1,3,2,4,5).contiguous()\n            cost = refimg_fea.view(b,c,h,w)[:,:,np.newaxis, np.newaxis]*targetimg_fea\n            cost = F.leaky_relu(cost, 0.1,inplace=True)\n        else:\n            #slow correlation function\n            b,c,height,width = refimg_fea.shape\n            if refimg_fea.is_cuda:\n                cost = Variable(torch.cuda.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) # b,c,u,v,h,w\n            else:\n                cost = Variable(torch.FloatTensor(b,c,2*maxdisp+1,2*int(maxdisp//fac)+1,height,width)).fill_(0.) 
# b,c,u,v,h,w\n            for i in range(2*maxdisp+1):\n                ind = i-maxdisp\n                for j in range(2*int(maxdisp//fac)+1):\n                    indd = j-int(maxdisp//fac)\n                    feata = refimg_fea[:,:,max(0,-indd):height-indd,max(0,-ind):width-ind]\n                    featb = targetimg_fea[:,:,max(0,+indd):height+indd,max(0,ind):width+ind]\n                    diff = (feata*featb)\n                    cost[:, :, i,j,max(0,-indd):height-indd,max(0,-ind):width-ind]   = diff  # standard\n            cost = F.leaky_relu(cost, 0.1,inplace=True)\n        return cost\n\n    def cost_matching(self,up_flow, c1, c2, flowh, enth, level):\n        \"\"\"\n        up_flow: upsampled coarse flow\n        c1: normalized feature of image 1\n        c2: normalized feature of image 2\n        flowh: flow hypotheses\n        enth: entropy\n        oor (returned, not an argument): out of range score for flow\n        \"\"\"\n\n        # normalize\n        c1n = c1 / (c1.norm(dim=1, keepdim=True)+1e-9)\n        c2n = c2 / (c2.norm(dim=1, keepdim=True)+1e-9)\n\n        # cost volume\n        if level == 0:\n            warp = c2n\n        else:\n            warp,_ = self.warp_modules[level](c2n, up_flow)\n\n        feat = self.corrf(c1n,warp,self.md[level],fac=self.facs[level])\n        feat = self.f_modules[level](feat) \n        cost = self.p_modules[level](feat) # b, 16, u,v,h,w\n\n        # soft WTA\n        b,c,u,v,h,w = cost.shape\n        cost = cost.view(-1,u,v,h,w)  # bx16, 9,9,h,w, also predict uncertainty from here\n        flowhh,enthh = self.reg_modules[level](cost) # bx16, 2, h, w\n        flowhh = flowhh.view(b,c,2,h,w)\n        if level > 0:\n            flowhh = flowhh + up_flow[:,np.newaxis]\n        flowhh = flowhh.view(b,-1,h,w) # b, 16*2, h, w\n        enthh =  enthh.view(b,-1,h,w) # b, 16*1, h, w\n\n        # append coarse hypotheses\n        if level == 0:\n            flowh = flowhh\n            enth = enthh\n        else:\n            flowh = 
torch.cat((flowhh, F.upsample(flowh.detach()*2, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1) # b, k2--k2, h, w\n            enth = torch.cat((enthh, F.upsample(enth, [flowhh.shape[2],flowhh.shape[3]], mode='bilinear')),1)\n\n        if self.training or level==4:\n            x = torch.cat((enth.detach(), flowh.detach(), c1),1)\n            oor = self.oor_modules[level](x)[:,0]\n        else: oor = None\n\n        # hypotheses fusion\n        x = torch.cat((enth.detach(), flowh.detach(), c1),1)\n        va = self.fuse_modules[level](x)\n        va = va.view(b,-1,2,h,w)\n        flow = ( flowh.view(b,-1,2,h,w) * F.softmax(va,1) ).sum(1) # b, 2k, 2, h, w\n\n        return flow, flowh, enth, oor\n\n    def affine(self,pref,flow, pw=1):\n        b,_,lh,lw=flow.shape\n        ptar = pref + flow\n        pw = 1\n        pref = F.unfold(pref, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-pref[:,:,np.newaxis]\n        ptar = F.unfold(ptar, (pw*2+1,pw*2+1), padding=(pw)).view(b,2,(pw*2+1)**2,lh,lw)-ptar[:,:,np.newaxis] # b, 2,9,h,w\n        pref = pref.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)\n        ptar = ptar.permute(0,3,4,1,2).reshape(b*lh*lw,2,(pw*2+1)**2)\n\n        prefprefT = pref.matmul(pref.permute(0,2,1))\n        ppdet = prefprefT[:,0,0]*prefprefT[:,1,1]-prefprefT[:,1,0]*prefprefT[:,0,1]\n        ppinv = torch.cat((prefprefT[:,1,1:],-prefprefT[:,0,1:], -prefprefT[:,1:,0], prefprefT[:,0:1,0]),1).view(-1,2,2)/ppdet.clamp(1e-10,np.inf)[:,np.newaxis,np.newaxis]\n\n        Affine = ptar.matmul(pref.permute(0,2,1)).matmul(ppinv)\n        Error = (Affine.matmul(pref)-ptar).norm(2,1).mean(1).view(b,1,lh,lw)\n\n        Avol = (Affine[:,0,0]*Affine[:,1,1]-Affine[:,1,0]*Affine[:,0,1]).view(b,1,lh,lw).abs().clamp(1e-10,np.inf)\n        exp = Avol.sqrt()\n        mask = (exp>0.5) & (exp<2) & (Error<0.1)\n        mask = mask[:,0]\n\n        exp = exp.clamp(0.5,2)\n        exp[Error>0.1]=1\n        return exp, Error, mask\n\n    def 
forward_VCN(self, im):\n        bs = im.shape[0]//2\n\n        ### compute optical flow\n        c06,c05,c04,c03,c02 = self.pspnet(im)\n        c16 = c06[:bs];  c26 = c06[bs:]\n        c15 = c05[:bs];  c25 = c05[bs:]\n        c14 = c04[:bs];  c24 = c04[bs:]\n        c13 = c03[:bs];  c23 = c03[bs:]\n        c12 = c02[:bs];  c22 = c02[bs:]\n\n        ## matching 6\n        flow6, flow6h, ent6h, oor6 = self.cost_matching(None, c16, c26, None, None,level=0)\n\n        ## matching 5\n        up_flow6 = F.upsample(flow6, [im.size()[2]//32,im.size()[3]//32], mode='bilinear')*2\n        flow5, flow5h, ent5h, oor5 = self.cost_matching(up_flow6, c15, c25, flow6h, ent6h,level=1)\n\n        ## matching 4\n        up_flow5 = F.upsample(flow5, [im.size()[2]//16,im.size()[3]//16], mode='bilinear')*2\n        flow4, flow4h, ent4h, oor4 = self.cost_matching(up_flow5, c14, c24, flow5h, ent5h,level=2)\n\n        ## matching 3\n        up_flow4 = F.upsample(flow4, [im.size()[2]//8,im.size()[3]//8], mode='bilinear')*2\n        flow3, flow3h, ent3h, oor3 = self.cost_matching(up_flow4, c13, c23, flow4h, ent4h,level=3)\n\n        ## matching 2\n        up_flow3 = F.upsample(flow3, [im.size()[2]//4,im.size()[3]//4], mode='bilinear')*2\n        flow2, flow2h, ent2h, oor2 = self.cost_matching(up_flow3, c12, c22, flow3h, ent3h,level=4)\n\n        ### optical expansion\n        b,_,h,w = flow2.shape \n        exp2,err2,_ = self.affine(get_grid(b,h,w)[:,0].permute(0,3,1,2).repeat(b,1,1,1).clone(), flow2.detach(),pw=1)\n        x = torch.cat((\n                        self.f3d2v2(-exp2.log()),\n                        self.f3d2v3(err2),\n                        ),1)\n        dchange2 = -exp2.log()+1./200*self.f3d2(x)[0]\n\n        # depth change net\n        iexp2 = F.upsample(dchange2.clone(), [im.size()[2],im.size()[3]], mode='bilinear')\n        x = torch.cat((self.dcnetv1(c12.detach()),\n                        self.dcnetv2(dchange2.detach()),\n                        
self.dcnetv3(-exp2.log()),\n                        self.dcnetv4(err2),\n                    ),1)\n        dcneto = 1./200*self.dcnet(x)[0]\n        dchange2 = dchange2.detach() + dcneto[:,:1]\n        dchange2 = F.upsample(dchange2, [im.size()[2],im.size()[3]], mode='bilinear')\n\n        if dcneto.shape[1]>1:\n            dc_unc = dcneto[:,1:2]\n        else:\n            dc_unc = torch.zeros_like(dcneto)\n        dc_unc = F.upsample(dc_unc, [im.size()[2],im.size()[3]], mode='bilinear')[:,0]\n\n        return flow2, oor2, dchange2, dc_unc\n\n    def forward(self,im,disc_aux=None,flowdc=None):\n        bs = im.shape[0]//2\n        flow2, oor2, dchange2, dc_unc = flowdc\n\n        ### rigid motion segmentation\n        ## pre-processing\n        Kinv, Kinv_n = get_intrinsics(disc_aux[0], noise=False)\n        # get full res flow/expansion inputs\n        H,W = im.size()[2:4]\n        flow = 4*F.upsample(flow2, [H,W], mode='bilinear').detach()\n        oor2 = F.upsample(oor2[:,np.newaxis], [H,W], mode='bilinear').detach()[:,0]\n        tau = (-dchange2[:,0]).exp().detach()\n\n        # use different number of correspondences for bg, obj segmentation and pose\n        fscale=128./H; fscalex=32./H;fscaled=448./H\n        hp0o = torch.cat( [torch.arange(0, W,out=torch.cuda.FloatTensor()).view(1,-1).repeat(H,1)[np.newaxis],  # 1,2,H,W\n                            torch.arange(0, H,out=torch.cuda.FloatTensor()).view(-1,1).repeat(1,W)[np.newaxis]], 0)[np.newaxis]\n        hp1o = hp0o + flow  # b,2,H,W\n        # to deal with input resizing (TODO: move it inside intrinsics)\n        hp0o[:,0] *= disc_aux[0][10]\n        hp0o[:,1] *= disc_aux[0][11]\n        hp1o[:,0] *= disc_aux[0][10]\n        hp1o[:,1] *= disc_aux[0][11]\n\n        # sample correspondence for object segmentation (fscaled)\n        hp0d = F.interpolate(hp0o,scale_factor=fscaled,mode='nearest')\n        hp1d = F.interpolate(hp1o,scale_factor=fscaled,mode='nearest')\n        _,_,hd,wd=hp0d.shape\n        
hp0d = hp0d.view(1,2,-1).permute(0,2,1)\n        hp1d = hp1d.view(bs,2,-1).permute(0,2,1)\n        hp0d = torch.cat((hp0d,torch.ones(1,hp0d.shape[1],1).cuda()),-1)\n        hp1d = torch.cat((hp1d,torch.ones(bs,hp0d.shape[1],1).cuda()),-1)\n        uncd = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscaled,mode='nearest'),\n                F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscaled,mode='nearest')),1)\n        taud = F.interpolate(tau[:,np.newaxis],scale_factor=fscaled,mode='nearest').view(bs,1,-1)\n\n        # sample correspondence for fg/bg seg (fscale)\n        hp0 = F.interpolate(hp0o,scale_factor=fscale,mode='nearest')\n        hp1 = F.interpolate(hp1o,scale_factor=fscale,mode='nearest')\n        _,_,h,w=hp0.shape\n        hp0 = hp0.view(1,2,-1).permute(0,2,1)\n        hp1 = hp1.view(bs,2,-1).permute(0,2,1)\n        hp0 = torch.cat((hp0,torch.ones(1,hp0.shape[1],1).cuda()),-1)\n        hp1 = torch.cat((hp1,torch.ones(bs,hp0.shape[1],1).cuda()),-1)\n        unc = torch.cat((F.interpolate(oor2[:,np.newaxis],scale_factor=fscale,mode='nearest'),\n                F.interpolate(dc_unc[:,np.newaxis].detach(),scale_factor=fscale,mode='nearest')),1)\n        tau = F.interpolate(tau[:,np.newaxis],scale_factor=fscale,mode='nearest').view(bs,1,-1)\n        \n        # sample correspondence for pose estimation (fscalex)\n        hp0x = F.interpolate(hp0o,scale_factor=fscalex,mode='nearest')\n        hp1x = F.interpolate(hp1o,scale_factor=fscalex,mode='nearest')\n        hp0x = hp0x.view(1,2,-1).permute(0,2,1)\n        hp1x = hp1x.view(bs,2,-1).permute(0,2,1)\n        hp0x = torch.cat((hp0x,torch.ones(1,hp0x.shape[1],1).cuda()),-1)\n        hp1x = torch.cat((hp1x,torch.ones(bs,hp0x.shape[1],1).cuda()),-1)\n\n        ## camera pose estimation\n        # using input pose from VONet\n        rot = torch.from_numpy(cv2.Rodrigues(disc_aux[2][:,:3])[0][:,0].astype(np.float32)).unsqueeze(0)\n        trans = 
torch.from_numpy(disc_aux[2][:,3:].astype(np.float32)).squeeze().unsqueeze(0)\n        trans = trans / trans.norm(2,1)[:,np.newaxis]\n        rot = rot.cuda().detach()\n        trans = trans.cuda().detach()\n        Ex = get_skew_mat(trans.cpu(), rot.cpu())\n\n        ## fg/bg segmentation\n        # rigidity cost maps\n        mcost00, mcost01, mcost1, mcost2, mcost3, mcost4, p3dmag,_ = compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n = Kinv_n)\n        \n        # depth contrast cost\n        with torch.no_grad():\n            self.midas.eval()\n            input_im  = (disc_aux[1].permute(0,3,1,2) -\\\n                    torch.Tensor([0.485, 0.456, 0.406]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]) /\\\n                    torch.Tensor([0.229, 0.224, 0.225]).cuda()[np.newaxis,:,np.newaxis,np.newaxis]\n            wsize = int((input_im.shape[3] * 448./input_im.shape[2])//32*32)\n            input_im = F.interpolate(input_im, (448, wsize), mode='bilinear')\n            dispo = self.midas.forward(input_im)[None].clamp(1e-6,np.inf)\n\n        disp = F.interpolate(dispo, [h,w], mode='bilinear')\n        med_dgt = torch.median(disp.view(bs,-1),dim=-1)[0]\n        med_dp3d = torch.median(p3dmag.view(bs,-1),dim=-1)[0]\n        med_ratio = (med_dgt/med_dp3d)[:,np.newaxis,np.newaxis,np.newaxis]\n        # disp[disp == float('inf')] = p3dmag.view(bs,1,h,w)[disp == float('inf')] * med_ratio\n        log_dratio = ( med_ratio * p3dmag.view(bs,1,h,w) / disp.view(bs,1,h,w) ).log()\n        #pdb.set_trace()\n\n        # pseudo 3D point compute\n        depth = (1./ disp).view(bs,1,-1)\n        depth = depth.clamp(depth.median()/10, depth.median()*10)\n        p03d = depth *      Kinv.matmul(hp0.permute(0,2,1))\n        p13d = depth/tau*Kinv_n.matmul(hp1.permute(0,2,1))\n        p13d = kornia.angle_axis_to_rotation_matrix(rot).matmul(p13d)  # remove rotation\n        pts = torch.cat([p03d, p13d],-1) # bs, 3, 2*N\n        # normalize it \n        for i in 
range(bs):\n            pts[i] = pts[i] - pts[i].mean(-1,keepdims=True)  # zero mean\n            pts[i] = pts[i] / pts[i].flatten().std() # unit std\n        p03d = pts[:,:,:p03d.shape[-1]]\n        p13d = pts[:,:,p03d.shape[-1]:]\n\n        # fg/bg segmentation network\n        # the constants are empirical values multiplied to cost maps to \n        # ensure they have similar scales\n        costs = torch.cat((\n                        self.fgnetv1( 0.01*(mcost00+mcost01).view(bs,1,h,w).detach()),\n                        self.fgnetv2( 2e3*       mcost1.view(bs,1,h,w).detach()),\n                        self.fgnetv3(            mcost2.view(bs,1,h,w).detach()),\n                        self.fgnetv4(   30*      mcost3.view(bs,1,h,w).detach()),\n                        self.fgnetv5(            mcost4.view(bs,1,h,w).detach()),\n                        self.fgnetv6(  0.2*      unc[:,:1].view(bs,1,h,w).detach()),\n                        self.fgnetv7(  0.2*      unc[:,1:].view(bs,1,h,w).detach()),\n                        self.fgnetv8(   3*      log_dratio.view(bs,1,h,w).detach()),\n                        self.fgnetv9( p03d.view(bs,3,h,w).detach()),\n                        self.fgnetv10( p13d.view(bs,3,h,w).detach()),\n                    ),1)\n        x,featx = self.fgnet(costs)\n        fg_va =  1./20*x[:,:-1]\n        fg_res = 1./200*x[:,-1:]\n        fg_hps = torch.cat( (\n                                0.01*(mcost00+mcost01).view(bs,1,h,w).detach(),\n                                2e3* mcost1.view(bs,1,h,w).detach(),\n                                    mcost2.view(bs,1,h,w).detach(),\n                                30*  mcost3.view(bs,1,h,w).detach(),\n                                    mcost4.view(bs,1,h,w).detach(),\n                        3*      log_dratio.view(bs,1,h,w).detach(),\n                            ),1)\n        # fgmask: prelogits of 0-1 probability foreground vs background\n        fgmask = (fg_va * fg_hps).sum(1, keepdims=True) + 
fg_res\n        fgmask = F.upsample(fgmask, [im.size()[2],im.size()[3]], mode='bilinear')\n\n\n        return fgmask[0,0]"
  },
  {
    "path": "Network/rigidmask/__init__.py",
    "content": ""
  },
  {
    "path": "Network/rigidmask/conv4d.py",
    "content": "import pdb\nimport torch.nn as nn\nimport math\nimport torch\nfrom torch.nn.parameter import Parameter\nimport torch.nn.functional as F\nfrom torch.nn import Module\nfrom torch.nn.modules.conv import _ConvNd\nfrom torch.nn.modules.utils import _quadruple\nfrom torch.autograd import Variable\nfrom torch.nn import Conv2d\n\ndef conv4d(data,filters,bias=None,permute_filters=True,use_half=False):\n    \"\"\"\n    This is done by stacking results of multiple 3D convolutions, and is very slow.\n    Taken from https://github.com/ignacio-rocco/ncnet\n    \"\"\"\n    b,c,h,w,d,t=data.size()\n\n    data=data.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop    \n        \n    # Same permutation is done with filters, unless already provided with permutation\n    if permute_filters:\n        filters=filters.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop    \n\n    c_out=filters.size(1)\n    if use_half:\n        output = Variable(torch.HalfTensor(h,b,c_out,w,d,t),requires_grad=data.requires_grad)\n    else:\n        output = Variable(torch.zeros(h,b,c_out,w,d,t),requires_grad=data.requires_grad)\n    \n    padding=filters.size(0)//2\n    if use_half:\n        Z=Variable(torch.zeros(padding,b,c,w,d,t).half())\n    else:\n        Z=Variable(torch.zeros(padding,b,c,w,d,t))\n    \n    if data.is_cuda:\n        Z=Z.cuda(data.get_device())    \n        output=output.cuda(data.get_device())\n        \n    data_padded = torch.cat((Z,data,Z),0)\n    \n\n    for i in range(output.size(0)): # loop on first feature dimension\n        # convolve with center channel of filter (at position=padding)\n        output[i,:,:,:,:,:]=F.conv3d(data_padded[i+padding,:,:,:,:,:], \n                                     filters[padding,:,:,:,:,:], bias=bias, stride=1, padding=padding)\n        # convolve with upper/lower channels of filter (at postions [:padding] [padding+1:])\n        for p in range(1,padding+1):\n      
      output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding-p,:,:,:,:,:], \n                                                             filters[padding-p,:,:,:,:,:], bias=None, stride=1, padding=padding)\n            output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding+p,:,:,:,:,:], \n                                                             filters[padding+p,:,:,:,:,:], bias=None, stride=1, padding=padding)\n\n    output=output.permute(1,2,0,3,4,5).contiguous()\n    return output\n\nclass Conv4d(_ConvNd):\n    \"\"\"Applies a 4D convolution over an input signal composed of several input\n    planes.\n    \"\"\"\n\n    def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True): \n        # stride, dilation and groups !=1 functionality not tested \n        stride=1\n        dilation=1\n        groups=1\n        # zero padding is added automatically in conv4d function to preserve tensor size\n        padding = 0\n        kernel_size = _quadruple(kernel_size)\n        stride = _quadruple(stride)\n        padding = _quadruple(padding)\n        dilation = _quadruple(dilation)\n        super(Conv4d, self).__init__(\n            in_channels, out_channels, kernel_size, stride, padding, dilation,\n            False, _quadruple(0), groups, bias)  \n        # weights will be sliced along one dimension during convolution loop\n        # make the looping dimension to be the first one in the tensor, \n        # so that we don't need to call contiguous() inside the loop\n        self.pre_permuted_filters=pre_permuted_filters\n        if self.pre_permuted_filters:\n            self.weight.data=self.weight.data.permute(2,0,1,3,4,5).contiguous()\n        self.use_half=False\n    #    self.isbias = bias\n    #    if not self.isbias:\n    #        self.bn = torch.nn.BatchNorm1d(out_channels)\n\n\n    def forward(self, input):\n        out = conv4d(input, self.weight, bias=self.bias,permute_filters=not 
self.pre_permuted_filters,use_half=self.use_half) # filters pre-permuted in constructor\n    #    if not self.isbias:\n    #        b,c,u,v,h,w = out.shape\n    #        out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)\n        return out\n\nclass fullConv4d(torch.nn.Module):\n    def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True):\n        super(fullConv4d, self).__init__()\n        self.conv = Conv4d(in_channels, out_channels, kernel_size, bias=bias, pre_permuted_filters=pre_permuted_filters)\n        self.isbias = bias\n        if not self.isbias:\n            self.bn = torch.nn.BatchNorm1d(out_channels)\n\n    def forward(self, input):\n        out = self.conv(input)\n        if not self.isbias:\n            b,c,u,v,h,w = out.shape\n            out = self.bn(out.view(b,c,-1)).view(b,c,u,v,h,w)\n        return out\n\nclass butterfly4D(torch.nn.Module):\n    '''\n    butterfly 4d\n    '''\n    def __init__(self, fdima, fdimb, withbn=True, full=True,groups=1):\n        super(butterfly4D, self).__init__()\n        self.proj = nn.Sequential(projfeat4d(fdima, fdimb, 1, with_bn=withbn,groups=groups),\n                                  nn.ReLU(inplace=True),)\n        self.conva1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)\n        self.conva2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(2,1,1),full=full,groups=groups)\n        self.convb3 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)\n        self.convb2 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)\n        self.convb1 = sepConv4dBlock(fdimb,fdimb,with_bn=withbn, stride=(1,1,1),full=full,groups=groups)\n\n    #@profile\n    def forward(self,x):\n        out = self.proj(x)\n        b,c,u,v,h,w = out.shape # 9x9\n\n        out1 = self.conva1(out) # 5x5, 3\n        _,c1,u1,v1,h1,w1 = out1.shape\n\n        out2 = self.conva2(out1) # 3x3, 9\n        
_,c2,u2,v2,h2,w2 = out2.shape\n\n        out2 = self.convb3(out2) # 3x3, 9\n\n        tout1 = F.upsample(out2.view(b,c,u2,v2,-1),(u1,v1,h2*w2),mode='trilinear').view(b,c,u1,v1,h2,w2) # 5x5\n        tout1 = F.upsample(tout1.view(b,c,-1,h2,w2),(u1*v1,h1,w1),mode='trilinear').view(b,c,u1,v1,h1,w1) # 5x5\n        out1 = tout1 + out1\n        out1 = self.convb2(out1)\n\n        tout = F.upsample(out1.view(b,c,u1,v1,-1),(u,v,h1*w1),mode='trilinear').view(b,c,u,v,h1,w1)\n        tout = F.upsample(tout.view(b,c,-1,h1,w1),(u*v,h,w),mode='trilinear').view(b,c,u,v,h,w)\n        out = tout + out\n        out = self.convb1(out)\n\n        return out\n\n\n\nclass projfeat4d(torch.nn.Module):\n    '''\n    Turn 3d projection into 2d projection\n    '''\n    def __init__(self, in_planes, out_planes, stride, with_bn=True,groups=1):\n        super(projfeat4d, self).__init__()\n        self.with_bn = with_bn\n        self.stride = stride\n        self.conv1 = nn.Conv3d(in_planes, out_planes, 1, (stride,stride,1), padding=0,bias=not with_bn,groups=groups)\n        self.bn = nn.BatchNorm3d(out_planes)\n\n    def forward(self,x):\n        b,c,u,v,h,w = x.size()\n        x = self.conv1(x.view(b,c,u,v,h*w))\n        if self.with_bn:\n            x = self.bn(x)\n        _,c,u,v,_ = x.shape\n        x = x.view(b,c,u,v,h,w)\n        return x\n\nclass sepConv4d(torch.nn.Module):\n    '''\n    Separable 4d convolution block as 2 3D convolutions\n    '''\n    def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, ksize=3, full=True,groups=1):\n        super(sepConv4d, self).__init__()\n        bias = not with_bn\n        self.isproj = False\n        self.stride = stride[0]\n        expand = 1\n\n        if with_bn:\n            if in_planes != out_planes:\n                self.isproj = True\n                self.proj = nn.Sequential(nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups),\n                                          nn.BatchNorm2d(out_planes))\n   
         if full:\n                self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),\n                                           nn.BatchNorm3d(in_planes))\n            else:\n                self.conv1 = nn.Sequential(nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1,                           bias=bias, padding=(0,ksize//2,ksize//2),groups=groups),\n                                           nn.BatchNorm3d(in_planes))\n            self.conv2 = nn.Sequential(nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups),\n                                       nn.BatchNorm3d(in_planes*expand))\n        else:\n            if in_planes != out_planes:\n                self.isproj = True\n                self.proj = nn.Conv2d(in_planes, out_planes, 1, bias=bias, padding=0,groups=groups)\n            if full:\n                self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=(1,self.stride,self.stride), bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)\n            else:\n                self.conv1 = nn.Conv3d(in_planes*expand, in_planes, (1,ksize,ksize), stride=1,                           bias=bias, padding=(0,ksize//2,ksize//2),groups=groups)\n            self.conv2 = nn.Conv3d(in_planes, in_planes*expand, (ksize,ksize,1), stride=(self.stride,self.stride,1), bias=bias, padding=(ksize//2,ksize//2,0),groups=groups)\n        self.relu = nn.ReLU(inplace=True)\n        \n    #@profile\n    def forward(self,x):\n        b,c,u,v,h,w = x.shape\n        x = self.conv2(x.view(b,c,u,v,-1))\n        b,c,u,v,_ = x.shape\n        x = self.relu(x)\n        x = self.conv1(x.view(b,c,-1,h,w))\n        b,c,_,h,w = x.shape\n\n        if self.isproj:\n            x = self.proj(x.view(b,c,-1,w))\n        x = x.view(b,-1,u,v,h,w)\n    
    return x\n\n\nclass sepConv4dBlock(torch.nn.Module):\n    '''\n    Separable 4d convolution block as 2 2D convolutions and a projection\n    layer\n    '''\n    def __init__(self, in_planes, out_planes, stride=(1,1,1), with_bn=True, full=True,groups=1):\n        super(sepConv4dBlock, self).__init__()\n        if in_planes == out_planes and stride==(1,1,1):\n            self.downsample = None\n        else:\n            if full:\n                self.downsample = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn,ksize=1, full=full,groups=groups)\n            else:\n                self.downsample = projfeat4d(in_planes, out_planes,stride[0], with_bn=with_bn,groups=groups)\n        self.conv1 = sepConv4d(in_planes, out_planes, stride, with_bn=with_bn, full=full ,groups=groups)\n        self.conv2 = sepConv4d(out_planes, out_planes,(1,1,1), with_bn=with_bn, full=full,groups=groups)\n        self.relu1 = nn.ReLU(inplace=True)\n        self.relu2 = nn.ReLU(inplace=True)\n\n    #@profile\n    def forward(self,x):\n        out = self.relu1(self.conv1(x))\n        if self.downsample:\n            x = self.downsample(x)\n        out = self.relu2(x + self.conv2(out))\n        return out\n\n\n##import torch.backends.cudnn as cudnn\n##cudnn.benchmark = True\n#import time\n##im = torch.randn(9,64,9,160,224).cuda()\n##net = torch.nn.Conv3d(64, 64, 3).cuda()\n##net = Conv4d(1,1,3,bias=True,pre_permuted_filters=True).cuda()\n##net = sepConv4dBlock(2,2,stride=(1,1,1)).cuda()\n#\n##im = torch.randn(1,16,9,9,96,320).cuda()\n##net = sepConv4d(16,16,with_bn=False).cuda()\n#\n##im = torch.randn(1,16,81,96,320).cuda()\n##net = torch.nn.Conv3d(16,16,(1,3,3),padding=(0,1,1)).cuda()\n#\n##im = torch.randn(1,16,9,9,96*320).cuda()\n##net = torch.nn.Conv3d(16,16,(3,3,1),padding=(1,1,0)).cuda()\n#\n##im = torch.randn(10000,10,9,9).cuda()\n##net = torch.nn.Conv2d(10,10,3,padding=1).cuda()\n#\n##im = torch.randn(81,16,96,320).cuda()\n##net = 
torch.nn.Conv2d(16,16,3,padding=1).cuda()\n#c=   int(16 *1)\n#cp = int(16 *1)\n#h=int(96  *4)\n#w=int(320 *4)\n#k=3\n#im = torch.randn(1,c,h,w).cuda()\n#net = torch.nn.Conv2d(c,cp,k,padding=k//2).cuda()\n#\n#im2 = torch.randn(cp,k*k*c).cuda()\n#im1 = F.unfold(im, (k,k), padding=k//2)[0]\n# \n#\n#net(im)\n#net(im)\n#torch.mm(im2,im1)\n#torch.mm(im2,im1)\n#torch.cuda.synchronize()\n#beg = time.time()\n#for i in range(100):\n#    net(im)\n#    #im1 = F.unfold(im, (k,k), padding=k//2)[0]\n#    torch.mm(im2,im1)\n#torch.cuda.synchronize()\n#print('%f'%((time.time()-beg)*10.))\n"
  },
  {
    "path": "Network/rigidmask/det.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport torchvision.models as models\nimport torch\nimport torch.nn as nn\nimport os\n\nfrom .networks.msra_resnet import get_pose_net\nfrom .networks.dlav0 import get_pose_net as get_dlav0\nfrom .networks.pose_dla_dcn import get_pose_net as get_dla_dcn\nfrom .networks.resnet_dcn import get_pose_net as get_pose_net_dcn\nfrom .networks.large_hourglass import get_large_hourglass_net\n\n_model_factory = {\n  'res': get_pose_net, # default Resnet with deconv\n  'dlav0': get_dlav0, # default DLAup\n  'dla': get_dla_dcn,\n  'resdcn': get_pose_net_dcn,\n  'hourglass': get_large_hourglass_net,\n}\n\ndef create_model(arch, heads, head_conv,num_input):\n  num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0\n  arch = arch[:arch.find('_')] if '_' in arch else arch\n  get_model = _model_factory[arch]\n  model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input)\n  return model\n\ndef load_model(model, model_path, optimizer=None, resume=False, \n               lr=None, lr_step=None):\n  start_epoch = 0\n  checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)\n  print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))\n  state_dict_ = checkpoint['state_dict']\n  state_dict = {}\n  \n  # convert data_parallal to model\n  for k in state_dict_:\n    if k.startswith('module') and not k.startswith('module_list'):\n      state_dict[k[7:]] = state_dict_[k]\n    else:\n      state_dict[k] = state_dict_[k]\n  model_state_dict = model.state_dict()\n\n  # check loaded parameters and created model parameters\n  msg = 'If you see this, your model does not fully load the ' + \\\n        'pre-trained weight. 
Please make sure ' + \\\n        'you have correctly specified --arch xxx ' + \\\n        'or set the correct --num_classes for your own dataset.'\n  for k in state_dict:\n    if k in model_state_dict:\n      if state_dict[k].shape != model_state_dict[k].shape:\n        print('Skip loading parameter {}, required shape{}, '\\\n              'loaded shape{}. {}'.format(\n          k, model_state_dict[k].shape, state_dict[k].shape, msg))\n        state_dict[k] = model_state_dict[k]\n    else:\n      print('Drop parameter {}.'.format(k) + msg)\n  for k in model_state_dict:\n    if not (k in state_dict):\n      print('No param {}.'.format(k) + msg)\n      state_dict[k] = model_state_dict[k]\n  model.load_state_dict(state_dict, strict=False)\n\n  # resume optimizer parameters\n  if optimizer is not None and resume:\n    if 'optimizer' in checkpoint:\n      optimizer.load_state_dict(checkpoint['optimizer'])\n      start_epoch = checkpoint['epoch']\n      start_lr = lr\n      for step in lr_step:\n        if start_epoch >= step:\n          start_lr *= 0.1\n      for param_group in optimizer.param_groups:\n        param_group['lr'] = start_lr\n      print('Resumed optimizer with start lr', start_lr)\n    else:\n      print('No optimizer parameters in checkpoint.')\n  if optimizer is not None:\n    return model, optimizer, start_epoch\n  else:\n    return model\n\ndef save_model(path, epoch, model, optimizer=None):\n  if isinstance(model, torch.nn.DataParallel):\n    state_dict = model.module.state_dict()\n  else:\n    state_dict = model.state_dict()\n  data = {'epoch': epoch,\n          'state_dict': state_dict}\n  if not (optimizer is None):\n    data['optimizer'] = optimizer.state_dict()\n  torch.save(data, path)\n\n"
  },
  {
    "path": "Network/rigidmask/det_losses.py",
    "content": "# ------------------------------------------------------------------------------\n# Portions of this code are from\n# CornerNet (https://github.com/princeton-vl/CornerNet)\n# Copyright (c) 2018, University of Michigan\n# Licensed under the BSD 3-Clause License\n# ------------------------------------------------------------------------------\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pdb\nimport torch\nimport torch.nn as nn\nfrom .det_utils import _transpose_and_gather_feat\nimport torch.nn.functional as F\n\n\ndef _slow_neg_loss(pred, gt):\n  '''focal loss from CornerNet'''\n  pos_inds = gt.eq(1)\n  neg_inds = gt.lt(1)\n\n  neg_weights = torch.pow(1 - gt[neg_inds], 4)\n\n  loss = 0\n  pos_pred = pred[pos_inds]\n  neg_pred = pred[neg_inds]\n\n  pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)\n  neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights\n\n  num_pos  = pos_inds.float().sum()\n  pos_loss = pos_loss.sum()\n  neg_loss = neg_loss.sum()\n\n  if pos_pred.nelement() == 0:\n    loss = loss - neg_loss\n  else:\n    loss = loss - (pos_loss + neg_loss) / num_pos\n  return loss\n\n\ndef _neg_loss(pred, gt, heat_logits):\n  ''' Modified focal loss. 
Exactly the same as CornerNet.\n      Runs faster and costs a little bit more memory\n    Arguments:\n      pred (batch x c x h x w)\n      gt_regr (batch x c x h x w)\n  '''\n  pos_inds = gt.eq(1).float()\n  neg_inds = gt.lt(1).float()\n\n  neg_weights = torch.pow(1 - gt, 4)\n\n  loss = 0\n\n  logpred = torch.nn.functional.log_softmax(heat_logits,1)\n  pos_loss = logpred[:,0:1] * torch.pow(1 - pred, 2) * pos_inds\n  neg_loss = logpred[:,1:2] * torch.pow(pred, 2) * neg_weights * neg_inds\n\n  num_pos  = pos_inds.float().sum()\n  pos_loss = pos_loss.sum()\n  neg_loss = neg_loss.sum()\n\n  if num_pos == 0:\n    loss = loss - neg_loss\n  else:\n    loss = loss - (pos_loss + neg_loss) / num_pos\n  return loss\n\ndef _not_faster_neg_loss(pred, gt):\n    pos_inds = gt.eq(1).float()\n    neg_inds = gt.lt(1).float()    \n    num_pos  = pos_inds.float().sum()\n    neg_weights = torch.pow(1 - gt, 4)\n\n    loss = 0\n    trans_pred = pred * neg_inds + (1 - pred) * pos_inds\n    weight = neg_weights * neg_inds + pos_inds\n    all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight\n    all_loss = all_loss.sum()\n\n    if num_pos > 0:\n        all_loss /= num_pos\n    loss -=  all_loss\n    return loss\n\ndef _slow_reg_loss(regr, gt_regr, mask):\n    num  = mask.float().sum()\n    mask = mask.unsqueeze(2).expand_as(gt_regr)\n\n    regr    = regr[mask]\n    gt_regr = gt_regr[mask]\n    \n    regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)\n    regr_loss = regr_loss / (num + 1e-4)\n    return regr_loss\n\ndef _reg_loss(regr, gt_regr, mask):\n  ''' L1 regression loss\n    Arguments:\n      regr (batch x max_objects x dim)\n      gt_regr (batch x max_objects x dim)\n      mask (batch x max_objects)\n  '''\n  num = mask.float().sum()\n  mask = mask.unsqueeze(2).expand_as(gt_regr).float()\n\n  regr = regr * mask\n  gt_regr = gt_regr * mask\n    \n  regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)\n  regr_loss = 
regr_loss / (num + 1e-4)\n  return regr_loss\n\nclass FocalLoss(nn.Module):\n  '''nn.Module warpper for focal loss'''\n  def __init__(self):\n    super(FocalLoss, self).__init__()\n    self.neg_loss = _neg_loss\n\n  def forward(self, out, target, logits):\n    return self.neg_loss(out, target, logits)\n\nclass RegLoss(nn.Module):\n  '''Regression loss for an output tensor\n    Arguments:\n      output (batch x dim x h x w)\n      mask (batch x max_objects)\n      ind (batch x max_objects)\n      target (batch x max_objects x dim)\n  '''\n  def __init__(self):\n    super(RegLoss, self).__init__()\n  \n  def forward(self, output, mask, ind, target):\n    pred = _transpose_and_gather_feat(output, ind)\n    loss = _reg_loss(pred, target, mask)\n    return loss\n\nclass RegL1Loss(nn.Module):\n  def __init__(self):\n    super(RegL1Loss, self).__init__()\n  \n  def forward(self, output, mask, ind, target):\n    pred = _transpose_and_gather_feat(output, ind)\n    mask = mask.unsqueeze(2).expand_as(pred).float()\n    # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')\n    loss = F.l1_loss(pred * mask, target * mask, size_average=False)\n    loss = loss / (mask.sum() + 1e-4)\n    return loss\n\nclass NormRegL1Loss(nn.Module):\n  def __init__(self):\n    super(NormRegL1Loss, self).__init__()\n  \n  def forward(self, output, mask, ind, target):\n    pred = _transpose_and_gather_feat(output, ind)\n    mask = mask.unsqueeze(2).expand_as(pred).float()\n    # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')\n    pred = pred / (target + 1e-4)\n    target = target * 0 + 1\n    loss = F.l1_loss(pred * mask, target * mask, size_average=False)\n    loss = loss / (mask.sum() + 1e-4)\n    return loss\n\nclass RegWeightedL1Loss(nn.Module):\n  def __init__(self):\n    super(RegWeightedL1Loss, self).__init__()\n  \n  def forward(self, output, mask, ind, target):\n    pred = _transpose_and_gather_feat(output, ind)\n    mask = mask.float()\n 
   # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')\n    loss = F.l1_loss(pred * mask, target * mask, size_average=False)\n    loss = loss / (mask.sum() + 1e-4)\n    return loss\n\nclass L1Loss(nn.Module):\n  def __init__(self):\n    super(L1Loss, self).__init__()\n  \n  def forward(self, output, mask, ind, target):\n    pred = _transpose_and_gather_feat(output, ind)\n    mask = mask.unsqueeze(2).expand_as(pred).float()\n    loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')\n    return loss\n\nclass BinRotLoss(nn.Module):\n  def __init__(self):\n    super(BinRotLoss, self).__init__()\n  \n  def forward(self, output, mask, ind, rotbin, rotres):\n    pred = _transpose_and_gather_feat(output, ind)\n    loss = compute_rot_loss(pred, rotbin, rotres, mask)\n    return loss\n\ndef compute_res_loss(output, target):\n    return F.smooth_l1_loss(output, target, reduction='elementwise_mean')\n\n# TODO: weight\ndef compute_bin_loss(output, target, mask):\n    mask = mask.expand_as(output)\n    output = output * mask.float()\n    return F.cross_entropy(output, target, reduction='elementwise_mean')\n\ndef compute_rot_loss(output, target_bin, target_res, mask):\n    # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, \n    #                 bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]\n    # target_bin: (B, 128, 2) [bin1_cls, bin2_cls]\n    # target_res: (B, 128, 2) [bin1_res, bin2_res]\n    # mask: (B, 128, 1)\n    # import pdb; pdb.set_trace()\n    output = output.view(-1, 8)\n    target_bin = target_bin.view(-1, 2)\n    target_res = target_res.view(-1, 2)\n    mask = mask.view(-1, 1)\n    loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)\n    loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)\n    loss_res = torch.zeros_like(loss_bin1)\n    if target_bin[:, 0].nonzero().shape[0] > 0:\n        idx1 = target_bin[:, 0].nonzero()[:, 0]\n        valid_output1 = 
torch.index_select(output, 0, idx1.long())\n        valid_target_res1 = torch.index_select(target_res, 0, idx1.long())\n        loss_sin1 = compute_res_loss(\n          valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))\n        loss_cos1 = compute_res_loss(\n          valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))\n        loss_res += loss_sin1 + loss_cos1\n    if target_bin[:, 1].nonzero().shape[0] > 0:\n        idx2 = target_bin[:, 1].nonzero()[:, 0]\n        valid_output2 = torch.index_select(output, 0, idx2.long())\n        valid_target_res2 = torch.index_select(target_res, 0, idx2.long())\n        loss_sin2 = compute_res_loss(\n          valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))\n        loss_cos2 = compute_res_loss(\n          valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))\n        loss_res += loss_sin2 + loss_cos2\n    return loss_bin1 + loss_bin2 + loss_res\n"
  },
  {
    "path": "Network/rigidmask/det_utils.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport torch\nimport torch.nn as nn\n\ndef _sigmoid(x):\n  y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)\n  return y\n\ndef _gather_feat(feat, ind, mask=None):\n    dim  = feat.size(2)\n    ind  = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)\n    feat = feat.gather(1, ind)\n    if mask is not None:\n        mask = mask.unsqueeze(2).expand_as(feat)\n        feat = feat[mask]\n        feat = feat.view(-1, dim)\n    return feat\n\ndef _transpose_and_gather_feat(feat, ind):\n    feat = feat.permute(0, 2, 3, 1).contiguous()\n    feat = feat.view(feat.size(0), -1, feat.size(3))\n    feat = _gather_feat(feat, ind)\n    return feat\n\ndef flip_tensor(x):\n    return torch.flip(x, [3])\n    # tmp = x.detach().cpu().numpy()[..., ::-1].copy()\n    # return torch.from_numpy(tmp).to(x.device)\n\ndef flip_lr(x, flip_idx):\n  tmp = x.detach().cpu().numpy()[..., ::-1].copy()\n  shape = tmp.shape\n  for e in flip_idx:\n    tmp[:, e[0], ...], tmp[:, e[1], ...] = \\\n      tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()\n  return torch.from_numpy(tmp.reshape(shape)).to(x.device)\n\ndef flip_lr_off(x, flip_idx):\n  tmp = x.detach().cpu().numpy()[..., ::-1].copy()\n  shape = tmp.shape\n  tmp = tmp.reshape(tmp.shape[0], 17, 2, \n                    tmp.shape[2], tmp.shape[3])\n  tmp[:, :, 0, :, :] *= -1\n  for e in flip_idx:\n    tmp[:, e[0], ...], tmp[:, e[1], ...] = \\\n      tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()\n  return torch.from_numpy(tmp.reshape(shape)).to(x.device)"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/.gitignore",
    "content": ".vscode\n.idea\n*.so\n*.o\n*pyc\n_ext\nbuild\nDCNv2.egg-info\ndist"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/__init__.py",
    "content": "from .dcn_v2 import *\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/dcn_v2.py",
    "content": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport math\nimport torch\nfrom torch import nn\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\nfrom torch.autograd.function import once_differentiable\n\nimport _ext as _backend\n\n\nclass _DCNv2(Function):\n    @staticmethod\n    def forward(ctx, input, offset, mask, weight, bias,\n                stride, padding, dilation, deformable_groups):\n        ctx.stride = _pair(stride)\n        ctx.padding = _pair(padding)\n        ctx.dilation = _pair(dilation)\n        ctx.kernel_size = _pair(weight.shape[2:4])\n        ctx.deformable_groups = deformable_groups\n        output = _backend.dcn_v2_forward(input, weight, bias,\n                                         offset, mask,\n                                         ctx.kernel_size[0], ctx.kernel_size[1],\n                                         ctx.stride[0], ctx.stride[1],\n                                         ctx.padding[0], ctx.padding[1],\n                                         ctx.dilation[0], ctx.dilation[1],\n                                         ctx.deformable_groups)\n        ctx.save_for_backward(input, offset, mask, weight, bias)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    def backward(ctx, grad_output):\n        input, offset, mask, weight, bias = ctx.saved_tensors\n        grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \\\n            _backend.dcn_v2_backward(input, weight,\n                                     bias,\n                                     offset, mask,\n                                     grad_output,\n                                     ctx.kernel_size[0], ctx.kernel_size[1],\n                                     ctx.stride[0], ctx.stride[1],\n                                     ctx.padding[0], ctx.padding[1],\n                                    
 ctx.dilation[0], ctx.dilation[1],\n                                     ctx.deformable_groups)\n\n        return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\\\n            None, None, None, None,\n\n\ndcn_v2_conv = _DCNv2.apply\n\n\nclass DCNv2(nn.Module):\n\n    def __init__(self, in_channels, out_channels,\n                 kernel_size, stride, padding, dilation=1, deformable_groups=1):\n        super(DCNv2, self).__init__()\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.kernel_size = _pair(kernel_size)\n        self.stride = _pair(stride)\n        self.padding = _pair(padding)\n        self.dilation = _pair(dilation)\n        self.deformable_groups = deformable_groups\n\n        self.weight = nn.Parameter(torch.Tensor(\n            out_channels, in_channels, *self.kernel_size))\n        self.bias = nn.Parameter(torch.Tensor(out_channels))\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        n = self.in_channels\n        for k in self.kernel_size:\n            n *= k\n        stdv = 1. 
/ math.sqrt(n)\n        self.weight.data.uniform_(-stdv, stdv)\n        self.bias.data.zero_()\n\n    def forward(self, input, offset, mask):\n        assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \\\n            offset.shape[1]\n        assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \\\n            mask.shape[1]\n        return dcn_v2_conv(input, offset, mask,\n                           self.weight,\n                           self.bias,\n                           self.stride,\n                           self.padding,\n                           self.dilation,\n                           self.deformable_groups)\n\n\nclass DCN(DCNv2):\n\n    def __init__(self, in_channels, out_channels,\n                 kernel_size, stride, padding,\n                 dilation=1, deformable_groups=1):\n        super(DCN, self).__init__(in_channels, out_channels,\n                                  kernel_size, stride, padding, dilation, deformable_groups)\n\n        channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]\n        self.conv_offset_mask = nn.Conv2d(self.in_channels,\n                                          channels_,\n                                          kernel_size=self.kernel_size,\n                                          stride=self.stride,\n                                          padding=self.padding,\n                                          bias=True)\n        self.init_offset()\n\n    def init_offset(self):\n        self.conv_offset_mask.weight.data.zero_()\n        self.conv_offset_mask.bias.data.zero_()\n\n    def forward(self, input):\n        out = self.conv_offset_mask(input)\n        o1, o2, mask = torch.chunk(out, 3, dim=1)\n        offset = torch.cat((o1, o2), dim=1)\n        mask = torch.sigmoid(mask)\n        return dcn_v2_conv(input, offset, mask,\n                           self.weight, self.bias,\n                           self.stride,\n       
                    self.padding,\n                           self.dilation,\n                           self.deformable_groups)\n\n\n\nclass _DCNv2Pooling(Function):\n    @staticmethod\n    def forward(ctx, input, rois, offset,\n                spatial_scale,\n                pooled_size,\n                output_dim,\n                no_trans,\n                group_size=1,\n                part_size=None,\n                sample_per_part=4,\n                trans_std=.0):\n        ctx.spatial_scale = spatial_scale\n        ctx.no_trans = int(no_trans)\n        ctx.output_dim = output_dim\n        ctx.group_size = group_size\n        ctx.pooled_size = pooled_size\n        ctx.part_size = pooled_size if part_size is None else part_size\n        ctx.sample_per_part = sample_per_part\n        ctx.trans_std = trans_std\n\n        output, output_count = \\\n            _backend.dcn_v2_psroi_pooling_forward(input, rois, offset,\n                                                  ctx.no_trans, ctx.spatial_scale,\n                                                  ctx.output_dim, ctx.group_size,\n                                                  ctx.pooled_size, ctx.part_size,\n                                                  ctx.sample_per_part, ctx.trans_std)\n        ctx.save_for_backward(input, rois, offset, output_count)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    def backward(ctx, grad_output):\n        input, rois, offset, output_count = ctx.saved_tensors\n        grad_input, grad_offset = \\\n            _backend.dcn_v2_psroi_pooling_backward(grad_output,\n                                                   input,\n                                                   rois,\n                                                   offset,\n                                                   output_count,\n                                                   ctx.no_trans,\n                                                   ctx.spatial_scale,\n      
                                             ctx.output_dim,\n                                                   ctx.group_size,\n                                                   ctx.pooled_size,\n                                                   ctx.part_size,\n                                                   ctx.sample_per_part,\n                                                   ctx.trans_std)\n\n        return grad_input, None, grad_offset, \\\n            None, None, None, None, None, None, None, None\n\n\ndcn_v2_pooling = _DCNv2Pooling.apply\n\n\nclass DCNv2Pooling(nn.Module):\n\n    def __init__(self,\n                 spatial_scale,\n                 pooled_size,\n                 output_dim,\n                 no_trans,\n                 group_size=1,\n                 part_size=None,\n                 sample_per_part=4,\n                 trans_std=.0):\n        super(DCNv2Pooling, self).__init__()\n        self.spatial_scale = spatial_scale\n        self.pooled_size = pooled_size\n        self.output_dim = output_dim\n        self.no_trans = no_trans\n        self.group_size = group_size\n        self.part_size = pooled_size if part_size is None else part_size\n        self.sample_per_part = sample_per_part\n        self.trans_std = trans_std\n\n    def forward(self, input, rois, offset):\n        assert input.shape[1] == self.output_dim\n        if self.no_trans:\n            offset = input.new()\n        return dcn_v2_pooling(input, rois, offset,\n                              self.spatial_scale,\n                              self.pooled_size,\n                              self.output_dim,\n                              self.no_trans,\n                              self.group_size,\n                              self.part_size,\n                              self.sample_per_part,\n                              self.trans_std)\n\n\nclass DCNPooling(DCNv2Pooling):\n\n    def __init__(self,\n                 spatial_scale,\n                 
pooled_size,\n                 output_dim,\n                 no_trans,\n                 group_size=1,\n                 part_size=None,\n                 sample_per_part=4,\n                 trans_std=.0,\n                 deform_fc_dim=1024):\n        super(DCNPooling, self).__init__(spatial_scale,\n                                         pooled_size,\n                                         output_dim,\n                                         no_trans,\n                                         group_size,\n                                         part_size,\n                                         sample_per_part,\n                                         trans_std)\n\n        self.deform_fc_dim = deform_fc_dim\n\n        if not no_trans:\n            self.offset_mask_fc = nn.Sequential(\n                nn.Linear(self.pooled_size * self.pooled_size *\n                          self.output_dim, self.deform_fc_dim),\n                nn.ReLU(inplace=True),\n                nn.Linear(self.deform_fc_dim, self.deform_fc_dim),\n                nn.ReLU(inplace=True),\n                nn.Linear(self.deform_fc_dim, self.pooled_size *\n                          self.pooled_size * 3)\n            )\n            self.offset_mask_fc[4].weight.data.zero_()\n            self.offset_mask_fc[4].bias.data.zero_()\n\n    def forward(self, input, rois):\n        offset = input.new()\n\n        if not self.no_trans:\n\n            # do roi_align first\n            n = rois.shape[0]\n            roi = dcn_v2_pooling(input, rois, offset,\n                                 self.spatial_scale,\n                                 self.pooled_size,\n                                 self.output_dim,\n                                 True,  # no trans\n                                 self.group_size,\n                                 self.part_size,\n                                 self.sample_per_part,\n                                 self.trans_std)\n\n            # build mask and 
offset\n            offset_mask = self.offset_mask_fc(roi.view(n, -1))\n            offset_mask = offset_mask.view(\n                n, 3, self.pooled_size, self.pooled_size)\n            o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)\n            offset = torch.cat((o1, o2), dim=1)\n            mask = torch.sigmoid(mask)\n\n            # do pooling with offset and mask\n            return dcn_v2_pooling(input, rois, offset,\n                                  self.spatial_scale,\n                                  self.pooled_size,\n                                  self.output_dim,\n                                  self.no_trans,\n                                  self.group_size,\n                                  self.part_size,\n                                  self.sample_per_part,\n                                  self.trans_std) * mask\n        # only roi_align\n        return dcn_v2_pooling(input, rois, offset,\n                              self.spatial_scale,\n                              self.pooled_size,\n                              self.output_dim,\n                              self.no_trans,\n                              self.group_size,\n                              self.part_size,\n                              self.sample_per_part,\n                              self.trans_std)\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp",
    "content": "#include <vector>\n#include \"cpu/dcn_v2_im2col_cpu.h\"\n#include <iostream>\n\n#include <ATen/ATen.h>\n//#include <ATen/cuda/CUDAContext.h>\n\n#include <TH/TH.h>\n//#include <THC/THCAtomics.cuh>\n//#include <THC/THCDeviceUtils.cuh>\n\n//extern THCState *state;\n\n// author: Charles Shang\n// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu\n\n// modified from the CUDA version for CPU use by Daniel K. Suhendro\n\n// edit by: James Bockman and Matthew Howe\n// modified for torch implementation to remove use of deprecated torch access to Blas\n\nat::Tensor\ndcn_v2_cpu_forward(const at::Tensor &input,\n                    const at::Tensor &weight,\n                    const at::Tensor &bias,\n                    const at::Tensor &offset,\n                    const at::Tensor &mask,\n                    const int kernel_h,\n                    const int kernel_w,\n                    const int stride_h,\n                    const int stride_w,\n                    const int pad_h,\n                    const int pad_w,\n                    const int dilation_h,\n                    const int dilation_w,\n                    const int deformable_group)\n{\n    // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));\n    /*AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n    AT_ASSERTM(weight.type().is_cuda(), \"weight must be a CUDA tensor\");\n    AT_ASSERTM(bias.type().is_cuda(), \"bias must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");*/\n\n    const int batch = input.size(0);\n    const int channels = input.size(1);\n    const int height = input.size(2);\n    const int width = input.size(3);\n\n    const int channels_out = weight.size(0);\n    const int channels_kernel = weight.size(1);\n    const int kernel_h_ = weight.size(2);\n    const 
int kernel_w_ = weight.size(3);\n\n    // printf(\"Kernels: %d %d %d %d\\n\", kernel_h_, kernel_w_, kernel_w, kernel_h);\n    // printf(\"Channels: %d %d\\n\", channels, channels_kernel);\n    // printf(\"Channels: %d %d\\n\", channels_out, channels_kernel);\n\n    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,\n               \"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\", kernel_h_, kernel_w, kernel_h_, kernel_w_);\n\n    AT_ASSERTM(channels == channels_kernel,\n               \"Input shape and kernel channels wont match: (%d vs %d).\", channels, channels_kernel);\n\n    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n    // auto ones = at::ones({height_out, width_out}, input.options());\n    auto ones = at::ones({bias.sizes()[0], height_out, width_out}, input.options());\n    auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());\n    auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options());\n\n    using scalar_t = float;\n    for (int b = 0; b < batch; b++)\n    {\n        auto input_n = input.select(0, b);\n        auto offset_n = offset.select(0, b);\n        auto mask_n = mask.select(0, b);\n        auto output_n = output.select(0, b);\n        // std::cout << \"output_n: \" << output_n << \"output.select(0,b): \" << output.select(0,b) << \"\\n\"; \n\n        // Do Bias first:\n        // M,N,K are dims of matrix A and B\n        // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)\n        // (N x 1) (1 x M)\n\n        // torch implementation\n        auto ones_T = at::transpose(ones.contiguous(), 2, 0);\n        ones_T = at::mul(ones_T, bias.contiguous());\n        ones_T = at::transpose(ones_T, 2, 0);\n        output_n = at::add(output_n, ones_T);\n\n        
modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),\n                                         offset_n.data_ptr<scalar_t>(),\n                                         mask_n.data_ptr<scalar_t>(),\n                                         1, channels, height, width,\n                                         height_out, width_out, kernel_h, kernel_w,\n                                         pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,\n                                         deformable_group,\n                                         columns.data_ptr<scalar_t>());\n\n        //(k * m)  x  (m * n)\n        // Y = WC\n\n        // torch implementation\n        auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w});\n        auto product = at::matmul(weight_flat, columns);\n        output.select(0, b) = at::add(output_n, product.view({channels_out, height_out, width_out}));\n    }\n    return output;\n}\n\nstd::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,\n                                             const at::Tensor &weight,\n                                             const at::Tensor &bias,\n                                             const at::Tensor &offset,\n                                             const at::Tensor &mask,\n                                             const at::Tensor &grad_output,\n                                             int kernel_h, int kernel_w,\n                                             int stride_h, int stride_w,\n                                             int pad_h, int pad_w,\n                                             int dilation_h, int dilation_w,\n                                             int deformable_group)\n{\n\n    THArgCheck(input.is_contiguous(), 1, \"input tensor has to be contiguous\");\n    THArgCheck(weight.is_contiguous(), 2, \"weight tensor has to be contiguous\");\n\n    /*AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA 
tensor\");\n    AT_ASSERTM(weight.type().is_cuda(), \"weight must be a CUDA tensor\");\n    AT_ASSERTM(bias.type().is_cuda(), \"bias must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");*/\n\n    const int batch = input.size(0);\n    const int channels = input.size(1);\n    const int height = input.size(2);\n    const int width = input.size(3);\n\n    const int channels_out = weight.size(0);\n    const int channels_kernel = weight.size(1);\n    const int kernel_h_ = weight.size(2);\n    const int kernel_w_ = weight.size(3);\n\n    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,\n               \"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\", kernel_h_, kernel_w, kernel_h_, kernel_w_);\n\n    AT_ASSERTM(channels == channels_kernel,\n               \"Input shape and kernel channels wont match: (%d vs %d).\", channels, channels_kernel);\n\n    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n    auto ones = at::ones({height_out, width_out}, input.options());\n    auto columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());\n    auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());\n\n    auto grad_input = at::zeros_like(input);\n    auto grad_weight = at::zeros_like(weight);\n    auto grad_bias = at::zeros_like(bias);\n    auto grad_offset = at::zeros_like(offset);\n    auto grad_mask = at::zeros_like(mask);\n\n    using scalar_t = float;\n\n    for (int b = 0; b < batch; b++)\n    {\n        auto input_n = input.select(0, b);\n        auto offset_n = offset.select(0, b);\n        auto mask_n = mask.select(0, b);\n        auto grad_output_n = grad_output.select(0, b);\n        auto grad_input_n = 
grad_input.select(0, b);\n        auto grad_offset_n = grad_offset.select(0, b);\n        auto grad_mask_n = grad_mask.select(0, b);\n\n\n\n        // Torch implementation\n        auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w});\n        weight_flat = at::transpose(weight_flat, 1, 0);\n        auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out});\n        columns = at::matmul(weight_flat, grad_output_n_flat);\n\n        // gradient w.r.t. input coordinate data\n        modulated_deformable_col2im_coord_cpu(columns.data_ptr<scalar_t>(),\n                                               input_n.data_ptr<scalar_t>(),\n                                               offset_n.data_ptr<scalar_t>(),\n                                               mask_n.data_ptr<scalar_t>(),\n                                               1, channels, height, width,\n                                               height_out, width_out, kernel_h, kernel_w,\n                                               pad_h, pad_w, stride_h, stride_w,\n                                               dilation_h, dilation_w, deformable_group,\n                                               grad_offset_n.data_ptr<scalar_t>(),\n                                               grad_mask_n.data_ptr<scalar_t>());\n        // gradient w.r.t. 
input data\n        modulated_deformable_col2im_cpu(columns.data_ptr<scalar_t>(),\n                                         offset_n.data_ptr<scalar_t>(),\n                                         mask_n.data_ptr<scalar_t>(),\n                                         1, channels, height, width,\n                                         height_out, width_out, kernel_h, kernel_w,\n                                         pad_h, pad_w, stride_h, stride_w,\n                                         dilation_h, dilation_w, deformable_group,\n                                         grad_input_n.data_ptr<scalar_t>());\n\n        // gradient w.r.t. weight, dWeight should accumulate across the batch and group\n        modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(),\n                                         offset_n.data_ptr<scalar_t>(),\n                                         mask_n.data_ptr<scalar_t>(),\n                                         1, channels, height, width,\n                                         height_out, width_out, kernel_h, kernel_w,\n                                         pad_h, pad_w, stride_h, stride_w,\n                                         dilation_h, dilation_w, deformable_group,\n                                         columns.data_ptr<scalar_t>());\n\n        // Torch implementation\n        auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0));\n        grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w}));\n\n\n        // Torch implementation\n        auto ones_flat = ones.view({height_out*width_out});\n        product = at::matmul(grad_output_n_flat, ones_flat);\n        grad_bias = at::add(grad_bias, product);\n    }\n\n    return {\n        grad_input, grad_offset, grad_mask, grad_weight, grad_bias\n    };\n}\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.cpp",
    "content": "#include \"dcn_v2_im2col_cpu.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n//#include <ATen/cuda/CUDAContext.h>\n\n#include <TH/TH.h>\n//#include <THC/THCAtomics.cuh>\n//#include <THC/THCDeviceUtils.cuh>\n\n// modified from the CUDA version for CPU use by Daniel K. Suhendro\n\n/*#define CUDA_KERNEL_LOOP(i, n)                          \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \\\n      i < (n);                                          \\\n      i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N)\n{\n  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;\n}*/\n\n\nfloat dmcn_im2col_bilinear_cpu(const float *bottom_data, const int data_width,\n                           const int height, const int width, float h, float w)\n{\n  int h_low = floor(h);\n  int w_low = floor(w);\n  int h_high = h_low + 1;\n  int w_high = w_low + 1;\n\n  float lh = h - h_low;\n  float lw = w - w_low;\n  float hh = 1 - lh, hw = 1 - lw;\n\n  float v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n    v1 = bottom_data[h_low * data_width + w_low];\n  float v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n    v2 = bottom_data[h_low * data_width + w_high];\n  float v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n    v3 = bottom_data[h_high * data_width + w_low];\n  float v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n    v4 = bottom_data[h_high * data_width + w_high];\n\n  float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n  float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  return val;\n}\n\nfloat dmcn_get_gradient_weight_cpu(float argmax_h, float argmax_w,\n                               const int h, const int w, const int height, const int width)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = 
floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  float weight = 0;\n  if (h == argmax_h_low && w == argmax_w_low)\n    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);\n  if (h == argmax_h_low && w == argmax_w_high)\n    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);\n  if (h == argmax_h_high && w == argmax_w_low)\n    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);\n  if (h == argmax_h_high && w == argmax_w_high)\n    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);\n  return weight;\n}\n\nfloat dmcn_get_coordinate_weight_cpu(float argmax_h, float argmax_w,\n                                 const int height, const int width, const float *im_data,\n                                 const int data_width, const int bp_dir)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  float weight = 0;\n\n  if (bp_dir == 0)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)\n      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n  else if (bp_dir == 1)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && 
argmax_w_high <= width - 1)\n      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n\n  return weight;\n}\n\nvoid modulated_deformable_im2col_cpu_kernel(const int n, const float *data_im, const float *data_offset, const float *data_mask,\n                                                       const int height, const int width, const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int num_channels, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       float *data_col)\n{\n  // launch channels * batch_size * height_col * width_col cores\n  for(int index=0; index<n; index++)\n  {\n    // NOTE(CharlesShang): different from Dai Jifeng's MXNet implementation, col_buffer is of shape (c*kw*kh, N, oh, ow)\n    // here columns is of shape (N, c*kw*kh, oh * ow), need to adapt axis\n\n    // index index of output matrix\n    const int w_col = index % width_col;\n    const int h_col = (index / width_col) % height_col;\n    // const int b_col = (index / width_col / height_col) % batch_size;\n    const int b_col = (index / width_col / 
height_col / num_channels) % batch_size;\n    // const int c_im = (index / width_col / height_col) / batch_size;\n    const int c_im = (index / width_col / height_col) % num_channels;\n    // const int c_col = c_im * kernel_h * kernel_w;\n    const int c_col = c_im * kernel_h * kernel_w;\n\n    // compute deformable group index\n    const int deformable_group_index = c_im / channel_per_deformable_group;\n\n    const int h_in = h_col * stride_h - pad_h;\n    const int w_in = w_col * stride_w - pad_w;\n\n    //  float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;\n    float *data_col_ptr = data_col + ((b_col * num_channels * kernel_w * kernel_h + c_col) * height_col + h_col) * width_col + w_col;\n    //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;\n    const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;\n    const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n\n    const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    for (int i = 0; i < kernel_h; ++i)\n    {\n      for (int j = 0; j < kernel_w; ++j)\n      {\n        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;\n        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;\n        const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;\n        const float offset_h = data_offset_ptr[data_offset_h_ptr];\n        const float offset_w = data_offset_ptr[data_offset_w_ptr];\n        const float mask = data_mask_ptr[data_mask_hw_ptr];\n        float val = static_cast<float>(0);\n        const float h_im = h_in + i * dilation_h + offset_h;\n        const 
float w_im = w_in + j * dilation_w + offset_w;\n        //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {\n        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)\n        {\n          //const float map_h = i * dilation_h + offset_h;\n          //const float map_w = j * dilation_w + offset_w;\n          //const int cur_height = height - h_in;\n          //const int cur_width = width - w_in;\n          //val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, cur_height, cur_width, map_h, map_w);\n          val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, height, width, h_im, w_im);\n        }\n        *data_col_ptr = val * mask;\n        // data_col_ptr += batch_size * height_col * width_col;\n        data_col_ptr += height_col * width_col;\n      }\n    }\n  }\n}\n\nvoid modulated_deformable_col2im_cpu_kernel(const int n, const float *data_col, const float *data_offset, const float *data_mask,\n                                                       const int channels, const int height, const int width,\n                                                       const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       float *grad_im)\n{\n  for(int index = 0; index < n; index++)\n  {\n    const int j = (index / width_col / height_col / batch_size) % kernel_w;\n    const int i = (index / width_col / height_col / batch_size / 
kernel_w) % kernel_h;\n    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / channel_per_deformable_group;\n\n    int w_out = index % width_col;\n    int h_out = (index / width_col) % height_col;\n    int b = (index / width_col / height_col) % batch_size;\n    int w_in = w_out * stride_w - pad_w;\n    int h_in = h_out * stride_h - pad_h;\n\n    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;\n    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;\n    const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;\n    const float offset_h = data_offset_ptr[data_offset_h_ptr];\n    const float offset_w = data_offset_ptr[data_offset_w_ptr];\n    const float mask = data_mask_ptr[data_mask_hw_ptr];\n    const float cur_inv_h_data = h_in + i * dilation_h + offset_h;\n    const float cur_inv_w_data = w_in + j * dilation_w + offset_w;\n\n    const float cur_top_grad = data_col[index] * mask;\n    const int cur_h = (int)cur_inv_h_data;\n    const int cur_w = (int)cur_inv_w_data;\n    \n    for (int dy = -2; dy <= 2; dy++)\n    {\n      for (int dx = -2; dx <= 2; dx++)\n      {\n        if (cur_h + dy >= 0 && cur_h + dy < height &&\n            cur_w + dx >= 0 && cur_w + dx < width &&\n            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&\n            abs(cur_inv_w_data - (cur_w + dx)) < 1)\n        {\n          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;\n          float weight = 
dmcn_get_gradient_weight_cpu(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);\n          //atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);\n          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;\n\n        }\n      }\n    }\n  }\n}\n\nvoid modulated_deformable_col2im_coord_cpu_kernel(const int n, const float *data_col, const float *data_im,\n                                                             const float *data_offset, const float *data_mask,\n                                                             const int channels, const int height, const int width,\n                                                             const int kernel_h, const int kernel_w,\n                                                             const int pad_h, const int pad_w,\n                                                             const int stride_h, const int stride_w,\n                                                             const int dilation_h, const int dilation_w,\n                                                             const int channel_per_deformable_group,\n                                                             const int batch_size, const int offset_channels, const int deformable_group,\n                                                             const int height_col, const int width_col,\n                                                             float *grad_offset, float *grad_mask)\n{\n  for(int index = 0; index < n; index++)\n  {\n    float val = 0, mval = 0;\n    int w = index % width_col;\n    int h = (index / width_col) % height_col;\n    int c = (index / width_col / height_col) % offset_channels;\n    int b = (index / width_col / height_col) / offset_channels;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / (2 * kernel_h * kernel_w);\n    const int col_step = kernel_h * kernel_w;\n    int cnt = 0;\n    const float *data_col_ptr = data_col + 
deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;\n    const float *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;\n    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;\n\n    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)\n    {\n      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;\n      const int bp_dir = offset_c % 2;\n\n      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;\n      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;\n      int w_out = col_pos % width_col;\n      int h_out = (col_pos / width_col) % height_col;\n      int w_in = w_out * stride_w - pad_w;\n      int h_in = h_out * stride_h - pad_h;\n      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);\n      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);\n      const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);\n      const float offset_h = data_offset_ptr[data_offset_h_ptr];\n      const float offset_w = data_offset_ptr[data_offset_w_ptr];\n      const float mask = data_mask_ptr[data_mask_hw_ptr];\n      float inv_h = h_in + i * dilation_h + offset_h;\n      float inv_w = w_in + j * dilation_w + offset_w;\n      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)\n      {\n        inv_h = inv_w = -2;\n      }\n      else\n      {\n        mval += 
data_col_ptr[col_pos] * dmcn_im2col_bilinear_cpu(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);\n      }\n      const float weight = dmcn_get_coordinate_weight_cpu(\n          inv_h, inv_w,\n          height, width, data_im_ptr + cnt * height * width, width, bp_dir);\n      val += weight * data_col_ptr[col_pos] * mask;\n      cnt += 1;\n    }\n    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);\n    grad_offset[index] = val;\n    if (offset_c % 2 == 0)\n      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);\n      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;\n  }\n}\n\nvoid modulated_deformable_im2col_cpu(const float* data_im, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w,\n  const int deformable_group, float* data_col) {\n  // num_axes should be smaller than block size\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * batch_size * height_col * width_col;\n  modulated_deformable_im2col_cpu_kernel(\n      num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kernel_w,\n      pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,\n      batch_size, channels, deformable_group, height_col, width_col, data_col);\n  \n  /*cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_im2col_cuda: %s\\n\", cudaGetErrorString(err));\n  }*/\n\n}\n\nvoid 
modulated_deformable_col2im_cpu(const float* data_col, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w, \n  const int deformable_group, float* grad_im){\n\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;\n  modulated_deformable_col2im_cpu_kernel(\n        num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,\n        kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, channel_per_deformable_group,\n        batch_size, deformable_group, height_col, width_col, grad_im);\n  /*cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_cuda: %s\\n\", cudaGetErrorString(err));\n  }*/\n\n}\n\nvoid modulated_deformable_col2im_coord_cpu(const float* data_col, const float* data_im, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w, \n  const int deformable_group,\n  float* grad_offset, float* grad_mask) {\n  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;\n  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;\n  modulated_deformable_col2im_coord_cpu_kernel(\n        num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,\n     
   kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, channel_per_deformable_group,\n        batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, \n        grad_offset, grad_mask);\n  /*cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_coord_cuda: %s\\n\", cudaGetErrorString(err));\n  }*/\n}"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h",
    "content": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contributions by the University of California:\n * Copyright (c) 2014-2017 The Regents of the University of California (Regents)\n * All rights reserved.\n *\n * All other contributions:\n * Copyright (c) 2014-2017, the respective contributors\n * All rights reserved.\n *\n * Caffe uses a shared copyright model: each contributor holds copyright over\n * their contributions to Caffe. The project versioning records all such\n * contribution and copyright details. If a contributor wants to further mark\n * their specific copyright on a particular contribution, they should indicate\n * their copyright solely in the commit message of the change when it is\n * committed.\n *\n * LICENSE\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n * 1. Redistributions of source code must retain the above copyright notice, this\n * list of conditions and the following disclaimer.\n * 2. Redistributions in binary form must reproduce the above copyright notice,\n * this list of conditions and the following disclaimer in the documentation\n * and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * CONTRIBUTION AGREEMENT\n *\n * By contributing to the BVLC/caffe repository through pull-request, comment,\n * or otherwise, the contributor releases their content to the\n * license and copyright terms herein.\n *\n ***************** END Caffe Copyright Notice and Disclaimer ********************\n *\n * Copyright (c) 2018 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file modulated_deformable_im2col.h\n * \\brief Function definitions of converting an image to\n * column matrix based on kernel, padding, dilation, and offset.\n * These functions are mainly used in deformable convolution operators.\n * \\ref: https://arxiv.org/abs/1811.11168\n * \\author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu\n */\n\n/***************** Adapted by Charles Shang *********************/\n// modified from the CUDA version for CPU use by Daniel K. 
Suhendro\n\n#ifndef DCN_V2_IM2COL_CPU\n#define DCN_V2_IM2COL_CPU\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n  void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask,\n                                        const int batch_size, const int channels, const int height_im, const int width_im,\n                                        const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n                                        const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                        const int dilation_h, const int dilation_w,\n                                        const int deformable_group, float *data_col);\n\n  void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask,\n                                        const int batch_size, const int channels, const int height_im, const int width_im,\n                                        const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n                                        const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                        const int dilation_h, const int dilation_w,\n                                        const int deformable_group, float *grad_im);\n\n  void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,\n                                         const int batch_size, const int channels, const int height_im, const int width_im,\n                                         const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                         const int dilation_h, const int 
dilation_w,\n                                         const int deformable_group,\n                                         float *grad_offset, float *grad_mask);\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_psroi_pooling_cpu.cpp",
    "content": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psroi_pooling.cu\n * \\brief\n * \\author Yi Li, Guodong Zhang, Jifeng Dai\n*/\n/***************** Adapted by Charles Shang *********************/\n// modified from the CUDA version for CPU use by Daniel K. Suhendro\n\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n//#include <ATen/cuda/CUDAContext.h>\n\n#include <TH/TH.h>\n//#include <THC/THCAtomics.cuh>\n//#include <THC/THCDeviceUtils.cuh>\n\n/*#define CUDA_KERNEL_LOOP(i, n)                        \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \\\n       i < (n);                                       \\\n       i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N)\n{\n  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;\n}*/\n\ntemplate <typename T>\nT bilinear_interp_cpu(\n    const T *data,\n    const T x,\n    const T y,\n    const int width,\n    const int height)\n{\n  int x1 = floor(x);\n  int x2 = ceil(x);\n  int y1 = floor(y);\n  int y2 = ceil(y);\n  T dist_x = static_cast<T>(x - x1);\n  T dist_y = static_cast<T>(y - y1);\n  T value11 = data[y1 * width + x1];\n  T value12 = data[y2 * width + x1];\n  T value21 = data[y1 * width + x2];\n  T value22 = data[y2 * width + x2];\n  T value = (1 - dist_x) * (1 - dist_y) * value11 +\n            (1 - dist_x) * dist_y * value12 +\n            dist_x * (1 - dist_y) * value21 +\n            dist_x * dist_y * value22;\n  return value;\n}\n\ntemplate <typename T>\n void DeformablePSROIPoolForwardKernelCpu(\n    const int count,\n    const T *bottom_data,\n    const T spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const T *bottom_rois, const T *bottom_trans,\n    const int no_trans,\n    const T trans_std,\n    const int sample_per_part,\n    
const int output_dim,\n    const int group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class,\n    T *top_data,\n    T *top_count)\n{\n  for(int index = 0; index < count; index++)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const T *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;\n\n    // Force too small ROIs to be 1x1\n    T roi_width = std::max(roi_end_w - roi_start_w, T(0.1)); //avoid 0\n    T roi_height = std::max(roi_end_h - roi_start_h, T(0.1));\n\n    // Compute w and h at bottom\n    T bin_size_h = roi_height / static_cast<T>(pooled_height);\n    T bin_size_w = roi_width / static_cast<T>(pooled_width);\n\n    T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);\n    T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);\n\n    int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);\n    int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;\n    T trans_y = no_trans ? 
static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;\n\n    T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    T sum = 0;\n    int count = 0;\n    int gw = floor(static_cast<T>(pw) * group_size / pooled_width);\n    int gh = floor(static_cast<T>(ph) * group_size / pooled_height);\n    gw = std::min(std::max(gw, 0), group_size - 1);\n    gh = std::min(std::max(gh, 0), group_size - 1);\n\n    const T *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        T w = wstart + iw * sub_bin_size_w;\n        T h = hstart + ih * sub_bin_size_h;\n        // bilinear interpolation\n        if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = std::min(std::max(w, T(0.)), width - T(1.));\n        h = std::min(std::max(h, T(0.)), height - T(1.));\n        int c = (ctop * group_size + gh) * group_size + gw;\n        T val = bilinear_interp_cpu(offset_bottom_data + c * height * width, w, h, width, height);\n        sum += val;\n        count++;\n      }\n    }\n    top_data[index] = count == 0 ? 
static_cast<T>(0) : sum / count;\n    top_count[index] = count;\n  }\n}\n\ntemplate <typename T>\nvoid DeformablePSROIPoolBackwardAccKernelCpu(\n    const int count,\n    const T *top_diff,\n    const T *top_count,\n    const int num_rois,\n    const T spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const int output_dim,\n    T *bottom_data_diff, T *bottom_trans_diff,\n    const T *bottom_data,\n    const T *bottom_rois,\n    const T *bottom_trans,\n    const int no_trans,\n    const T trans_std,\n    const int sample_per_part,\n    const int group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class)\n{\n  for(int index = 0; index < count; index++)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const T *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) 
* spatial_scale - 0.5;\n    \n    // Force too small ROIs to be 1x1\n    T roi_width = std::max(roi_end_w - roi_start_w, T(0.1)); //avoid 0\n    T roi_height = std::max(roi_end_h - roi_start_h, T(0.1));\n\n    // Compute w and h at bottom\n    T bin_size_h = roi_height / static_cast<T>(pooled_height);\n    T bin_size_w = roi_width / static_cast<T>(pooled_width);\n\n    T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);\n    T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);\n\n    int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);\n    int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;\n    T trans_y = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;\n\n    T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    if (top_count[index] <= 0)\n    {\n      continue;\n    }\n    T diff_val = top_diff[index] / top_count[index];\n    const T *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;\n    T *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;\n    int gw = floor(static_cast<T>(pw) * group_size / pooled_width);\n    int gh = floor(static_cast<T>(ph) * group_size / pooled_height);\n    gw = std::min(std::max(gw, 0), group_size - 1);\n    gh = std::min(std::max(gh, 0), group_size - 1);\n\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        T w = wstart + iw * sub_bin_size_w;\n        T h = hstart + ih * sub_bin_size_h;\n        // bilinear 
interpolation\n        if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = std::min(std::max(w, T(0.)), width - T(1.));\n        h = std::min(std::max(h, T(0.)), height - T(1.));\n        int c = (ctop * group_size + gh) * group_size + gw;\n        // backward on feature\n        int x0 = floor(w);\n        int x1 = ceil(w);\n        int y0 = floor(h);\n        int y1 = ceil(h);\n        T dist_x = w - x0, dist_y = h - y0;\n        T q00 = (1 - dist_x) * (1 - dist_y);\n        T q01 = (1 - dist_x) * dist_y;\n        T q10 = dist_x * (1 - dist_y);\n        T q11 = dist_x * dist_y;\n        int bottom_index_base = c * height * width;\n        /*atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);*/\n       *(offset_bottom_data_diff + bottom_index_base + y0 * width + x0) += q00 * diff_val;\n       *(offset_bottom_data_diff + bottom_index_base + y1 * width + x0) += q01 * diff_val;\n       *(offset_bottom_data_diff + bottom_index_base + y0 * width + x1) += q10 * diff_val;\n       *(offset_bottom_data_diff + bottom_index_base + y1 * width + x1) += q11 * diff_val;\n\n\n        if (no_trans)\n        {\n          continue;\n        }\n        T U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];\n        T U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];\n        T U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];\n        T U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];\n        T diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;\n        diff_x *= roi_width;\n        T 
diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;\n        diff_y *= roi_height;\n\n        /*atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);\n        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);*/\n        *(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w) += diff_x;\n        *(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w) += diff_y;\n      }\n    }\n  }\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,\n                                  const at::Tensor &bbox,\n                                  const at::Tensor &trans,\n                                  const int no_trans,\n                                  const float spatial_scale,\n                                  const int output_dim,\n                                  const int group_size,\n                                  const int pooled_size,\n                                  const int part_size,\n                                  const int sample_per_part,\n                                  const float trans_std)\n{\n  /*AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n  AT_ASSERTM(bbox.type().is_cuda(), \"rois must be a CUDA tensor\");\n  AT_ASSERTM(trans.type().is_cuda(), \"trans must be a CUDA tensor\");*/\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 
2 : trans.size(1);\n  const int num_bbox = bbox.size(0);\n\n  AT_ASSERTM(channels == output_dim, \"input channels and output channels must equal\");\n  auto pooled_height = pooled_size;\n  auto pooled_width = pooled_size;\n\n  auto out = at::empty({num_bbox, output_dim, pooled_height, pooled_width}, input.options());\n  long out_size = num_bbox * output_dim * pooled_height * pooled_width;\n  auto top_count = at::zeros({num_bbox, output_dim, pooled_height, pooled_width}, input.options());\n\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;\n\n  //cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  if (out.numel() == 0)\n  {\n    //THCudaCheck(cudaGetLastError());\n    return std::make_tuple(out, top_count);\n  }\n\n  /*dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));\n  dim3 block(512);*/\n\n  AT_DISPATCH_FLOATING_TYPES(input.type(), \"dcn_v2_psroi_pooling_cpu_forward\", [&] {\n    DeformablePSROIPoolForwardKernelCpu<scalar_t>(\n        out_size,\n        input.contiguous().data<scalar_t>(),\n        spatial_scale,\n        channels,\n        height, width,\n        pooled_height,\n        pooled_width,\n        bbox.contiguous().data<scalar_t>(),\n        trans.contiguous().data<scalar_t>(),\n        no_trans,\n        trans_std,\n        sample_per_part,\n        output_dim,\n        group_size,\n        part_size,\n        num_classes,\n        channels_each_class,\n        out.data<scalar_t>(),\n        top_count.data<scalar_t>());\n  });\n  //THCudaCheck(cudaGetLastError());\n  return std::make_tuple(out, top_count);\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,\n                                   const at::Tensor &input,\n                                   const at::Tensor &bbox,\n                                   const at::Tensor &trans,\n                                   const at::Tensor 
&top_count,\n                                   const int no_trans,\n                                   const float spatial_scale,\n                                   const int output_dim,\n                                   const int group_size,\n                                   const int pooled_size,\n                                   const int part_size,\n                                   const int sample_per_part,\n                                   const float trans_std)\n{\n  /*AT_ASSERTM(out_grad.type().is_cuda(), \"out_grad must be a CUDA tensor\");\n  AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n  AT_ASSERTM(bbox.type().is_cuda(), \"bbox must be a CUDA tensor\");\n  AT_ASSERTM(trans.type().is_cuda(), \"trans must be a CUDA tensor\");\n  AT_ASSERTM(top_count.type().is_cuda(), \"top_count must be a CUDA tensor\");*/\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 2 : trans.size(1);\n  const int num_bbox = bbox.size(0);\n\n  AT_ASSERTM(channels == output_dim, \"input channels and output channels must equal\");\n  auto pooled_height = pooled_size;\n  auto pooled_width = pooled_size;\n  long out_size = num_bbox * output_dim * pooled_height * pooled_width;\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? 
output_dim : output_dim / num_classes;\n\n  auto input_grad = at::zeros({batch, channels, height, width}, out_grad.options());\n  auto trans_grad = at::zeros_like(trans);\n\n  if (input_grad.numel() == 0)\n  {\n    //THCudaCheck(cudaGetLastError());\n    return std::make_tuple(input_grad, trans_grad);\n  }\n\n  /*dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));\n  dim3 block(512);\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();*/\n\n  AT_DISPATCH_FLOATING_TYPES(out_grad.type(), \"dcn_v2_psroi_pooling_cpu_backward\", [&] {\n    DeformablePSROIPoolBackwardAccKernelCpu<scalar_t>(\n        out_size,\n        out_grad.contiguous().data<scalar_t>(),\n        top_count.contiguous().data<scalar_t>(),\n        num_bbox,\n        spatial_scale,\n        channels,\n        height,\n        width,\n        pooled_height,\n        pooled_width,\n        output_dim,\n        input_grad.contiguous().data<scalar_t>(),\n        trans_grad.contiguous().data<scalar_t>(),\n        input.contiguous().data<scalar_t>(),\n        bbox.contiguous().data<scalar_t>(),\n        trans.contiguous().data<scalar_t>(),\n        no_trans,\n        trans_std,\n        sample_per_part,\n        group_size,\n        part_size,\n        num_classes,\n        channels_each_class);\n  });\n  //THCudaCheck(cudaGetLastError());\n  return std::make_tuple(input_grad, trans_grad);\n}"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cpu/vision.h",
    "content": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cpu_forward(const at::Tensor &input,\n                    const at::Tensor &weight,\n                    const at::Tensor &bias,\n                    const at::Tensor &offset,\n                    const at::Tensor &mask,\n                    const int kernel_h,\n                    const int kernel_w,\n                    const int stride_h,\n                    const int stride_w,\n                    const int pad_h,\n                    const int pad_w,\n                    const int dilation_h,\n                    const int dilation_w,\n                    const int deformable_group);\n\nstd::vector<at::Tensor>\ndcn_v2_cpu_backward(const at::Tensor &input,\n                     const at::Tensor &weight,\n                     const at::Tensor &bias,\n                     const at::Tensor &offset,\n                     const at::Tensor &mask,\n                     const at::Tensor &grad_output,\n                     int kernel_h, int kernel_w,\n                     int stride_h, int stride_w,\n                     int pad_h, int pad_w,\n                     int dilation_h, int dilation_w,\n                     int deformable_group);\n\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,\n                                  const at::Tensor &bbox,\n                                  const at::Tensor &trans,\n                                  const int no_trans,\n                                  const float spatial_scale,\n                                  const int output_dim,\n                                  const int group_size,\n                                  const int pooled_size,\n                                  const int part_size,\n                                  const int sample_per_part,\n                                  const float trans_std);\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cpu_backward(const 
at::Tensor &out_grad,\n                                   const at::Tensor &input,\n                                   const at::Tensor &bbox,\n                                   const at::Tensor &trans,\n                                   const at::Tensor &top_count,\n                                   const int no_trans,\n                                   const float spatial_scale,\n                                   const int output_dim,\n                                   const int group_size,\n                                   const int pooled_size,\n                                   const int part_size,\n                                   const int sample_per_part,\n                                   const float trans_std);"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_cuda.cu",
    "content": "#include <vector>\n#include \"cuda/dcn_v2_im2col_cuda.h\"\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THC.h>\n#include <THC/THCAtomics.cuh>\n#include <THC/THCDeviceUtils.cuh>\n\nTHCState *state = at::globalContext().lazyInitCUDA();\n\n// author: Charles Shang\n// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu\n\n// [batch gemm]\n// https://github.com/pytorch/pytorch/blob/master/aten/src/THC/generic/THCTensorMathBlas.cu\n\n__global__ void createBatchGemmBuffer(const float **input_b, float **output_b,\n                                      float **columns_b, const float **ones_b,\n                                      const float **weight_b, const float **bias_b,\n                                      float *input, float *output,\n                                      float *columns, float *ones,\n                                      float *weight, float *bias,\n                                      const int input_stride, const int output_stride,\n                                      const int columns_stride, const int ones_stride,\n                                      const int num_batches)\n{\n    const int idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (idx < num_batches)\n    {\n        input_b[idx] = input + idx * input_stride;\n        output_b[idx] = output + idx * output_stride;\n        columns_b[idx] = columns + idx * columns_stride;\n        ones_b[idx] = ones + idx * ones_stride;\n        // share weights and bias within a Mini-Batch\n        weight_b[idx] = weight;\n        bias_b[idx] = bias;\n    }\n}\n\nat::Tensor\ndcn_v2_cuda_forward(const at::Tensor &input,\n                    const at::Tensor &weight,\n                    const at::Tensor &bias,\n                    const at::Tensor &offset,\n                    const at::Tensor &mask,\n                    const int kernel_h,\n                    const int kernel_w,\n                    const int 
stride_h,\n                    const int stride_w,\n                    const int pad_h,\n                    const int pad_w,\n                    const int dilation_h,\n                    const int dilation_w,\n                    const int deformable_group)\n{\n    using scalar_t = float;\n    // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));\n    AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n    AT_ASSERTM(weight.type().is_cuda(), \"weight must be a CUDA tensor\");\n    AT_ASSERTM(bias.type().is_cuda(), \"bias must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");\n\n    const int batch = input.size(0);\n    const int channels = input.size(1);\n    const int height = input.size(2);\n    const int width = input.size(3);\n\n    const int channels_out = weight.size(0);\n    const int channels_kernel = weight.size(1);\n    const int kernel_h_ = weight.size(2);\n    const int kernel_w_ = weight.size(3);\n\n    // printf(\"Kernels: %d %d %d %d\\n\", kernel_h_, kernel_w_, kernel_w, kernel_h);\n    // printf(\"Channels: %d %d\\n\", channels, channels_kernel);\n    // printf(\"Channels: %d %d\\n\", channels_out, channels_kernel);\n\n    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,\n               \"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\", kernel_h_, kernel_w, kernel_h_, kernel_w_);\n\n    AT_ASSERTM(channels == channels_kernel,\n               \"Input shape and kernel channels wont match: (%d vs %d).\", channels, channels_kernel);\n\n    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n    auto ones = at::ones({batch, height_out, width_out}, input.options());\n    auto columns = at::empty({batch, channels 
* kernel_h * kernel_w, 1 * height_out * width_out}, input.options());\n    auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());\n\n    // prepare for batch-wise computing, which is significantly faster than instance-wise computing\n    // when batch size is large.\n    // launch batch threads\n    int matrices_size = batch * sizeof(float *);\n    auto input_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));\n    auto output_b = static_cast<float **>(THCudaMalloc(state, matrices_size));\n    auto columns_b = static_cast<float **>(THCudaMalloc(state, matrices_size));\n    auto ones_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));\n    auto weight_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));\n    auto bias_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));\n\n    const int block = 128;\n    const int grid = (batch + block - 1) / block;\n\n    createBatchGemmBuffer<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(\n        input_b, output_b,\n        columns_b, ones_b,\n        weight_b, bias_b,\n        input.data<scalar_t>(),\n        output.data<scalar_t>(),\n        columns.data<scalar_t>(),\n        ones.data<scalar_t>(),\n        weight.data<scalar_t>(),\n        bias.data<scalar_t>(),\n        channels * width * height,\n        channels_out * width_out * height_out,\n        channels * kernel_h * kernel_w * height_out * width_out,\n        height_out * width_out,\n        batch);\n\n    long m_ = channels_out;\n    long n_ = height_out * width_out;\n    long k_ = 1;\n    THCudaBlas_SgemmBatched(state,\n                            't',\n                            'n',\n                            n_,\n                            m_,\n                            k_,\n                            1.0f,\n                            ones_b, k_,\n                            bias_b, k_,\n                            0.0f,\n                            
output_b, n_,\n                            batch);\n\n    modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),\n                                     input.data<scalar_t>(),\n                                     offset.data<scalar_t>(),\n                                     mask.data<scalar_t>(),\n                                     batch, channels, height, width,\n                                     height_out, width_out, kernel_h, kernel_w,\n                                     pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,\n                                     deformable_group,\n                                     columns.data<scalar_t>());\n\n    long m = channels_out;\n    long n = height_out * width_out;\n    long k = channels * kernel_h * kernel_w;\n    THCudaBlas_SgemmBatched(state,\n                            'n',\n                            'n',\n                            n,\n                            m,\n                            k,\n                            1.0f,\n                            (const float **)columns_b, n,\n                            weight_b, k,\n                            1.0f,\n                            output_b, n,\n                            batch);\n\n    THCudaFree(state, input_b);\n    THCudaFree(state, output_b);\n    THCudaFree(state, columns_b);\n    THCudaFree(state, ones_b);\n    THCudaFree(state, weight_b);\n    THCudaFree(state, bias_b);\n    return output;\n}\n\n__global__ void createBatchGemmBufferBackward(\n    float **grad_output_b,\n    float **columns_b,\n    float **ones_b,\n    float **weight_b,\n    float **grad_weight_b,\n    float **grad_bias_b,\n    float *grad_output,\n    float *columns,\n    float *ones,\n    float *weight,\n    float *grad_weight,\n    float *grad_bias,\n    const int grad_output_stride,\n    const int columns_stride,\n    const int ones_stride,\n    const int num_batches)\n{\n    const int idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (idx 
< num_batches)\n    {\n        grad_output_b[idx] = grad_output + idx * grad_output_stride;\n        columns_b[idx] = columns + idx * columns_stride;\n        ones_b[idx] = ones + idx * ones_stride;\n\n        // share weights and bias within a Mini-Batch\n        weight_b[idx] = weight;\n        grad_weight_b[idx] = grad_weight;\n        grad_bias_b[idx] = grad_bias;\n    }\n}\n\nstd::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,\n                                             const at::Tensor &weight,\n                                             const at::Tensor &bias,\n                                             const at::Tensor &offset,\n                                             const at::Tensor &mask,\n                                             const at::Tensor &grad_output,\n                                             int kernel_h, int kernel_w,\n                                             int stride_h, int stride_w,\n                                             int pad_h, int pad_w,\n                                             int dilation_h, int dilation_w,\n                                             int deformable_group)\n{\n\n    THArgCheck(input.is_contiguous(), 1, \"input tensor has to be contiguous\");\n    THArgCheck(weight.is_contiguous(), 2, \"weight tensor has to be contiguous\");\n\n    AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n    AT_ASSERTM(weight.type().is_cuda(), \"weight must be a CUDA tensor\");\n    AT_ASSERTM(bias.type().is_cuda(), \"bias must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");\n\n    const int batch = input.size(0);\n    const int channels = input.size(1);\n    const int height = input.size(2);\n    const int width = input.size(3);\n\n    const int channels_out = weight.size(0);\n    const int channels_kernel = weight.size(1);\n    const int kernel_h_ 
= weight.size(2);\n    const int kernel_w_ = weight.size(3);\n\n    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,\n               \"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\", kernel_h_, kernel_w, kernel_h_, kernel_w_);\n\n    AT_ASSERTM(channels == channels_kernel,\n               \"Input shape and kernel channels wont match: (%d vs %d).\", channels, channels_kernel);\n\n    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n    auto ones = at::ones({height_out, width_out}, input.options());\n    auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());\n    auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());\n\n    auto grad_input = at::zeros_like(input);\n    auto grad_weight = at::zeros_like(weight);\n    auto grad_bias = at::zeros_like(bias);\n    auto grad_offset = at::zeros_like(offset);\n    auto grad_mask = at::zeros_like(mask);\n\n    using scalar_t = float;\n\n    for (int b = 0; b < batch; b++)\n    {\n        auto input_n = input.select(0, b);\n        auto offset_n = offset.select(0, b);\n        auto mask_n = mask.select(0, b);\n        auto grad_output_n = grad_output.select(0, b);\n        auto grad_input_n = grad_input.select(0, b);\n        auto grad_offset_n = grad_offset.select(0, b);\n        auto grad_mask_n = grad_mask.select(0, b);\n\n        long m = channels * kernel_h * kernel_w;\n        long n = height_out * width_out;\n        long k = channels_out;\n\n        THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f,\n                         grad_output_n.data<scalar_t>(), n,\n                         weight.data<scalar_t>(), m, 0.0f,\n                         columns.data<scalar_t>(), n);\n\n        // gradient w.r.t. 
input coordinate data\n        modulated_deformable_col2im_coord_cuda(c10::cuda::getCurrentCUDAStream(),\n                                               columns.data<scalar_t>(),\n                                               input_n.data<scalar_t>(),\n                                               offset_n.data<scalar_t>(),\n                                               mask_n.data<scalar_t>(),\n                                               1, channels, height, width,\n                                               height_out, width_out, kernel_h, kernel_w,\n                                               pad_h, pad_w, stride_h, stride_w,\n                                               dilation_h, dilation_w, deformable_group,\n                                               grad_offset_n.data<scalar_t>(),\n                                               grad_mask_n.data<scalar_t>());\n        // gradient w.r.t. input data\n        modulated_deformable_col2im_cuda(c10::cuda::getCurrentCUDAStream(),\n                                         columns.data<scalar_t>(),\n                                         offset_n.data<scalar_t>(),\n                                         mask_n.data<scalar_t>(),\n                                         1, channels, height, width,\n                                         height_out, width_out, kernel_h, kernel_w,\n                                         pad_h, pad_w, stride_h, stride_w,\n                                         dilation_h, dilation_w, deformable_group,\n                                         grad_input_n.data<scalar_t>());\n\n        // gradient w.r.t. 
weight, dWeight should accumulate across the batch and group\n        modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),\n                                         input_n.data<scalar_t>(),\n                                         offset_n.data<scalar_t>(),\n                                         mask_n.data<scalar_t>(),\n                                         1, channels, height, width,\n                                         height_out, width_out, kernel_h, kernel_w,\n                                         pad_h, pad_w, stride_h, stride_w,\n                                         dilation_h, dilation_w, deformable_group,\n                                         columns.data<scalar_t>());\n\n        long m_ = channels_out;\n        long n_ = channels * kernel_h * kernel_w;\n        long k_ = height_out * width_out;\n\n        THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f,\n                         columns.data<scalar_t>(), k_,\n                         grad_output_n.data<scalar_t>(), k_, 1.0f,\n                         grad_weight.data<scalar_t>(), n_);\n\n        // gradient w.r.t. bias\n        // long m_ = channels_out;\n        // long k__ = height_out * width_out;\n        // THCudaBlas_Sgemm(state,\n        //                  't', 'n',\n        //                  k_, m_, 1, 1.0f,\n        //                  grad_output_n.data<scalar_t>(), k_,\n        //                  ones.data<scalar_t>(), 1, 1.0f,\n        //                  grad_bias.data<scalar_t>(), 1);\n        THCudaBlas_Sgemm(state,\n            'N', 'N', 1, m_, k_, 1.0f,\n            ones.data<scalar_t>(), 1,\n            grad_output_n.data<scalar_t>(), k_,\n            1.0f,\n            grad_bias.data<scalar_t>(), 1);\n    }\n\n    return {\n        grad_input, grad_offset, grad_mask, grad_weight, grad_bias\n    };\n}\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.cu",
    "content": "#include \"dcn_v2_im2col_cuda.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THC.h>\n#include <THC/THCAtomics.cuh>\n#include <THC/THCDeviceUtils.cuh>\n\n#define CUDA_KERNEL_LOOP(i, n)                          \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \\\n      i < (n);                                          \\\n      i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N)\n{\n  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;\n}\n\n\n__device__ float dmcn_im2col_bilinear_cuda(const float *bottom_data, const int data_width,\n                                      const int height, const int width, float h, float w)\n{\n  int h_low = floor(h);\n  int w_low = floor(w);\n  int h_high = h_low + 1;\n  int w_high = w_low + 1;\n\n  float lh = h - h_low;\n  float lw = w - w_low;\n  float hh = 1 - lh, hw = 1 - lw;\n\n  float v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n    v1 = bottom_data[h_low * data_width + w_low];\n  float v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n    v2 = bottom_data[h_low * data_width + w_high];\n  float v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n    v3 = bottom_data[h_high * data_width + w_low];\n  float v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n    v4 = bottom_data[h_high * data_width + w_high];\n\n  float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n  float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  return val;\n}\n\n__device__ float dmcn_get_gradient_weight_cuda(float argmax_h, float argmax_w,\n                                          const int h, const int w, const int height, const int width)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int 
argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  float weight = 0;\n  if (h == argmax_h_low && w == argmax_w_low)\n    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);\n  if (h == argmax_h_low && w == argmax_w_high)\n    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);\n  if (h == argmax_h_high && w == argmax_w_low)\n    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);\n  if (h == argmax_h_high && w == argmax_w_high)\n    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);\n  return weight;\n}\n\n__device__ float dmcn_get_coordinate_weight_cuda(float argmax_h, float argmax_w,\n                                            const int height, const int width, const float *im_data,\n                                            const int data_width, const int bp_dir)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  float weight = 0;\n\n  if (bp_dir == 0)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)\n      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n  else if (bp_dir == 1)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && 
argmax_w_high <= width - 1)\n      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n\n  return weight;\n}\n\n__global__ void modulated_deformable_im2col_gpu_kernel(const int n,\n                                                       const float *data_im, const float *data_offset, const float *data_mask,\n                                                       const int height, const int width, const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int num_channels, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       float *data_col)\n{\n  // launch channels * batch_size * height_col * width_col cores\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    // NOTE(CharlesShang): different from Dai Jifeng's MXNet implementation, col_buffer is of shape (c*kw*kh, N, oh, ow)\n    // here columns is of shape (N, c*kw*kh, oh * ow), need to adapt axis\n\n    // index index of output matrix\n    const int w_col = index % width_col;\n    const int h_col = (index / width_col) % height_col;\n    // const int b_col = (index / width_col / height_col) % 
batch_size;\n    const int b_col = (index / width_col / height_col / num_channels) % batch_size;\n    // const int c_im = (index / width_col / height_col) / batch_size;\n    const int c_im = (index / width_col / height_col) % num_channels;\n    // const int c_col = c_im * kernel_h * kernel_w;\n    const int c_col = c_im * kernel_h * kernel_w;\n\n    // compute deformable group index\n    const int deformable_group_index = c_im / channel_per_deformable_group;\n\n    const int h_in = h_col * stride_h - pad_h;\n    const int w_in = w_col * stride_w - pad_w;\n\n    //  float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;\n    float *data_col_ptr = data_col + ((b_col * num_channels * kernel_w * kernel_h + c_col) * height_col + h_col) * width_col + w_col;\n    //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;\n    const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;\n    const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n\n    const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    for (int i = 0; i < kernel_h; ++i)\n    {\n      for (int j = 0; j < kernel_w; ++j)\n      {\n        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;\n        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;\n        const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;\n        const float offset_h = data_offset_ptr[data_offset_h_ptr];\n        const float offset_w = data_offset_ptr[data_offset_w_ptr];\n        const float mask = data_mask_ptr[data_mask_hw_ptr];\n        float val = static_cast<float>(0);\n        const float 
h_im = h_in + i * dilation_h + offset_h;\n        const float w_im = w_in + j * dilation_w + offset_w;\n        //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {\n        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)\n        {\n          //const float map_h = i * dilation_h + offset_h;\n          //const float map_w = j * dilation_w + offset_w;\n          //const int cur_height = height - h_in;\n          //const int cur_width = width - w_in;\n          //val = dmcn_im2col_bilinear_cuda(data_im_ptr, width, cur_height, cur_width, map_h, map_w);\n          val = dmcn_im2col_bilinear_cuda(data_im_ptr, width, height, width, h_im, w_im);\n        }\n        *data_col_ptr = val * mask;\n        // data_col_ptr += batch_size * height_col * width_col;\n        data_col_ptr += height_col * width_col;\n      }\n    }\n  }\n}\n\n__global__ void modulated_deformable_col2im_gpu_kernel(const int n,\n                                                       const float *data_col, const float *data_offset, const float *data_mask,\n                                                       const int channels, const int height, const int width,\n                                                       const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       float *grad_im)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    const int j = (index / width_col 
/ height_col / batch_size) % kernel_w;\n    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;\n    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / channel_per_deformable_group;\n\n    int w_out = index % width_col;\n    int h_out = (index / width_col) % height_col;\n    int b = (index / width_col / height_col) % batch_size;\n    int w_in = w_out * stride_w - pad_w;\n    int h_in = h_out * stride_h - pad_h;\n\n    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;\n    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;\n    const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;\n    const float offset_h = data_offset_ptr[data_offset_h_ptr];\n    const float offset_w = data_offset_ptr[data_offset_w_ptr];\n    const float mask = data_mask_ptr[data_mask_hw_ptr];\n    const float cur_inv_h_data = h_in + i * dilation_h + offset_h;\n    const float cur_inv_w_data = w_in + j * dilation_w + offset_w;\n\n    const float cur_top_grad = data_col[index] * mask;\n    const int cur_h = (int)cur_inv_h_data;\n    const int cur_w = (int)cur_inv_w_data;\n    for (int dy = -2; dy <= 2; dy++)\n    {\n      for (int dx = -2; dx <= 2; dx++)\n      {\n        if (cur_h + dy >= 0 && cur_h + dy < height &&\n            cur_w + dx >= 0 && cur_w + dx < width &&\n            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&\n            abs(cur_inv_w_data - (cur_w + dx)) < 1)\n        {\n          int cur_bottom_grad_pos = 
((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;\n          float weight = dmcn_get_gradient_weight_cuda(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);\n          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);\n        }\n      }\n    }\n  }\n}\n\n__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,\n                                                             const float *data_col, const float *data_im,\n                                                             const float *data_offset, const float *data_mask,\n                                                             const int channels, const int height, const int width,\n                                                             const int kernel_h, const int kernel_w,\n                                                             const int pad_h, const int pad_w,\n                                                             const int stride_h, const int stride_w,\n                                                             const int dilation_h, const int dilation_w,\n                                                             const int channel_per_deformable_group,\n                                                             const int batch_size, const int offset_channels, const int deformable_group,\n                                                             const int height_col, const int width_col,\n                                                             float *grad_offset, float *grad_mask)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    float val = 0, mval = 0;\n    int w = index % width_col;\n    int h = (index / width_col) % height_col;\n    int c = (index / width_col / height_col) % offset_channels;\n    int b = (index / width_col / height_col) / offset_channels;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / (2 * kernel_h * kernel_w);\n    const int col_step = 
kernel_h * kernel_w;\n    int cnt = 0;\n    const float *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;\n    const float *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;\n    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;\n\n    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)\n    {\n      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;\n      const int bp_dir = offset_c % 2;\n\n      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;\n      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;\n      int w_out = col_pos % width_col;\n      int h_out = (col_pos / width_col) % height_col;\n      int w_in = w_out * stride_w - pad_w;\n      int h_in = h_out * stride_h - pad_h;\n      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);\n      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);\n      const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);\n      const float offset_h = data_offset_ptr[data_offset_h_ptr];\n      const float offset_w = data_offset_ptr[data_offset_w_ptr];\n      const float mask = data_mask_ptr[data_mask_hw_ptr];\n      float inv_h = h_in + i * dilation_h + offset_h;\n      float inv_w = w_in + j * dilation_w + offset_w;\n      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)\n      {\n 
       inv_h = inv_w = -2;\n      }\n      else\n      {\n        mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear_cuda(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);\n      }\n      const float weight = dmcn_get_coordinate_weight_cuda(\n          inv_h, inv_w,\n          height, width, data_im_ptr + cnt * height * width, width, bp_dir);\n      val += weight * data_col_ptr[col_pos] * mask;\n      cnt += 1;\n    }\n    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);\n    grad_offset[index] = val;\n    if (offset_c % 2 == 0)\n      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);\n      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;\n  }\n}\n\nvoid modulated_deformable_im2col_cuda(cudaStream_t stream,\n  const float* data_im, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w,\n  const int deformable_group, float* data_col) {\n  // num_axes should be smaller than block size\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * batch_size * height_col * width_col;\n  modulated_deformable_im2col_gpu_kernel\n      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,\n          0, stream>>>(\n      num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kernel_w,\n      pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,\n      batch_size, channels, deformable_group, height_col, width_col, data_col);\n  \n  cudaError_t err = 
cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_im2col_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n\n}\n\nvoid modulated_deformable_col2im_cuda(cudaStream_t stream,\n  const float* data_col, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w, \n  const int deformable_group, float* grad_im){\n\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;\n  modulated_deformable_col2im_gpu_kernel\n      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,\n          0, stream>>>(\n        num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,\n        kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, channel_per_deformable_group,\n        batch_size, deformable_group, height_col, width_col, grad_im);\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n\n}\n\nvoid modulated_deformable_col2im_coord_cuda(cudaStream_t stream,\n  const float* data_col, const float* data_im, const float* data_offset, const float* data_mask,\n  const int batch_size, const int channels, const int height_im, const int width_im, \n  const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n  const int pad_h, const int pad_w, const int stride_h, const int stride_w, \n  const int dilation_h, const int dilation_w, \n  const int deformable_group,\n  float* grad_offset, float* grad_mask) {\n  const int num_kernels = batch_size * height_col * width_col * 2 * 
kernel_h * kernel_w * deformable_group;\n  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;\n  modulated_deformable_col2im_coord_gpu_kernel\n      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,\n        0, stream>>>(\n        num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,\n        kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, channel_per_deformable_group,\n        batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, \n        grad_offset, grad_mask);\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_coord_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n}"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.h",
    "content": "\n/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contributions by the University of California:\n * Copyright (c) 2014-2017 The Regents of the University of California (Regents)\n * All rights reserved.\n *\n * All other contributions:\n * Copyright (c) 2014-2017, the respective contributors\n * All rights reserved.\n *\n * Caffe uses a shared copyright model: each contributor holds copyright over\n * their contributions to Caffe. The project versioning records all such\n * contribution and copyright details. If a contributor wants to further mark\n * their specific copyright on a particular contribution, they should indicate\n * their copyright solely in the commit message of the change when it is\n * committed.\n *\n * LICENSE\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n * 1. Redistributions of source code must retain the above copyright notice, this\n * list of conditions and the following disclaimer.\n * 2. Redistributions in binary form must reproduce the above copyright notice,\n * this list of conditions and the following disclaimer in the documentation\n * and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * CONTRIBUTION AGREEMENT\n *\n * By contributing to the BVLC/caffe repository through pull-request, comment,\n * or otherwise, the contributor releases their content to the\n * license and copyright terms herein.\n *\n ***************** END Caffe Copyright Notice and Disclaimer ********************\n *\n * Copyright (c) 2018 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file modulated_deformable_im2col.h\n * \\brief Function definitions of converting an image to\n * column matrix based on kernel, padding, dilation, and offset.\n * These functions are mainly used in deformable convolution operators.\n * \\ref: https://arxiv.org/abs/1811.11168\n * \\author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu\n */\n\n/***************** Adapted by Charles Shang *********************/\n\n#ifndef DCN_V2_IM2COL_CUDA\n#define DCN_V2_IM2COL_CUDA\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n  void modulated_deformable_im2col_cuda(cudaStream_t stream,\n                                        const float *data_im, const float *data_offset, const float *data_mask,\n                                        const int batch_size, const int channels, const int height_im, const int width_im,\n                                        const int height_col, const int width_col, const int kernel_h, const int kenerl_w,\n                                        const int pad_h, const int pad_w, const int stride_h, const 
int stride_w,\n                                        const int dilation_h, const int dilation_w,\n                                        const int deformable_group, float *data_col);\n\n  void modulated_deformable_col2im_cuda(cudaStream_t stream,\n                                        const float *data_col, const float *data_offset, const float *data_mask,\n                                        const int batch_size, const int channels, const int height_im, const int width_im,\n                                        const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n                                        const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                        const int dilation_h, const int dilation_w,\n                                        const int deformable_group, float *grad_im);\n\n  void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,\n                                         const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,\n                                         const int batch_size, const int channels, const int height_im, const int width_im,\n                                         const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                         const int dilation_h, const int dilation_w,\n                                         const int deformable_group,\n                                         float *grad_offset, float *grad_mask);\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_psroi_pooling_cuda.cu",
    "content": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psroi_pooling.cu\n * \\brief\n * \\author Yi Li, Guodong Zhang, Jifeng Dai\n*/\n/***************** Adapted by Charles Shang *********************/\n\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n#include <iostream>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THC.h>\n#include <THC/THCAtomics.cuh>\n#include <THC/THCDeviceUtils.cuh>\n\n#define CUDA_KERNEL_LOOP(i, n)                        \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \\\n       i < (n);                                       \\\n       i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N)\n{\n  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;\n}\n\ntemplate <typename T>\n__device__ T bilinear_interp_cuda(\n    const T *data,\n    const T x,\n    const T y,\n    const int width,\n    const int height)\n{\n  int x1 = floor(x);\n  int x2 = ceil(x);\n  int y1 = floor(y);\n  int y2 = ceil(y);\n  T dist_x = static_cast<T>(x - x1);\n  T dist_y = static_cast<T>(y - y1);\n  T value11 = data[y1 * width + x1];\n  T value12 = data[y2 * width + x1];\n  T value21 = data[y1 * width + x2];\n  T value22 = data[y2 * width + x2];\n  T value = (1 - dist_x) * (1 - dist_y) * value11 +\n            (1 - dist_x) * dist_y * value12 +\n            dist_x * (1 - dist_y) * value21 +\n            dist_x * dist_y * value22;\n  return value;\n}\n\ntemplate <typename T>\n__global__ void DeformablePSROIPoolForwardKernelCuda(\n    const int count,\n    const T *bottom_data,\n    const T spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const T *bottom_rois, const T *bottom_trans,\n    const int no_trans,\n    const T trans_std,\n    const int sample_per_part,\n    const int output_dim,\n    const int 
group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class,\n    T *top_data,\n    T *top_count)\n{\n  CUDA_KERNEL_LOOP(index, count)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const T *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;\n\n    // Force too small ROIs to be 1x1\n    T roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0\n    T roi_height = max(roi_end_h - roi_start_h, 0.1);\n\n    // Compute w and h at bottom\n    T bin_size_h = roi_height / static_cast<T>(pooled_height);\n    T bin_size_w = roi_width / static_cast<T>(pooled_width);\n\n    T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);\n    T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);\n\n    int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);\n    int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;\n    T trans_y = no_trans ? 
static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;\n\n    T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    T sum = 0;\n    int count = 0;\n    int gw = floor(static_cast<T>(pw) * group_size / pooled_width);\n    int gh = floor(static_cast<T>(ph) * group_size / pooled_height);\n    gw = min(max(gw, 0), group_size - 1);\n    gh = min(max(gh, 0), group_size - 1);\n\n    const T *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        T w = wstart + iw * sub_bin_size_w;\n        T h = hstart + ih * sub_bin_size_h;\n        // bilinear interpolation\n        if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = min(max(w, 0.), width - 1.);\n        h = min(max(h, 0.), height - 1.);\n        int c = (ctop * group_size + gh) * group_size + gw;\n        T val = bilinear_interp_cuda(offset_bottom_data + c * height * width, w, h, width, height);\n        sum += val;\n        count++;\n      }\n    }\n    top_data[index] = count == 0 ? 
static_cast<T>(0) : sum / count;\n    top_count[index] = count;\n  }\n}\n\ntemplate <typename T>\n__global__ void DeformablePSROIPoolBackwardAccKernelCuda(\n    const int count,\n    const T *top_diff,\n    const T *top_count,\n    const int num_rois,\n    const T spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const int output_dim,\n    T *bottom_data_diff, T *bottom_trans_diff,\n    const T *bottom_data,\n    const T *bottom_rois,\n    const T *bottom_trans,\n    const int no_trans,\n    const T trans_std,\n    const int sample_per_part,\n    const int group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class)\n{\n  CUDA_KERNEL_LOOP(index, count)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const T *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) 
* spatial_scale - 0.5;\n\n    // Force too small ROIs to be 1x1\n    T roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0\n    T roi_height = max(roi_end_h - roi_start_h, 0.1);\n\n    // Compute w and h at bottom\n    T bin_size_h = roi_height / static_cast<T>(pooled_height);\n    T bin_size_w = roi_width / static_cast<T>(pooled_width);\n\n    T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);\n    T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);\n\n    int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);\n    int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;\n    T trans_y = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;\n\n    T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    if (top_count[index] <= 0)\n    {\n      continue;\n    }\n    T diff_val = top_diff[index] / top_count[index];\n    const T *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;\n    T *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;\n    int gw = floor(static_cast<T>(pw) * group_size / pooled_width);\n    int gh = floor(static_cast<T>(ph) * group_size / pooled_height);\n    gw = min(max(gw, 0), group_size - 1);\n    gh = min(max(gh, 0), group_size - 1);\n\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        T w = wstart + iw * sub_bin_size_w;\n        T h = hstart + ih * sub_bin_size_h;\n        // bilinear interpolation\n        if (w < -0.5 || w 
> width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = min(max(w, 0.), width - 1.);\n        h = min(max(h, 0.), height - 1.);\n        int c = (ctop * group_size + gh) * group_size + gw;\n        // backward on feature\n        int x0 = floor(w);\n        int x1 = ceil(w);\n        int y0 = floor(h);\n        int y1 = ceil(h);\n        T dist_x = w - x0, dist_y = h - y0;\n        T q00 = (1 - dist_x) * (1 - dist_y);\n        T q01 = (1 - dist_x) * dist_y;\n        T q10 = dist_x * (1 - dist_y);\n        T q11 = dist_x * dist_y;\n        int bottom_index_base = c * height * width;\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);\n\n        if (no_trans)\n        {\n          continue;\n        }\n        T U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];\n        T U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];\n        T U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];\n        T U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];\n        T diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;\n        diff_x *= roi_width;\n        T diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;\n        diff_y *= roi_height;\n\n        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);\n        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);\n      }\n    }\n  
}\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,\n                                  const at::Tensor &bbox,\n                                  const at::Tensor &trans,\n                                  const int no_trans,\n                                  const float spatial_scale,\n                                  const int output_dim,\n                                  const int group_size,\n                                  const int pooled_size,\n                                  const int part_size,\n                                  const int sample_per_part,\n                                  const float trans_std)\n{\n  AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n  AT_ASSERTM(bbox.type().is_cuda(), \"rois must be a CUDA tensor\");\n  AT_ASSERTM(trans.type().is_cuda(), \"trans must be a CUDA tensor\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 2 : trans.size(1);\n  const int num_bbox = bbox.size(0);\n\n  AT_ASSERTM(channels == output_dim, \"input channels and output channels must equal\");\n  auto pooled_height = pooled_size;\n  auto pooled_width = pooled_size;\n\n  auto out = at::empty({num_bbox, output_dim, pooled_height, pooled_width}, input.options());\n  long out_size = num_bbox * output_dim * pooled_height * pooled_width;\n  auto top_count = at::zeros({num_bbox, output_dim, pooled_height, pooled_width}, input.options());\n\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? 
output_dim : output_dim / num_classes;\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  if (out.numel() == 0)\n  {\n    THCudaCheck(cudaGetLastError());\n    return std::make_tuple(out, top_count);\n  }\n\n  dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));\n  dim3 block(512);\n\n  AT_DISPATCH_FLOATING_TYPES(input.type(), \"dcn_v2_psroi_pooling_cuda_forward\", [&] {\n    DeformablePSROIPoolForwardKernelCuda<scalar_t><<<grid, block, 0, stream>>>(\n        out_size,\n        input.contiguous().data<scalar_t>(),\n        spatial_scale,\n        channels,\n        height, width,\n        pooled_height,\n        pooled_width,\n        bbox.contiguous().data<scalar_t>(),\n        trans.contiguous().data<scalar_t>(),\n        no_trans,\n        trans_std,\n        sample_per_part,\n        output_dim,\n        group_size,\n        part_size,\n        num_classes,\n        channels_each_class,\n        out.data<scalar_t>(),\n        top_count.data<scalar_t>());\n  });\n  THCudaCheck(cudaGetLastError());\n  return std::make_tuple(out, top_count);\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,\n                                   const at::Tensor &input,\n                                   const at::Tensor &bbox,\n                                   const at::Tensor &trans,\n                                   const at::Tensor &top_count,\n                                   const int no_trans,\n                                   const float spatial_scale,\n                                   const int output_dim,\n                                   const int group_size,\n                                   const int pooled_size,\n                                   const int part_size,\n                                   const int sample_per_part,\n                                   const float trans_std)\n{\n  AT_ASSERTM(out_grad.type().is_cuda(), \"out_grad must be a CUDA tensor\");\n  
AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n  AT_ASSERTM(bbox.type().is_cuda(), \"bbox must be a CUDA tensor\");\n  AT_ASSERTM(trans.type().is_cuda(), \"trans must be a CUDA tensor\");\n  AT_ASSERTM(top_count.type().is_cuda(), \"top_count must be a CUDA tensor\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 2 : trans.size(1);\n  const int num_bbox = bbox.size(0);\n\n  AT_ASSERTM(channels == output_dim, \"input channels and output channels must equal\");\n  auto pooled_height = pooled_size;\n  auto pooled_width = pooled_size;\n  long out_size = num_bbox * output_dim * pooled_height * pooled_width;\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;\n\n  auto input_grad = at::zeros({batch, channels, height, width}, out_grad.options());\n  auto trans_grad = at::zeros_like(trans);\n\n  if (input_grad.numel() == 0)\n  {\n    THCudaCheck(cudaGetLastError());\n    return std::make_tuple(input_grad, trans_grad);\n  }\n\n  dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));\n  dim3 block(512);\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  AT_DISPATCH_FLOATING_TYPES(out_grad.type(), \"dcn_v2_psroi_pooling_cuda_backward\", [&] {\n    DeformablePSROIPoolBackwardAccKernelCuda<scalar_t><<<grid, block, 0, stream>>>(\n        out_size,\n        out_grad.contiguous().data<scalar_t>(),\n        top_count.contiguous().data<scalar_t>(),\n        num_bbox,\n        spatial_scale,\n        channels,\n        height,\n        width,\n        pooled_height,\n        pooled_width,\n        output_dim,\n        input_grad.contiguous().data<scalar_t>(),\n        trans_grad.contiguous().data<scalar_t>(),\n        input.contiguous().data<scalar_t>(),\n        bbox.contiguous().data<scalar_t>(),\n        
trans.contiguous().data<scalar_t>(),\n        no_trans,\n        trans_std,\n        sample_per_part,\n        group_size,\n        part_size,\n        num_classes,\n        channels_each_class);\n  });\n  THCudaCheck(cudaGetLastError());\n  return std::make_tuple(input_grad, trans_grad);\n}"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/cuda/vision.h",
    "content": "#pragma once\n#include <torch/extension.h>\n\nat::Tensor\ndcn_v2_cuda_forward(const at::Tensor &input,\n                    const at::Tensor &weight,\n                    const at::Tensor &bias,\n                    const at::Tensor &offset,\n                    const at::Tensor &mask,\n                    const int kernel_h,\n                    const int kernel_w,\n                    const int stride_h,\n                    const int stride_w,\n                    const int pad_h,\n                    const int pad_w,\n                    const int dilation_h,\n                    const int dilation_w,\n                    const int deformable_group);\n\nstd::vector<at::Tensor>\ndcn_v2_cuda_backward(const at::Tensor &input,\n                     const at::Tensor &weight,\n                     const at::Tensor &bias,\n                     const at::Tensor &offset,\n                     const at::Tensor &mask,\n                     const at::Tensor &grad_output,\n                     int kernel_h, int kernel_w,\n                     int stride_h, int stride_w,\n                     int pad_h, int pad_w,\n                     int dilation_h, int dilation_w,\n                     int deformable_group);\n\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,\n                                  const at::Tensor &bbox,\n                                  const at::Tensor &trans,\n                                  const int no_trans,\n                                  const float spatial_scale,\n                                  const int output_dim,\n                                  const int group_size,\n                                  const int pooled_size,\n                                  const int part_size,\n                                  const int sample_per_part,\n                                  const float trans_std);\n\nstd::tuple<at::Tensor, 
at::Tensor>\ndcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,\n                                   const at::Tensor &input,\n                                   const at::Tensor &bbox,\n                                   const at::Tensor &trans,\n                                   const at::Tensor &top_count,\n                                   const int no_trans,\n                                   const float spatial_scale,\n                                   const int output_dim,\n                                   const int group_size,\n                                   const int pooled_size,\n                                   const int part_size,\n                                   const int sample_per_part,\n                                   const float trans_std);"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/dcn_v2.h",
    "content": "#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/vision.h\"\n#endif\n\nat::Tensor\ndcn_v2_forward(const at::Tensor &input,\n               const at::Tensor &weight,\n               const at::Tensor &bias,\n               const at::Tensor &offset,\n               const at::Tensor &mask,\n               const int kernel_h,\n               const int kernel_w,\n               const int stride_h,\n               const int stride_w,\n               const int pad_h,\n               const int pad_w,\n               const int dilation_h,\n               const int dilation_w,\n               const int deformable_group)\n{\n    if (input.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return dcn_v2_cuda_forward(input, weight, bias, offset, mask,\n                                   kernel_h, kernel_w,\n                                   stride_h, stride_w,\n                                   pad_h, pad_w,\n                                   dilation_h, dilation_w,\n                                   deformable_group);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    else{\n        return dcn_v2_cpu_forward(input, weight, bias, offset, mask,\n                                   kernel_h, kernel_w,\n                                   stride_h, stride_w,\n                                   pad_h, pad_w,\n                                   dilation_h, dilation_w,\n                                   deformable_group);\n    }\n}\n\nstd::vector<at::Tensor>\ndcn_v2_backward(const at::Tensor &input,\n                const at::Tensor &weight,\n                const at::Tensor &bias,\n                const at::Tensor &offset,\n                const at::Tensor &mask,\n                const at::Tensor &grad_output,\n                int kernel_h, int kernel_w,\n                int stride_h, int stride_w,\n                int pad_h, int pad_w,\n                int dilation_h, int dilation_w,\n                int 
deformable_group)\n{\n    if (input.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return dcn_v2_cuda_backward(input,\n                                    weight,\n                                    bias,\n                                    offset,\n                                    mask,\n                                    grad_output,\n                                    kernel_h, kernel_w,\n                                    stride_h, stride_w,\n                                    pad_h, pad_w,\n                                    dilation_h, dilation_w,\n                                    deformable_group);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    else{\n        return dcn_v2_cpu_backward(input,\n                                    weight,\n                                    bias,\n                                    offset,\n                                    mask,\n                                    grad_output,\n                                    kernel_h, kernel_w,\n                                    stride_h, stride_w,\n                                    pad_h, pad_w,\n                                    dilation_h, dilation_w,\n                                    deformable_group);\n    }\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_forward(const at::Tensor &input,\n                             const at::Tensor &bbox,\n                             const at::Tensor &trans,\n                             const int no_trans,\n                             const float spatial_scale,\n                             const int output_dim,\n                             const int group_size,\n                             const int pooled_size,\n                             const int part_size,\n                             const int sample_per_part,\n                             const float trans_std)\n{\n    if (input.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return 
dcn_v2_psroi_pooling_cuda_forward(input,\n                                                 bbox,\n                                                 trans,\n                                                 no_trans,\n                                                 spatial_scale,\n                                                 output_dim,\n                                                 group_size,\n                                                 pooled_size,\n                                                 part_size,\n                                                 sample_per_part,\n                                                 trans_std);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    else{\n        return dcn_v2_psroi_pooling_cpu_forward(input,\n                                                 bbox,\n                                                 trans,\n                                                 no_trans,\n                                                 spatial_scale,\n                                                 output_dim,\n                                                 group_size,\n                                                 pooled_size,\n                                                 part_size,\n                                                 sample_per_part,\n                                                 trans_std);\n    }\n}\n\nstd::tuple<at::Tensor, at::Tensor>\ndcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,\n                              const at::Tensor &input,\n                              const at::Tensor &bbox,\n                              const at::Tensor &trans,\n                              const at::Tensor &top_count,\n                              const int no_trans,\n                              const float spatial_scale,\n                              const int output_dim,\n                              const int group_size,\n                              const 
int pooled_size,\n                              const int part_size,\n                              const int sample_per_part,\n                              const float trans_std)\n{\n    if (input.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return dcn_v2_psroi_pooling_cuda_backward(out_grad,\n                                                  input,\n                                                  bbox,\n                                                  trans,\n                                                  top_count,\n                                                  no_trans,\n                                                  spatial_scale,\n                                                  output_dim,\n                                                  group_size,\n                                                  pooled_size,\n                                                  part_size,\n                                                  sample_per_part,\n                                                  trans_std);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    else{\n        return dcn_v2_psroi_pooling_cpu_backward(out_grad,\n                                                  input,\n                                                  bbox,\n                                                  trans,\n                                                  top_count,\n                                                  no_trans,\n                                                  spatial_scale,\n                                                  output_dim,\n                                                  group_size,\n                                                  pooled_size,\n                                                  part_size,\n                                                  sample_per_part,\n                                                  trans_std);\n    }\n}"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp",
    "content": "\n#include \"dcn_v2.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"dcn_v2_forward\", &dcn_v2_forward, \"dcn_v2_forward\");\n  m.def(\"dcn_v2_backward\", &dcn_v2_backward, \"dcn_v2_backward\");\n  m.def(\"dcn_v2_psroi_pooling_forward\", &dcn_v2_psroi_pooling_forward, \"dcn_v2_psroi_pooling_forward\");\n  m.def(\"dcn_v2_psroi_pooling_backward\", &dcn_v2_psroi_pooling_backward, \"dcn_v2_psroi_pooling_backward\");\n}\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/testcpu.py",
    "content": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport time\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import gradcheck\n\nfrom dcn_v2 import dcn_v2_conv, DCNv2, DCN\nfrom dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling\n\ndeformable_groups = 1\nN, inC, inH, inW = 2, 2, 4, 4\noutC = 2\nkH, kW = 3, 3\n\n\ndef conv_identify(weight, bias):\n    weight.data.zero_()\n    bias.data.zero_()\n    o, i, h, w = weight.shape\n    y = h//2\n    x = w//2\n    for p in range(i):\n        for q in range(o):\n            if p == q:\n                weight.data[q, p, y, x] = 1.0\n\n\ndef check_zero_offset():\n    conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,\n                            kernel_size=(kH, kW),\n                            stride=(1, 1),\n                            padding=(1, 1),\n                            bias=True)\n\n    conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,\n                          kernel_size=(kH, kW),\n                          stride=(1, 1),\n                          padding=(1, 1),\n                          bias=True)\n\n    dcn_v2 = DCNv2(inC, outC, (kH, kW),\n                   stride=1, padding=1, dilation=1,\n                   deformable_groups=deformable_groups)\n\n    conv_offset.weight.data.zero_()\n    conv_offset.bias.data.zero_()\n    conv_mask.weight.data.zero_()\n    conv_mask.bias.data.zero_()\n    conv_identify(dcn_v2.weight, dcn_v2.bias)\n\n    input = torch.randn(N, inC, inH, inW)\n    offset = conv_offset(input)\n    mask = conv_mask(input)\n    mask = torch.sigmoid(mask)\n    output = dcn_v2(input, offset, mask)\n    output *= 2\n    d = (input - output).abs().max()\n    if d < 1e-10:\n        print('Zero offset passed')\n    else:\n        print('Zero offset failed')\n        print(input)\n        print(output)\n\ndef check_gradient_dconv():\n\n    input = torch.rand(N, 
inC, inH, inW) * 0.01\n    input.requires_grad = True\n\n    offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2\n    # offset.data.zero_()\n    # offset.data -= 0.5\n    offset.requires_grad = True\n\n    mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW)\n    # mask.data.zero_()\n    mask.requires_grad = True\n    mask = torch.sigmoid(mask)\n\n    weight = torch.randn(outC, inC, kH, kW)\n    weight.requires_grad = True\n\n    bias = torch.rand(outC)\n    bias.requires_grad = True\n\n    stride = 1\n    padding = 1\n    dilation = 1\n\n    print('check_gradient_dconv: ',\n          gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias,\n                    stride, padding, dilation, deformable_groups),\n                    eps=1e-3, atol=1e-4, rtol=1e-2))\n\n\ndef check_pooling_zero_offset():\n\n    input = torch.randn(2, 16, 64, 64).zero_()\n    input[0, :, 16:26, 16:26] = 1.\n    input[1, :, 10:20, 20:30] = 2.\n    rois = torch.tensor([\n        [0, 65, 65, 103, 103],\n        [1, 81, 41, 119, 79],\n    ]).float()\n    pooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                           pooled_size=7,\n                           output_dim=16,\n                           no_trans=True,\n                           group_size=1,\n                           trans_std=0.0)\n\n    out = pooling(input, rois, input.new())\n    s = ', '.join(['%f' % out[i, :, :, :].mean().item()\n                   for i in range(rois.shape[0])])\n    print(s)\n\n    dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                            pooled_size=7,\n                            output_dim=16,\n                            no_trans=False,\n                            group_size=1,\n                            trans_std=0.0)\n    offset = torch.randn(20, 2, 7, 7).zero_()\n    dout = dpooling(input, rois, offset)\n    s = ', '.join(['%f' % dout[i, :, :, :].mean().item()\n                   for i in range(rois.shape[0])])\n    print(s)\n\n\ndef 
check_gradient_dpooling():\n    input = torch.randn(2, 3, 5, 5) * 0.01\n    N = 4\n    batch_inds = torch.randint(2, (N, 1)).float()\n    x = torch.rand((N, 1)).float() * 15\n    y = torch.rand((N, 1)).float() * 15\n    w = torch.rand((N, 1)).float() * 10\n    h = torch.rand((N, 1)).float() * 10\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)\n    offset = torch.randn(N, 2, 3, 3)\n    input.requires_grad = True\n    offset.requires_grad = True\n\n    spatial_scale = 1.0 / 4\n    pooled_size = 3\n    output_dim = 3\n    no_trans = 0\n    group_size = 1\n    trans_std = 0.0\n    sample_per_part = 4\n    part_size = pooled_size\n\n    print('check_gradient_dpooling:',\n          gradcheck(dcn_v2_pooling, (input, rois, offset,\n                                     spatial_scale,\n                                     pooled_size,\n                                     output_dim,\n                                     no_trans,\n                                     group_size,\n                                     part_size,\n                                     sample_per_part,\n                                     trans_std),\n                    eps=1e-4))\n\n\ndef example_dconv():\n    input = torch.randn(2, 64, 128, 128)\n    # wrap all things (offset and mask) in DCN\n    dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,\n              padding=1, deformable_groups=2)\n    # print(dcn.weight.shape, input.shape)\n    output = dcn(input)\n    targert = output.new(*output.size())\n    targert.data.uniform_(-0.01, 0.01)\n    error = (targert - output).mean()\n    error.backward()\n    print(output.shape)\n\n\ndef example_dpooling():\n    input = torch.randn(2, 32, 64, 64)\n    batch_inds = torch.randint(2, (20, 1)).float()\n    x = torch.randint(256, (20, 1)).float()\n    y = torch.randint(256, (20, 1)).float()\n    w = torch.randint(64, (20, 1)).float()\n    h = torch.randint(64, (20, 1)).float()\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), 
dim=1)\n    offset = torch.randn(20, 2, 7, 7)\n    input.requires_grad = True\n    offset.requires_grad = True\n\n    # normal roi_align\n    pooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                           pooled_size=7,\n                           output_dim=32,\n                           no_trans=True,\n                           group_size=1,\n                           trans_std=0.1)\n\n    # deformable pooling\n    dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                            pooled_size=7,\n                            output_dim=32,\n                            no_trans=False,\n                            group_size=1,\n                            trans_std=0.1)\n\n    out = pooling(input, rois, offset)\n    dout = dpooling(input, rois, offset)\n    print(out.shape)\n    print(dout.shape)\n\n    target_out = out.new(*out.size())\n    target_out.data.uniform_(-0.01, 0.01)\n    target_dout = dout.new(*dout.size())\n    target_dout.data.uniform_(-0.01, 0.01)\n    e = (target_out - out).mean()\n    e.backward()\n    e = (target_dout - dout).mean()\n    e.backward()\n\n\ndef example_mdpooling():\n    input = torch.randn(2, 32, 64, 64)\n    input.requires_grad = True\n    batch_inds = torch.randint(2, (20, 1)).float()\n    x = torch.randint(256, (20, 1)).float()\n    y = torch.randint(256, (20, 1)).float()\n    w = torch.randint(64, (20, 1)).float()\n    h = torch.randint(64, (20, 1)).float()\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)\n\n    # mdformable pooling (V2)\n    dpooling = DCNPooling(spatial_scale=1.0 / 4,\n                          pooled_size=7,\n                          output_dim=32,\n                          no_trans=False,\n                          group_size=1,\n                          trans_std=0.1,\n                          deform_fc_dim=1024)\n\n    dout = dpooling(input, rois)\n    target = dout.new(*dout.size())\n    target.data.uniform_(-0.1, 0.1)\n    error = (target - dout).mean()\n    
error.backward()\n    print(dout.shape)\n\n\nif __name__ == '__main__':\n\n    example_dconv()\n    example_dpooling()\n    example_mdpooling()\n\n    check_pooling_zero_offset()\n    # zero offset check\n    if inC == outC:\n        check_zero_offset()\n\n    check_gradient_dpooling()\n    check_gradient_dconv()\n    # \"\"\"\n    # ****** Note: backward is not reentrant error may not be a serious problem,\n    # ****** since the max error is less than 1e-7,\n    # ****** Still looking for what trigger this problem\n    # \"\"\"\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/DCN/testcuda.py",
    "content": "#!/usr/bin/env python\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport time\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import gradcheck\n\nfrom dcn_v2 import dcn_v2_conv, DCNv2, DCN\nfrom dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling\n\ndeformable_groups = 1\nN, inC, inH, inW = 2, 2, 4, 4\noutC = 2\nkH, kW = 3, 3\n\n\ndef conv_identify(weight, bias):\n    weight.data.zero_()\n    bias.data.zero_()\n    o, i, h, w = weight.shape\n    y = h//2\n    x = w//2\n    for p in range(i):\n        for q in range(o):\n            if p == q:\n                weight.data[q, p, y, x] = 1.0\n\n\ndef check_zero_offset():\n    conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,\n                            kernel_size=(kH, kW),\n                            stride=(1, 1),\n                            padding=(1, 1),\n                            bias=True).cuda()\n\n    conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,\n                          kernel_size=(kH, kW),\n                          stride=(1, 1),\n                          padding=(1, 1),\n                          bias=True).cuda()\n\n    dcn_v2 = DCNv2(inC, outC, (kH, kW),\n                   stride=1, padding=1, dilation=1,\n                   deformable_groups=deformable_groups).cuda()\n\n    conv_offset.weight.data.zero_()\n    conv_offset.bias.data.zero_()\n    conv_mask.weight.data.zero_()\n    conv_mask.bias.data.zero_()\n    conv_identify(dcn_v2.weight, dcn_v2.bias)\n\n    input = torch.randn(N, inC, inH, inW).cuda()\n    offset = conv_offset(input)\n    mask = conv_mask(input)\n    mask = torch.sigmoid(mask)\n    output = dcn_v2(input, offset, mask)\n    output *= 2\n    d = (input - output).abs().max()\n    if d < 1e-10:\n        print('Zero offset passed')\n    else:\n        print('Zero offset failed')\n        print(input)\n        print(output)\n\ndef 
check_gradient_dconv():\n\n    input = torch.rand(N, inC, inH, inW).cuda() * 0.01\n    input.requires_grad = True\n\n    offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2\n    # offset.data.zero_()\n    # offset.data -= 0.5\n    offset.requires_grad = True\n\n    mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()\n    # mask.data.zero_()\n    mask.requires_grad = True\n    mask = torch.sigmoid(mask)\n\n    weight = torch.randn(outC, inC, kH, kW).cuda()\n    weight.requires_grad = True\n\n    bias = torch.rand(outC).cuda()\n    bias.requires_grad = True\n\n    stride = 1\n    padding = 1\n    dilation = 1\n\n    print('check_gradient_dconv: ',\n          gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias,\n                    stride, padding, dilation, deformable_groups),\n                    eps=1e-3, atol=1e-4, rtol=1e-2))\n\n\ndef check_pooling_zero_offset():\n\n    input = torch.randn(2, 16, 64, 64).cuda().zero_()\n    input[0, :, 16:26, 16:26] = 1.\n    input[1, :, 10:20, 20:30] = 2.\n    rois = torch.tensor([\n        [0, 65, 65, 103, 103],\n        [1, 81, 41, 119, 79],\n    ]).cuda().float()\n    pooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                           pooled_size=7,\n                           output_dim=16,\n                           no_trans=True,\n                           group_size=1,\n                           trans_std=0.0).cuda()\n\n    out = pooling(input, rois, input.new())\n    s = ', '.join(['%f' % out[i, :, :, :].mean().item()\n                   for i in range(rois.shape[0])])\n    print(s)\n\n    dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                            pooled_size=7,\n                            output_dim=16,\n                            no_trans=False,\n                            group_size=1,\n                            trans_std=0.0).cuda()\n    offset = torch.randn(20, 2, 7, 7).cuda().zero_()\n    dout = dpooling(input, rois, offset)\n    s = 
', '.join(['%f' % dout[i, :, :, :].mean().item()\n                   for i in range(rois.shape[0])])\n    print(s)\n\n\ndef check_gradient_dpooling():\n    input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01\n    N = 4\n    batch_inds = torch.randint(2, (N, 1)).cuda().float()\n    x = torch.rand((N, 1)).cuda().float() * 15\n    y = torch.rand((N, 1)).cuda().float() * 15\n    w = torch.rand((N, 1)).cuda().float() * 10\n    h = torch.rand((N, 1)).cuda().float() * 10\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)\n    offset = torch.randn(N, 2, 3, 3).cuda()\n    input.requires_grad = True\n    offset.requires_grad = True\n\n    spatial_scale = 1.0 / 4\n    pooled_size = 3\n    output_dim = 3\n    no_trans = 0\n    group_size = 1\n    trans_std = 0.0\n    sample_per_part = 4\n    part_size = pooled_size\n\n    print('check_gradient_dpooling:',\n          gradcheck(dcn_v2_pooling, (input, rois, offset,\n                                     spatial_scale,\n                                     pooled_size,\n                                     output_dim,\n                                     no_trans,\n                                     group_size,\n                                     part_size,\n                                     sample_per_part,\n                                     trans_std),\n                    eps=1e-4))\n\n\ndef example_dconv():\n    input = torch.randn(2, 64, 128, 128).cuda()\n    # wrap all things (offset and mask) in DCN\n    dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,\n              padding=1, deformable_groups=2).cuda()\n    # print(dcn.weight.shape, input.shape)\n    output = dcn(input)\n    targert = output.new(*output.size())\n    targert.data.uniform_(-0.01, 0.01)\n    error = (targert - output).mean()\n    error.backward()\n    print(output.shape)\n\n\ndef example_dpooling():\n    input = torch.randn(2, 32, 64, 64).cuda()\n    batch_inds = torch.randint(2, (20, 1)).cuda().float()\n    x = torch.randint(256, 
(20, 1)).cuda().float()\n    y = torch.randint(256, (20, 1)).cuda().float()\n    w = torch.randint(64, (20, 1)).cuda().float()\n    h = torch.randint(64, (20, 1)).cuda().float()\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)\n    offset = torch.randn(20, 2, 7, 7).cuda()\n    input.requires_grad = True\n    offset.requires_grad = True\n\n    # normal roi_align\n    pooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                           pooled_size=7,\n                           output_dim=32,\n                           no_trans=True,\n                           group_size=1,\n                           trans_std=0.1).cuda()\n\n    # deformable pooling\n    dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,\n                            pooled_size=7,\n                            output_dim=32,\n                            no_trans=False,\n                            group_size=1,\n                            trans_std=0.1).cuda()\n\n    out = pooling(input, rois, offset)\n    dout = dpooling(input, rois, offset)\n    print(out.shape)\n    print(dout.shape)\n\n    target_out = out.new(*out.size())\n    target_out.data.uniform_(-0.01, 0.01)\n    target_dout = dout.new(*dout.size())\n    target_dout.data.uniform_(-0.01, 0.01)\n    e = (target_out - out).mean()\n    e.backward()\n    e = (target_dout - dout).mean()\n    e.backward()\n\n\ndef example_mdpooling():\n    input = torch.randn(2, 32, 64, 64).cuda()\n    input.requires_grad = True\n    batch_inds = torch.randint(2, (20, 1)).cuda().float()\n    x = torch.randint(256, (20, 1)).cuda().float()\n    y = torch.randint(256, (20, 1)).cuda().float()\n    w = torch.randint(64, (20, 1)).cuda().float()\n    h = torch.randint(64, (20, 1)).cuda().float()\n    rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)\n\n    # mdformable pooling (V2)\n    dpooling = DCNPooling(spatial_scale=1.0 / 4,\n                          pooled_size=7,\n                          output_dim=32,\n                          
no_trans=False,\n                          group_size=1,\n                          trans_std=0.1,\n                          deform_fc_dim=1024).cuda()\n\n    dout = dpooling(input, rois)\n    target = dout.new(*dout.size())\n    target.data.uniform_(-0.1, 0.1)\n    error = (target - dout).mean()\n    error.backward()\n    print(dout.shape)\n\n\nif __name__ == '__main__':\n\n    example_dconv()\n    example_dpooling()\n    example_mdpooling()\n\n    check_pooling_zero_offset()\n    # zero offset check\n    if inC == outC:\n        check_zero_offset()\n\n    check_gradient_dpooling()\n    check_gradient_dconv()\n    # \"\"\"\n    # ****** Note: backward is not reentrant error may not be a serious problem,\n    # ****** since the max error is less than 1e-7,\n    # ****** Still looking for what trigger this problem\n    # \"\"\"\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/LICENSE",
    "content": "BSD 3-Clause License\n\nCopyright (c) 2019, Charles Shang\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n3. Neither the name of the copyright holder nor the names of its\n   contributors may be used to endorse or promote products derived from\n   this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/README.md",
    "content": "## Deformable Convolutional Networks V2 with PyTorch 1.X\n\n### Build\n```bash\n    ./make.sh         # build\n    python testcpu.py    # run examples and gradient check on cpu\n    python testcuda.py   # run examples and gradient check on gpu \n```\n### Note\nThe master branch now targets PyTorch 1.x. To switch back to PyTorch 0.4, run:\n```bash\ngit checkout pytorch_0.4\n```\n\n### Known Issues:\n\n- [x] Gradient check w.r.t. offset (solved)\n- [ ] Backward is not reentrant (minor)\n\nThis is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).\n\nUpdate: all gradient checks pass with **double** precision.\n\nAnother issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for float, `<1e-15` for double),\nso it may not be a serious problem (?)\n\nPlease post an issue or PR if you have any comments.\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/make.sh",
    "content": "#!/usr/bin/env bash\npython setup.py build develop\n"
  },
  {
    "path": "Network/rigidmask/networks/DCNv2/setup.py",
    "content": "#!/usr/bin/env python\n\nimport os\nimport glob\n\nimport torch\n\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.utils.cpp_extension import CppExtension\nfrom torch.utils.cpp_extension import CUDAExtension\n\nfrom setuptools import find_packages\nfrom setuptools import setup\n\nrequirements = [\"torch\", \"torchvision\"]\n\n\ndef get_extensions():\n    this_dir = os.path.dirname(os.path.abspath(__file__))\n    extensions_dir = os.path.join(this_dir, \"DCN\", \"src\")\n\n    main_file = glob.glob(os.path.join(extensions_dir, \"*.cpp\"))\n    source_cpu = glob.glob(os.path.join(extensions_dir, \"cpu\", \"*.cpp\"))\n    source_cuda = glob.glob(os.path.join(extensions_dir, \"cuda\", \"*.cu\"))\n    \n    #os.environ[\"CC\"] = \"g++\"\n    sources = main_file + source_cpu\n    extension = CppExtension\n    extra_compile_args = {'cxx': ['-std=c++14']}\n    define_macros = []\n\n    \n    #if torch.cuda.is_available() and CUDA_HOME is not None:\n    if torch.cuda.is_available():\n        extension = CUDAExtension\n        sources += source_cuda\n        define_macros += [(\"WITH_CUDA\", None)]\n        extra_compile_args[\"nvcc\"] = [\n            \"-DCUDA_HAS_FP16=1\",\n            \"-D__CUDA_NO_HALF_OPERATORS__\",\n            \"-D__CUDA_NO_HALF_CONVERSIONS__\",\n            \"-D__CUDA_NO_HALF2_OPERATORS__\",\n        ]\n    else:\n        #raise NotImplementedError('Cuda is not available')\n        pass\n    \n\n    sources = [os.path.join(extensions_dir, s) for s in sources]\n    include_dirs = [extensions_dir]\n    ext_modules = [\n        extension(\n            \"_ext\",\n            sources,\n            include_dirs=include_dirs,\n            define_macros=define_macros,\n            extra_compile_args=extra_compile_args,\n        )\n    ]\n    return ext_modules\n\nsetup(\n    name=\"DCNv2\",\n    version=\"0.1\",\n    author=\"charlesshang\",\n    url=\"https://github.com/charlesshang/DCNv2\",\n    description=\"deformable 
convolutional networks\",\n    packages=find_packages(exclude=(\"configs\", \"tests\",)),\n    # install_requires=requirements,\n    ext_modules=get_extensions(),\n    cmdclass={\"build_ext\": torch.utils.cpp_extension.BuildExtension},\n)\n"
  },
  {
    "path": "Network/rigidmask/networks/dlav0.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\nfrom os.path import join\n\nimport torch\nfrom torch import nn\nimport torch.utils.model_zoo as model_zoo\n\nimport numpy as np\n\nBatchNorm = nn.BatchNorm2d\n\ndef get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):\n    return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"3x3 convolution with padding\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn1 = BatchNorm(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,\n                               stride=1, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = BatchNorm(planes)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 2\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(Bottleneck, self).__init__()\n        expansion = Bottleneck.expansion\n        bottle_planes = planes // expansion\n        self.conv1 = 
nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = BatchNorm(bottle_planes)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = BatchNorm(bottle_planes)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        self.bn3 = BatchNorm(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass BottleneckX(nn.Module):\n    expansion = 2\n    cardinality = 32\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(BottleneckX, self).__init__()\n        cardinality = BottleneckX.cardinality\n        # dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))\n        # bottle_planes = dim * cardinality\n        bottle_planes = planes * cardinality // 32\n        self.conv1 = nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = BatchNorm(bottle_planes)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation, bias=False,\n                               dilation=dilation, groups=cardinality)\n        self.bn2 = BatchNorm(bottle_planes)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        
self.bn3 = BatchNorm(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Root(nn.Module):\n    def __init__(self, in_channels, out_channels, kernel_size, residual):\n        super(Root, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels, out_channels, 1,\n            stride=1, bias=False, padding=(kernel_size - 1) // 2)\n        self.bn = BatchNorm(out_channels)\n        self.relu = nn.ReLU(inplace=True)\n        self.residual = residual\n\n    def forward(self, *x):\n        children = x\n        x = self.conv(torch.cat(x, 1))\n        x = self.bn(x)\n        if self.residual:\n            x += children[0]\n        x = self.relu(x)\n\n        return x\n\n\nclass Tree(nn.Module):\n    def __init__(self, levels, block, in_channels, out_channels, stride=1,\n                 level_root=False, root_dim=0, root_kernel_size=1,\n                 dilation=1, root_residual=False):\n        super(Tree, self).__init__()\n        if root_dim == 0:\n            root_dim = 2 * out_channels\n        if level_root:\n            root_dim += in_channels\n        if levels == 1:\n            self.tree1 = block(in_channels, out_channels, stride,\n                               dilation=dilation)\n            self.tree2 = block(out_channels, out_channels, 1,\n                               dilation=dilation)\n        else:\n            self.tree1 = Tree(levels - 1, block, in_channels, out_channels,\n                              stride, root_dim=0,\n                              
root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n            self.tree2 = Tree(levels - 1, block, out_channels, out_channels,\n                              root_dim=root_dim + out_channels,\n                              root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n        if levels == 1:\n            self.root = Root(root_dim, out_channels, root_kernel_size,\n                             root_residual)\n        self.level_root = level_root\n        self.root_dim = root_dim\n        self.downsample = None\n        self.project = None\n        self.levels = levels\n        if stride > 1:\n            self.downsample = nn.MaxPool2d(stride, stride=stride)\n        if in_channels != out_channels:\n            self.project = nn.Sequential(\n                nn.Conv2d(in_channels, out_channels,\n                          kernel_size=1, stride=1, bias=False),\n                BatchNorm(out_channels)\n            )\n\n    def forward(self, x, residual=None, children=None):\n        children = [] if children is None else children\n        bottom = self.downsample(x) if self.downsample else x\n        residual = self.project(bottom) if self.project else bottom\n        if self.level_root:\n            children.append(bottom)\n        x1 = self.tree1(x, residual)\n        if self.levels == 1:\n            x2 = self.tree2(x1)\n            x = self.root(x2, x1, *children)\n        else:\n            children.append(x1)\n            x = self.tree2(x1, children=children)\n        return x\n\n\nclass DLA(nn.Module):\n    def __init__(self, levels, channels, num_classes=1000,\n                 block=BasicBlock, residual_root=False, return_levels=False,\n                 pool_size=7, linear_root=False):\n        super(DLA, self).__init__()\n        self.channels = channels\n        self.return_levels = return_levels\n        self.num_classes = 
num_classes\n        self.base_layer = nn.Sequential(\n            nn.Conv2d(3, channels[0], kernel_size=7, stride=1,\n                      padding=3, bias=False),\n            BatchNorm(channels[0]),\n            nn.ReLU(inplace=True))\n        self.level0 = self._make_conv_level(\n            channels[0], channels[0], levels[0])\n        self.level1 = self._make_conv_level(\n            channels[0], channels[1], levels[1], stride=2)\n        self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,\n                           level_root=False,\n                           root_residual=residual_root)\n        self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,\n                           level_root=True, root_residual=residual_root)\n\n        self.avgpool = nn.AvgPool2d(pool_size)\n        self.fc = nn.Conv2d(channels[-1], num_classes, kernel_size=1,\n                            stride=1, padding=0, bias=True)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n            elif isinstance(m, BatchNorm):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n    def _make_level(self, block, inplanes, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or inplanes != planes:\n            downsample = nn.Sequential(\n                nn.MaxPool2d(stride, stride=stride),\n                nn.Conv2d(inplanes, planes,\n                          kernel_size=1, stride=1, bias=False),\n                BatchNorm(planes),\n            )\n\n        layers = []\n        layers.append(block(inplanes, planes, stride, downsample=downsample))\n        for i in range(1, blocks):\n            layers.append(block(inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):\n        modules = []\n        for i in range(convs):\n            modules.extend([\n                nn.Conv2d(inplanes, planes, kernel_size=3,\n                          stride=stride if i == 0 else 1,\n                          padding=dilation, bias=False, dilation=dilation),\n                BatchNorm(planes),\n                nn.ReLU(inplace=True)])\n            inplanes = planes\n        return nn.Sequential(*modules)\n\n    def forward(self, x):\n        y = []\n        x = self.base_layer(x)\n        for i in range(6):\n            x = getattr(self, 'level{}'.format(i))(x)\n            y.append(x)\n        if self.return_levels:\n            return y\n        else:\n            x = self.avgpool(x)\n            x = self.fc(x)\n            x = x.view(x.size(0), -1)\n\n            return x\n\n    def load_pretrained_model(self,  data='imagenet', name='dla34', hash='ba72cf86'):\n        fc = self.fc\n        if name.endswith('.pth'):\n            model_weights = torch.load(data + name)\n        else:\n            model_url = get_model_url(data, name, hash)\n            model_weights = model_zoo.load_url(model_url)\n        num_classes 
= len(model_weights[list(model_weights.keys())[-1]])\n        self.fc = nn.Conv2d(\n            self.channels[-1], num_classes,\n            kernel_size=1, stride=1, padding=0, bias=True)\n        self.load_state_dict(model_weights)\n        self.fc = fc\n\n\ndef dla34(pretrained, **kwargs):  # DLA-34\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 128, 256, 512],\n                block=BasicBlock, **kwargs)\n    if pretrained:\n        model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')\n    return model\n\n\ndef dla46_c(pretrained=None, **kwargs):  # DLA-46-C\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=Bottleneck, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla46_c')\n    return model\n\n\ndef dla46x_c(pretrained=None, **kwargs):  # DLA-X-46-C\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=BottleneckX, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla46x_c')\n    return model\n\n\ndef dla60x_c(pretrained, **kwargs):  # DLA-X-60-C\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=BottleneckX, **kwargs)\n    if pretrained:\n        model.load_pretrained_model(data='imagenet', name='dla60x_c', hash='b870c45c')\n    return model\n\n\ndef dla60(pretrained=None, **kwargs):  # DLA-60\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n                [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla60')\n    return model\n\n\ndef dla60x(pretrained=None, **kwargs):  # DLA-X-60\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n      
          [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla60x')\n    return model\n\n\ndef dla102(pretrained=None, **kwargs):  # DLA-102\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla102')\n    return model\n\n\ndef dla102x(pretrained=None, **kwargs):  # DLA-X-102\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla102x')\n    return model\n\n\ndef dla102x2(pretrained=None, **kwargs):  # DLA-X-102 64\n    BottleneckX.cardinality = 64\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla102x2')\n    return model\n\n\ndef dla169(pretrained=None, **kwargs):  # DLA-169\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 2, 3, 5, 1], [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(pretrained, 'dla169')\n    return model\n\n\ndef set_bn(bn):\n    global BatchNorm\n    BatchNorm = bn\n    dla.BatchNorm = bn\n\n\nclass Identity(nn.Module):\n    def __init__(self):\n        super(Identity, self).__init__()\n\n    def forward(self, x):\n        return x\n\n\ndef fill_up_weights(up):\n    w = up.weight.data\n    f = math.ceil(w.size(2) / 2)\n    c = (2 * f - 1 - f % 2) / (2. 
* f)\n    for i in range(w.size(2)):\n        for j in range(w.size(3)):\n            w[0, 0, i, j] = \\\n                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))\n    for c in range(1, w.size(0)):\n        w[c, 0, :, :] = w[0, 0, :, :]\n\n\nclass IDAUp(nn.Module):\n    def __init__(self, node_kernel, out_dim, channels, up_factors):\n        super(IDAUp, self).__init__()\n        self.channels = channels\n        self.out_dim = out_dim\n        for i, c in enumerate(channels):\n            if c == out_dim:\n                proj = Identity()\n            else:\n                proj = nn.Sequential(\n                    nn.Conv2d(c, out_dim,\n                              kernel_size=1, stride=1, bias=False),\n                    BatchNorm(out_dim),\n                    nn.ReLU(inplace=True))\n            f = int(up_factors[i])\n            if f == 1:\n                up = Identity()\n            else:\n                up = nn.ConvTranspose2d(\n                    out_dim, out_dim, f * 2, stride=f, padding=f // 2,\n                    output_padding=0, groups=out_dim, bias=False)\n                fill_up_weights(up)\n            setattr(self, 'proj_' + str(i), proj)\n            setattr(self, 'up_' + str(i), up)\n\n        for i in range(1, len(channels)):\n            node = nn.Sequential(\n                nn.Conv2d(out_dim * 2, out_dim,\n                          kernel_size=node_kernel, stride=1,\n                          padding=node_kernel // 2, bias=False),\n                BatchNorm(out_dim),\n                nn.ReLU(inplace=True))\n            setattr(self, 'node_' + str(i), node)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n            elif isinstance(m, BatchNorm):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n    def forward(self, layers):\n        assert len(self.channels) == len(layers), \\\n            '{} vs {} layers'.format(len(self.channels), len(layers))\n        layers = list(layers)\n        for i, l in enumerate(layers):\n            upsample = getattr(self, 'up_' + str(i))\n            project = getattr(self, 'proj_' + str(i))\n            layers[i] = upsample(project(l))\n        x = layers[0]\n        y = []\n        for i in range(1, len(layers)):\n            node = getattr(self, 'node_' + str(i))\n            x = node(torch.cat([x, layers[i]], 1))\n            y.append(x)\n        return x, y\n\n\nclass DLAUp(nn.Module):\n    def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None):\n        super(DLAUp, self).__init__()\n        if in_channels is None:\n            in_channels = channels\n        self.channels = channels\n        channels = list(channels)\n        scales = np.array(scales, dtype=int)\n        for i in range(len(channels) - 1):\n            j = -i - 2\n            setattr(self, 'ida_{}'.format(i),\n                    IDAUp(3, channels[j], in_channels[j:],\n                          scales[j:] // scales[j]))\n            scales[j + 1:] = scales[j]\n            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]\n\n    def forward(self, layers):\n        layers = list(layers)\n        assert len(layers) > 1\n        for i in range(len(layers) - 1):\n            ida = getattr(self, 'ida_{}'.format(i))\n            x, y = ida(layers[-i - 2:])\n            layers[-i - 1:] = y\n        return x\n\ndef fill_fc_weights(layers):\n    for m in layers.modules():\n        if isinstance(m, nn.Conv2d):\n            nn.init.normal_(m.weight, std=0.001)\n            # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')\n            # torch.nn.init.xavier_normal_(m.weight.data)\n        
    if m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n\nclass DLASeg(nn.Module):\n    def __init__(self, base_name, heads,\n                 pretrained=True, down_ratio=4, head_conv=256):\n        super(DLASeg, self).__init__()\n        assert down_ratio in [2, 4, 8, 16]\n        self.heads = heads\n        self.first_level = int(np.log2(down_ratio))\n        self.base = globals()[base_name](\n          pretrained=pretrained, return_levels=True)\n        channels = self.base.channels\n        scales = [2 ** i for i in range(len(channels[self.first_level:]))]\n        self.dla_up = DLAUp(channels[self.first_level:], scales=scales)\n        '''\n        self.fc = nn.Sequential(\n            nn.Conv2d(channels[self.first_level], classes, kernel_size=1,\n                      stride=1, padding=0, bias=True)\n        )\n        '''\n\n        for head in self.heads:\n            classes = self.heads[head]\n            if head_conv > 0:\n                fc = nn.Sequential(\n                  nn.Conv2d(channels[self.first_level], head_conv,\n                    kernel_size=3, padding=1, bias=True),\n                  nn.ReLU(inplace=True),\n                  nn.Conv2d(head_conv, classes, \n                    kernel_size=1, stride=1, \n                    padding=0, bias=True))\n                if 'hm' in head:\n                    fc[-1].bias.data.fill_(-2.19)\n                else:\n                    fill_fc_weights(fc)\n            else:\n                fc = nn.Conv2d(channels[self.first_level], classes, \n                  kernel_size=1, stride=1, \n                  padding=0, bias=True)\n                if 'hm' in head:\n                    fc.bias.data.fill_(-2.19)\n                else:\n                    fill_fc_weights(fc)\n            self.__setattr__(head, fc)\n\n        '''\n        up_factor = 2 ** self.first_level\n        if up_factor > 1:\n            up = nn.ConvTranspose2d(classes, classes, up_factor * 2,\n                       
             stride=up_factor, padding=up_factor // 2,\n                                    output_padding=0, groups=classes,\n                                    bias=False)\n            fill_up_weights(up)\n            up.weight.requires_grad = False\n        else:\n            up = Identity()\n        self.up = up\n        self.softmax = nn.LogSoftmax(dim=1)\n        \n\n        for m in self.fc.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. / n))\n            elif isinstance(m, BatchNorm):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n        '''\n\n    def forward(self, x):\n        x = self.base(x)\n        x = self.dla_up(x[self.first_level:])\n        # x = self.fc(x)\n        # y = self.softmax(self.up(x))\n        ret = {}\n        for head in self.heads:\n            ret[head] = self.__getattr__(head)(x)\n        return [ret]\n\n    '''\n    def optim_parameters(self, memo=None):\n        for param in self.base.parameters():\n            yield param\n        for param in self.dla_up.parameters():\n            yield param\n        for param in self.fc.parameters():\n            yield param\n    '''\n'''\ndef dla34up(classes, pretrained_base=None, **kwargs):\n    model = DLASeg('dla34', classes, pretrained_base=pretrained_base, **kwargs)\n    return model\n\n\ndef dla60up(classes, pretrained_base=None, **kwargs):\n    model = DLASeg('dla60', classes, pretrained_base=pretrained_base, **kwargs)\n    return model\n\n\ndef dla102up(classes, pretrained_base=None, **kwargs):\n    model = DLASeg('dla102', classes,\n                   pretrained_base=pretrained_base, **kwargs)\n    return model\n\n\ndef dla169up(classes, pretrained_base=None, **kwargs):\n    model = DLASeg('dla169', classes,\n                   pretrained_base=pretrained_base, **kwargs)\n    return model\n'''\n\ndef 
get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):\n  model = DLASeg('dla{}'.format(num_layers), heads,\n                 pretrained=True,\n                 down_ratio=down_ratio,\n                 head_conv=head_conv)\n  return model\n"
  },
  {
    "path": "Network/rigidmask/networks/large_hourglass.py",
    "content": "# ------------------------------------------------------------------------------\n# This code is based on \n# CornerNet (https://github.com/princeton-vl/CornerNet)\n# Copyright (c) 2018, University of Michigan\n# Licensed under the BSD 3-Clause License\n# ------------------------------------------------------------------------------\n\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nclass convolution(nn.Module):\n    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):\n        super(convolution, self).__init__()\n\n        pad = (k - 1) // 2\n        self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)\n        self.bn   = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        conv = self.conv(x)\n        bn   = self.bn(conv)\n        relu = self.relu(bn)\n        return relu\n\nclass fully_connected(nn.Module):\n    def __init__(self, inp_dim, out_dim, with_bn=True):\n        super(fully_connected, self).__init__()\n        self.with_bn = with_bn\n\n        self.linear = nn.Linear(inp_dim, out_dim)\n        if self.with_bn:\n            self.bn = nn.BatchNorm1d(out_dim)\n        self.relu   = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        linear = self.linear(x)\n        bn     = self.bn(linear) if self.with_bn else linear\n        relu   = self.relu(bn)\n        return relu\n\nclass residual(nn.Module):\n    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):\n        super(residual, self).__init__()\n\n        self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)\n        self.bn1   = nn.BatchNorm2d(out_dim)\n        self.relu1 = nn.ReLU(inplace=True)\n\n        self.conv2 = nn.Conv2d(out_dim, 
out_dim, (3, 3), padding=(1, 1), bias=False)\n        self.bn2   = nn.BatchNorm2d(out_dim)\n        \n        self.skip  = nn.Sequential(\n            nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),\n            nn.BatchNorm2d(out_dim)\n        ) if stride != 1 or inp_dim != out_dim else nn.Sequential()\n        self.relu  = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        conv1 = self.conv1(x)\n        bn1   = self.bn1(conv1)\n        relu1 = self.relu1(bn1)\n\n        conv2 = self.conv2(relu1)\n        bn2   = self.bn2(conv2)\n\n        skip  = self.skip(x)\n        return self.relu(bn2 + skip)\n\ndef make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):\n    layers = [layer(k, inp_dim, out_dim, **kwargs)]\n    for _ in range(1, modules):\n        layers.append(layer(k, out_dim, out_dim, **kwargs))\n    return nn.Sequential(*layers)\n\ndef make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):\n    layers = []\n    for _ in range(modules - 1):\n        layers.append(layer(k, inp_dim, inp_dim, **kwargs))\n    layers.append(layer(k, inp_dim, out_dim, **kwargs))\n    return nn.Sequential(*layers)\n\nclass MergeUp(nn.Module):\n    def forward(self, up1, up2):\n        return up1 + up2\n\ndef make_merge_layer(dim):\n    return MergeUp()\n\n# def make_pool_layer(dim):\n#     return nn.MaxPool2d(kernel_size=2, stride=2)\n\ndef make_pool_layer(dim):\n    return nn.Sequential()\n\ndef make_unpool_layer(dim):\n    return nn.Upsample(scale_factor=2)\n\ndef make_kp_layer(cnv_dim, curr_dim, out_dim):\n    return nn.Sequential(\n        convolution(3, cnv_dim, curr_dim, with_bn=False),\n        nn.Conv2d(curr_dim, out_dim, (1, 1))\n    )\n\ndef make_inter_layer(dim):\n    return residual(3, dim, dim)\n\ndef make_cnv_layer(inp_dim, out_dim):\n    return convolution(3, inp_dim, out_dim)\n\nclass kp_module(nn.Module):\n    def __init__(\n        self, n, dims, modules, layer=residual,\n        
make_up_layer=make_layer, make_low_layer=make_layer,\n        make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,\n        make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,\n        make_merge_layer=make_merge_layer, **kwargs\n    ):\n        super(kp_module, self).__init__()\n\n        self.n   = n\n\n        curr_mod = modules[0]\n        next_mod = modules[1]\n\n        curr_dim = dims[0]\n        next_dim = dims[1]\n\n        self.up1  = make_up_layer(\n            3, curr_dim, curr_dim, curr_mod, \n            layer=layer, **kwargs\n        )  \n        self.max1 = make_pool_layer(curr_dim)\n        self.low1 = make_hg_layer(\n            3, curr_dim, next_dim, curr_mod,\n            layer=layer, **kwargs\n        )\n        self.low2 = kp_module(\n            n - 1, dims[1:], modules[1:], layer=layer, \n            make_up_layer=make_up_layer, \n            make_low_layer=make_low_layer,\n            make_hg_layer=make_hg_layer,\n            make_hg_layer_revr=make_hg_layer_revr,\n            make_pool_layer=make_pool_layer,\n            make_unpool_layer=make_unpool_layer,\n            make_merge_layer=make_merge_layer,\n            **kwargs\n        ) if self.n > 1 else \\\n        make_low_layer(\n            3, next_dim, next_dim, next_mod,\n            layer=layer, **kwargs\n        )\n        self.low3 = make_hg_layer_revr(\n            3, next_dim, curr_dim, curr_mod,\n            layer=layer, **kwargs\n        )\n        self.up2  = make_unpool_layer(curr_dim)\n\n        self.merge = make_merge_layer(curr_dim)\n\n    def forward(self, x):\n        up1  = self.up1(x)\n        max1 = self.max1(x)\n        low1 = self.low1(max1)\n        low2 = self.low2(low1)\n        low3 = self.low3(low2)\n        up2  = self.up2(low3)\n        return self.merge(up1, up2)\n\nclass exkp(nn.Module):\n    def __init__(\n        self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256, \n        make_tl_layer=None, make_br_layer=None,\n   
     make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,\n        make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,\n        make_up_layer=make_layer, make_low_layer=make_layer, \n        make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,\n        make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,\n        make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, \n        kp_layer=residual\n    ):\n        super(exkp, self).__init__()\n\n        self.nstack    = nstack\n        self.heads     = heads\n\n        curr_dim = dims[0]\n\n        self.pre = nn.Sequential(\n            convolution(7, 3, 128, stride=2),\n            residual(3, 128, 256, stride=2)\n        ) if pre is None else pre\n\n        self.kps  = nn.ModuleList([\n            kp_module(\n                n, dims, modules, layer=kp_layer,\n                make_up_layer=make_up_layer,\n                make_low_layer=make_low_layer,\n                make_hg_layer=make_hg_layer,\n                make_hg_layer_revr=make_hg_layer_revr,\n                make_pool_layer=make_pool_layer,\n                make_unpool_layer=make_unpool_layer,\n                make_merge_layer=make_merge_layer\n            ) for _ in range(nstack)\n        ])\n        self.cnvs = nn.ModuleList([\n            make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)\n        ])\n\n        self.inters = nn.ModuleList([\n            make_inter_layer(curr_dim) for _ in range(nstack - 1)\n        ])\n\n        self.inters_ = nn.ModuleList([\n            nn.Sequential(\n                nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),\n                nn.BatchNorm2d(curr_dim)\n            ) for _ in range(nstack - 1)\n        ])\n        self.cnvs_   = nn.ModuleList([\n            nn.Sequential(\n                nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),\n                nn.BatchNorm2d(curr_dim)\n            ) for _ in range(nstack - 1)\n        ])\n\n        ## 
keypoint heatmaps\n        for head in heads.keys():\n            if 'hm' in head:\n                module =  nn.ModuleList([\n                    make_heat_layer(\n                        cnv_dim, curr_dim, heads[head]) for _ in range(nstack)\n                ])\n                self.__setattr__(head, module)\n                for heat in self.__getattr__(head):\n                    heat[-1].bias.data.fill_(-2.19)\n            else:\n                module = nn.ModuleList([\n                    make_regr_layer(\n                        cnv_dim, curr_dim, heads[head]) for _ in range(nstack)\n                ])\n                self.__setattr__(head, module)\n\n\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, image):\n        # print('image shape', image.shape)\n        inter = self.pre(image)\n        outs  = []\n\n        for ind in range(self.nstack):\n            kp_, cnv_  = self.kps[ind], self.cnvs[ind]\n            kp  = kp_(inter)\n            cnv = cnv_(kp)\n\n            out = {}\n            for head in self.heads:\n                layer = self.__getattr__(head)[ind]\n                y = layer(cnv)\n                out[head] = y\n            \n            outs.append(out)\n            if ind < self.nstack - 1:\n                inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)\n                inter = self.relu(inter)\n                inter = self.inters[ind](inter)\n        return outs\n\n\ndef make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):\n    layers  = [layer(kernel, dim0, dim1, stride=2)]\n    layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]\n    return nn.Sequential(*layers)\n\n\nclass HourglassNet(exkp):\n    def __init__(self, heads, num_stacks=2):\n        n       = 5\n        dims    = [256, 256, 384, 384, 384, 512]\n        modules = [2, 2, 2, 2, 2, 4]\n\n        super(HourglassNet, self).__init__(\n            n, num_stacks, dims, modules, heads,\n            make_tl_layer=None,\n      
      make_br_layer=None,\n            make_pool_layer=make_pool_layer,\n            make_hg_layer=make_hg_layer,\n            kp_layer=residual, cnv_dim=256\n        )\n\ndef get_large_hourglass_net(num_layers, heads, head_conv):\n  model = HourglassNet(heads, 2)\n  return model\n"
  },
  {
    "path": "Network/rigidmask/networks/msra_resnet.py",
    "content": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed under the MIT License.\n# Written by Bin Xiao (Bin.Xiao@microsoft.com)\n# Modified by Xingyi Zhou\n# ------------------------------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n\nimport torch\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\nBN_MOMENTUM = 0.1\n\nmodel_urls = {\n    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += 
residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,\n                               padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,\n                               bias=False)\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion,\n                                  momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass PoseResNet(nn.Module):\n\n    def __init__(self, block, layers, heads, head_conv, **kwargs):\n        self.inplanes = 64\n        self.deconv_with_bias = False\n        self.heads = heads\n\n        super(PoseResNet, self).__init__()\n        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,\n                               bias=False)\n        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)\n\n        # used for deconv layers\n        self.deconv_layers = self._make_deconv_layer(\n            3,\n            [256, 256, 256],\n            [4, 4, 4],\n        )\n        # self.final_layer = []\n\n        for head in sorted(self.heads):\n          num_output = self.heads[head]\n          if head_conv > 0:\n            fc = nn.Sequential(\n                nn.Conv2d(256, head_conv,\n                  kernel_size=3, padding=1, bias=True),\n                nn.ReLU(inplace=True),\n                nn.Conv2d(head_conv, num_output, \n                  kernel_size=1, stride=1, padding=0))\n          else:\n            fc = nn.Conv2d(\n              in_channels=256,\n              out_channels=num_output,\n              kernel_size=1,\n              stride=1,\n              padding=0\n          )\n          self.__setattr__(head, fc)\n\n        # self.final_layer = nn.ModuleList(self.final_layer)\n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.inplanes, planes * block.expansion,\n                          kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _get_deconv_cfg(self, deconv_kernel, index):\n        if deconv_kernel == 4:\n            padding = 1\n            output_padding = 0\n        elif 
deconv_kernel == 3:\n            padding = 1\n            output_padding = 1\n        elif deconv_kernel == 2:\n            padding = 0\n            output_padding = 0\n\n        return deconv_kernel, padding, output_padding\n\n    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):\n        assert num_layers == len(num_filters), \\\n            'ERROR: num_deconv_layers is different len(num_deconv_filters)'\n        assert num_layers == len(num_kernels), \\\n            'ERROR: num_deconv_layers is different len(num_deconv_filters)'\n\n        layers = []\n        for i in range(num_layers):\n            kernel, padding, output_padding = \\\n                self._get_deconv_cfg(num_kernels[i], i)\n\n            planes = num_filters[i]\n            layers.append(\n                nn.ConvTranspose2d(\n                    in_channels=self.inplanes,\n                    out_channels=planes,\n                    kernel_size=kernel,\n                    stride=2,\n                    padding=padding,\n                    output_padding=output_padding,\n                    bias=self.deconv_with_bias))\n            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))\n            layers.append(nn.ReLU(inplace=True))\n            self.inplanes = planes\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        x = self.deconv_layers(x)\n        ret = {}\n        for head in self.heads:\n            ret[head] = self.__getattr__(head)(x)\n        return [ret]\n\n    def init_weights(self, num_layers, pretrained=True):\n        if pretrained:\n            # print('=> init resnet deconv weights from normal distribution')\n            for _, m in self.deconv_layers.named_modules():\n                if isinstance(m, 
nn.ConvTranspose2d):\n                    # print('=> init {}.weight as normal(0, 0.001)'.format(name))\n                    # print('=> init {}.bias as 0'.format(name))\n                    nn.init.normal_(m.weight, std=0.001)\n                    if self.deconv_with_bias:\n                        nn.init.constant_(m.bias, 0)\n                elif isinstance(m, nn.BatchNorm2d):\n                    # print('=> init {}.weight as 1'.format(name))\n                    # print('=> init {}.bias as 0'.format(name))\n                    nn.init.constant_(m.weight, 1)\n                    nn.init.constant_(m.bias, 0)\n            # print('=> init final conv weights from normal distribution')\n            for head in self.heads:\n              final_layer = self.__getattr__(head)\n              for i, m in enumerate(final_layer.modules()):\n                  if isinstance(m, nn.Conv2d):\n                      # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                      # print('=> init {}.weight as normal(0, 0.001)'.format(name))\n                      # print('=> init {}.bias as 0'.format(name))\n                      if m.weight.shape[0] == self.heads[head]:\n                          if 'hm' in head:\n                              nn.init.constant_(m.bias, -2.19)\n                          else:\n                              nn.init.normal_(m.weight, std=0.001)\n                              nn.init.constant_(m.bias, 0)\n            #pretrained_state_dict = torch.load(pretrained)\n            url = model_urls['resnet{}'.format(num_layers)]\n            pretrained_state_dict = model_zoo.load_url(url)\n            print('=> loading pretrained model {}'.format(url))\n            self.load_state_dict(pretrained_state_dict, strict=False)\n        else:\n            print('=> imagenet pretrained model dose not exist')\n            print('=> please download it first')\n            raise ValueError('imagenet pretrained model does not 
exist')\n\n\nresnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),\n               34: (BasicBlock, [3, 4, 6, 3]),\n               50: (Bottleneck, [3, 4, 6, 3]),\n               101: (Bottleneck, [3, 4, 23, 3]),\n               152: (Bottleneck, [3, 8, 36, 3])}\n\n\ndef get_pose_net(num_layers, heads, head_conv):\n  block_class, layers = resnet_spec[num_layers]\n\n  model = PoseResNet(block_class, layers, heads, head_conv=head_conv)\n  model.init_weights(num_layers, pretrained=True)\n  return model\n"
  },
  {
    "path": "Network/rigidmask/networks/pose_dla_dcn.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport math\nimport logging\nimport numpy as np\nfrom os.path import join\n\nimport torch\nfrom torch import nn\nimport torch.nn.functional as F\nimport torch.utils.model_zoo as model_zoo\n\nfrom .DCNv2.DCN.dcn_v2 import DCN\n\nBN_MOMENTUM = 0.1\nlogger = logging.getLogger(__name__)\n\ndef get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):\n    return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"3x3 convolution with padding\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,\n                               stride=1, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 2\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(Bottleneck, 
self).__init__()\n        expansion = Bottleneck.expansion\n        bottle_planes = planes // expansion\n        self.conv1 = nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass BottleneckX(nn.Module):\n    expansion = 2\n    cardinality = 32\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(BottleneckX, self).__init__()\n        cardinality = BottleneckX.cardinality\n        # dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))\n        # bottle_planes = dim * cardinality\n        bottle_planes = planes * cardinality // 32\n        self.conv1 = nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation, bias=False,\n      
                         dilation=dilation, groups=cardinality)\n        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Root(nn.Module):\n    def __init__(self, in_channels, out_channels, kernel_size, residual):\n        super(Root, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels, out_channels, 1,\n            stride=1, bias=False, padding=(kernel_size - 1) // 2)\n        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.residual = residual\n\n    def forward(self, *x):\n        children = x\n        x = self.conv(torch.cat(x, 1))\n        x = self.bn(x)\n        if self.residual:\n            x += children[0]\n        x = self.relu(x)\n\n        return x\n\n\nclass Tree(nn.Module):\n    def __init__(self, levels, block, in_channels, out_channels, stride=1,\n                 level_root=False, root_dim=0, root_kernel_size=1,\n                 dilation=1, root_residual=False):\n        super(Tree, self).__init__()\n        if root_dim == 0:\n            root_dim = 2 * out_channels\n        if level_root:\n            root_dim += in_channels\n        if levels == 1:\n            self.tree1 = block(in_channels, out_channels, stride,\n                               dilation=dilation)\n     
       self.tree2 = block(out_channels, out_channels, 1,\n                               dilation=dilation)\n        else:\n            self.tree1 = Tree(levels - 1, block, in_channels, out_channels,\n                              stride, root_dim=0,\n                              root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n            self.tree2 = Tree(levels - 1, block, out_channels, out_channels,\n                              root_dim=root_dim + out_channels,\n                              root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n        if levels == 1:\n            self.root = Root(root_dim, out_channels, root_kernel_size,\n                             root_residual)\n        self.level_root = level_root\n        self.root_dim = root_dim\n        self.downsample = None\n        self.project = None\n        self.levels = levels\n        if stride > 1:\n            self.downsample = nn.MaxPool2d(stride, stride=stride)\n        if in_channels != out_channels:\n            self.project = nn.Sequential(\n                nn.Conv2d(in_channels, out_channels,\n                          kernel_size=1, stride=1, bias=False),\n                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)\n            )\n\n    def forward(self, x, residual=None, children=None):\n        children = [] if children is None else children\n        bottom = self.downsample(x) if self.downsample else x\n        residual = self.project(bottom) if self.project else bottom\n        if self.level_root:\n            children.append(bottom)\n        x1 = self.tree1(x, residual)\n        if self.levels == 1:\n            x2 = self.tree2(x1)\n            x = self.root(x2, x1, *children)\n        else:\n            children.append(x1)\n            x = self.tree2(x1, children=children)\n        return x\n\n\nclass DLA(nn.Module):\n    def __init__(self, 
levels, channels, num_classes=1000,\n                 block=BasicBlock, residual_root=False, linear_root=False,num_input=14):\n        super(DLA, self).__init__()\n        self.channels = channels\n        self.num_classes = num_classes\n        self.base_layer = nn.Sequential(\n            nn.Conv2d(num_input, channels[0], kernel_size=7, stride=1,\n                      padding=3, bias=False),\n            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True))\n        self.level0 = self._make_conv_level(\n            channels[0], channels[0], levels[0])\n        self.level1 = self._make_conv_level(\n            channels[0], channels[1], levels[1], stride=2)\n        self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,\n                           level_root=False,\n                           root_residual=residual_root)\n        self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,\n                           level_root=True, root_residual=residual_root)\n\n        # for m in self.modules():\n        #     if isinstance(m, nn.Conv2d):\n        #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n        #         m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n        #     elif isinstance(m, nn.BatchNorm2d):\n        #         m.weight.data.fill_(1)\n        #         m.bias.data.zero_()\n\n    def _make_level(self, block, inplanes, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or inplanes != planes:\n            downsample = nn.Sequential(\n                nn.MaxPool2d(stride, stride=stride),\n                nn.Conv2d(inplanes, planes,\n                          kernel_size=1, stride=1, bias=False),\n                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),\n            )\n\n        layers = []\n        layers.append(block(inplanes, planes, stride, downsample=downsample))\n        for i in range(1, blocks):\n            layers.append(block(inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):\n        modules = []\n        for i in range(convs):\n            modules.extend([\n                nn.Conv2d(inplanes, planes, kernel_size=3,\n                          stride=stride if i == 0 else 1,\n                          padding=dilation, bias=False, dilation=dilation),\n                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),\n                nn.ReLU(inplace=True)])\n            inplanes = planes\n        return nn.Sequential(*modules)\n\n    def forward(self, x):\n        y = []\n        x = self.base_layer(x)\n        for i in range(6):\n            x = getattr(self, 'level{}'.format(i))(x)\n            y.append(x)\n        return y\n\n    def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):\n        # fc = self.fc\n        if name.endswith('.pth'):\n            model_weights = torch.load(data + name)\n        else:\n            model_url = get_model_url(data, name, hash)\n            model_weights = model_zoo.load_url(model_url)\n        num_classes = len(model_weights[list(model_weights.keys())[-1]])\n        self.fc = nn.Conv2d(\n            
self.channels[-1], num_classes,\n            kernel_size=1, stride=1, padding=0, bias=True)\n        self.load_state_dict(model_weights)\n        # self.fc = fc\n\n\ndef dla34(pretrained=True, **kwargs):  # DLA-34\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 128, 256, 512],\n                block=BasicBlock, **kwargs)\n    if pretrained:\n        model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')\n    return model\n\nclass Identity(nn.Module):\n\n    def __init__(self):\n        super(Identity, self).__init__()\n\n    def forward(self, x):\n        return x\n\n\ndef fill_fc_weights(layers):\n    for m in layers.modules():\n        if isinstance(m, nn.Conv2d):\n            if m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n\n\ndef fill_up_weights(up):\n    w = up.weight.data\n    f = math.ceil(w.size(2) / 2)\n    c = (2 * f - 1 - f % 2) / (2. * f)\n    for i in range(w.size(2)):\n        for j in range(w.size(3)):\n            w[0, 0, i, j] = \\\n                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))\n    for c in range(1, w.size(0)):\n        w[c, 0, :, :] = w[0, 0, :, :]\n\n\nclass DeformConv(nn.Module):\n    def __init__(self, chi, cho):\n        super(DeformConv, self).__init__()\n        self.actf = nn.Sequential(\n            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True)\n        )\n        self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.actf(x)\n        return x\n\n\nclass IDAUp(nn.Module):\n\n    def __init__(self, o, channels, up_f):\n        super(IDAUp, self).__init__()\n        for i in range(1, len(channels)):\n            c = channels[i]\n            f = int(up_f[i])  \n            proj = DeformConv(c, o)\n            node = DeformConv(o, o)\n     \n            up = nn.ConvTranspose2d(o, o, f * 2, stride=f, \n         
                           padding=f // 2, output_padding=0,\n                                    groups=o, bias=False)\n            fill_up_weights(up)\n\n            setattr(self, 'proj_' + str(i), proj)\n            setattr(self, 'up_' + str(i), up)\n            setattr(self, 'node_' + str(i), node)\n                 \n        \n    def forward(self, layers, startp, endp):\n        for i in range(startp + 1, endp):\n            upsample = getattr(self, 'up_' + str(i - startp))\n            project = getattr(self, 'proj_' + str(i - startp))\n            layers[i] = upsample(project(layers[i]))\n            node = getattr(self, 'node_' + str(i - startp))\n            layers[i] = node(layers[i] + layers[i - 1])\n\n\n\nclass DLAUp(nn.Module):\n    def __init__(self, startp, channels, scales, in_channels=None):\n        super(DLAUp, self).__init__()\n        self.startp = startp\n        if in_channels is None:\n            in_channels = channels\n        self.channels = channels\n        channels = list(channels)\n        scales = np.array(scales, dtype=int)\n        for i in range(len(channels) - 1):\n            j = -i - 2\n            setattr(self, 'ida_{}'.format(i),\n                    IDAUp(channels[j], in_channels[j:],\n                          scales[j:] // scales[j]))\n            scales[j + 1:] = scales[j]\n            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]\n\n    def forward(self, layers):\n        out = [layers[-1]] # start with 32\n        for i in range(len(layers) - self.startp - 1):\n            ida = getattr(self, 'ida_{}'.format(i))\n            ida(layers, len(layers) -i - 2, len(layers))\n            out.insert(0, layers[-1])\n        return out\n\n\nclass Interpolate(nn.Module):\n    def __init__(self, scale, mode):\n        super(Interpolate, self).__init__()\n        self.scale = scale\n        self.mode = mode\n        \n    def forward(self, x):\n        x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, 
align_corners=False)\n        return x\n\n\nclass DLASeg(nn.Module):\n    def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,\n                 last_level, head_conv, out_channel=0,num_input=14):\n        super(DLASeg, self).__init__()\n        assert down_ratio in [2, 4, 8, 16]\n        self.first_level = int(np.log2(down_ratio))\n        self.last_level = last_level\n        self.base = globals()[base_name](pretrained=pretrained,num_input=num_input)\n        channels = self.base.channels\n        scales = [2 ** i for i in range(len(channels[self.first_level:]))]\n        self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)\n\n        if out_channel == 0:\n            out_channel = channels[self.first_level]\n\n        self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level], \n                            [2 ** i for i in range(self.last_level - self.first_level)])\n        \n        self.heads = heads\n        for head in self.heads:\n            classes = self.heads[head]\n            if head_conv > 0:\n              fc = nn.Sequential(\n                  nn.Conv2d(channels[self.first_level], head_conv,\n                    kernel_size=3, padding=1, bias=True),\n                  nn.ReLU(inplace=True),\n                  nn.Conv2d(head_conv, classes, \n                    kernel_size=final_kernel, stride=1, \n                    padding=final_kernel // 2, bias=True))\n              if 'hm' in head:\n                fc[-1].bias.data.fill_(-2.19)\n              else:\n                fill_fc_weights(fc)\n            else:\n              fc = nn.Conv2d(channels[self.first_level], classes, \n                  kernel_size=final_kernel, stride=1, \n                  padding=final_kernel // 2, bias=True)\n              if 'hm' in head:\n                fc.bias.data.fill_(-2.19)\n              else:\n                fill_fc_weights(fc)\n            self.__setattr__(head, fc)\n\n    def forward(self, x):\n   
     x = self.base(x)\n        x = self.dla_up(x)\n\n        y = []\n        for i in range(self.last_level - self.first_level):\n            y.append(x[i].clone())\n        self.ida_up(y, 0, len(y))\n\n        z = {}\n        for head in self.heads:\n            z[head] = self.__getattr__(head)(y[-1])\n        return [z]\n    \n\ndef get_pose_net(num_layers, heads, head_conv=256, down_ratio=4,num_input=14):\n  model = DLASeg('dla{}'.format(num_layers), heads,\n                 pretrained=False,\n                 #pretrained=True,\n                 down_ratio=down_ratio,\n                 final_kernel=1,\n                 last_level=5,\n                 head_conv=head_conv,num_input=num_input)\n  return model\n\n"
  },
  {
    "path": "Network/rigidmask/networks/resnet_dcn.py",
    "content": "# ------------------------------------------------------------------------------\n# Copyright (c) Microsoft\n# Licensed under the MIT License.\n# Written by Bin Xiao (Bin.Xiao@microsoft.com)\n# Modified by Dequan Wang and Xingyi Zhou\n# ------------------------------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport math\nimport logging\n\nimport torch\nimport torch.nn as nn\nfrom .DCNv2.DCN.dcn_v2 import DCN\nimport torch.utils.model_zoo as model_zoo\n\nBN_MOMENTUM = 0.1\nlogger = logging.getLogger(__name__)\n\nmodel_urls = {\n    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = 
self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,\n                               padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,\n                               bias=False)\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion,\n                                  momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\ndef fill_up_weights(up):\n    w = up.weight.data\n    f = math.ceil(w.size(2) / 2)\n    c = (2 * f - 1 - f % 2) / (2. 
* f)\n    for i in range(w.size(2)):\n        for j in range(w.size(3)):\n            w[0, 0, i, j] = \\\n                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))\n    for c in range(1, w.size(0)):\n        w[c, 0, :, :] = w[0, 0, :, :] \n\ndef fill_fc_weights(layers):\n    for m in layers.modules():\n        if isinstance(m, nn.Conv2d):\n            nn.init.normal_(m.weight, std=0.001)\n            # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')\n            # torch.nn.init.xavier_normal_(m.weight.data)\n            if m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n\nclass PoseResNet(nn.Module):\n\n    def __init__(self, block, layers, heads, head_conv):\n        self.inplanes = 64\n        self.heads = heads\n        self.deconv_with_bias = False\n\n        super(PoseResNet, self).__init__()\n        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,\n                               bias=False)\n        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)\n\n        # used for deconv layers\n        self.deconv_layers = self._make_deconv_layer(\n            3,\n            [256, 128, 64],\n            [4, 4, 4],\n        )\n\n        for head in self.heads:\n            classes = self.heads[head]\n            if head_conv > 0:\n                fc = nn.Sequential(\n                  nn.Conv2d(64, head_conv,\n                    kernel_size=3, padding=1, bias=True),\n                  nn.ReLU(inplace=True),\n                  nn.Conv2d(head_conv, classes, \n                    kernel_size=1, stride=1, \n      
              padding=0, bias=True))\n                if 'hm' in head:\n                    fc[-1].bias.data.fill_(-2.19)\n                else:\n                    fill_fc_weights(fc)\n            else:\n                fc = nn.Conv2d(64, classes, \n                  kernel_size=1, stride=1, \n                  padding=0, bias=True)\n                if 'hm' in head:\n                    fc.bias.data.fill_(-2.19)\n                else:\n                    fill_fc_weights(fc)\n            self.__setattr__(head, fc)\n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.inplanes, planes * block.expansion,\n                          kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _get_deconv_cfg(self, deconv_kernel, index):\n        if deconv_kernel == 4:\n            padding = 1\n            output_padding = 0\n        elif deconv_kernel == 3:\n            padding = 1\n            output_padding = 1\n        elif deconv_kernel == 2:\n            padding = 0\n            output_padding = 0\n\n        return deconv_kernel, padding, output_padding\n\n    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):\n        assert num_layers == len(num_filters), \\\n            'ERROR: num_deconv_layers is different len(num_deconv_filters)'\n        assert num_layers == len(num_kernels), \\\n            'ERROR: num_deconv_layers is different len(num_deconv_filters)'\n\n        layers = []\n        for i in 
range(num_layers):\n            kernel, padding, output_padding = \\\n                self._get_deconv_cfg(num_kernels[i], i)\n\n            planes = num_filters[i]\n            fc = DCN(self.inplanes, planes, \n                    kernel_size=(3,3), stride=1,\n                    padding=1, dilation=1, deformable_groups=1)\n            # fc = nn.Conv2d(self.inplanes, planes,\n            #         kernel_size=3, stride=1, \n            #         padding=1, dilation=1, bias=False)\n            # fill_fc_weights(fc)\n            up = nn.ConvTranspose2d(\n                    in_channels=planes,\n                    out_channels=planes,\n                    kernel_size=kernel,\n                    stride=2,\n                    padding=padding,\n                    output_padding=output_padding,\n                    bias=self.deconv_with_bias)\n            fill_up_weights(up)\n\n            layers.append(fc)\n            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))\n            layers.append(nn.ReLU(inplace=True))\n            layers.append(up)\n            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))\n            layers.append(nn.ReLU(inplace=True))\n            self.inplanes = planes\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        x = self.deconv_layers(x)\n        ret = {}\n        for head in self.heads:\n            ret[head] = self.__getattr__(head)(x)\n        return [ret]\n\n    def init_weights(self, num_layers):\n        if 1:\n            url = model_urls['resnet{}'.format(num_layers)]\n            pretrained_state_dict = model_zoo.load_url(url)\n            print('=> loading pretrained model {}'.format(url))\n            self.load_state_dict(pretrained_state_dict, 
strict=False)\n            print('=> init deconv weights from normal distribution')\n            for name, m in self.deconv_layers.named_modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    nn.init.constant_(m.weight, 1)\n                    nn.init.constant_(m.bias, 0)\n\n\nresnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),\n               34: (BasicBlock, [3, 4, 6, 3]),\n               50: (Bottleneck, [3, 4, 6, 3]),\n               101: (Bottleneck, [3, 4, 23, 3]),\n               152: (Bottleneck, [3, 8, 36, 3])}\n\n\ndef get_pose_net(num_layers, heads, head_conv=256):\n  block_class, layers = resnet_spec[num_layers]\n\n  model = PoseResNet(block_class, layers, heads, head_conv=head_conv)\n  model.init_weights(num_layers)\n  return model\n"
  },
  {
    "path": "Network/rigidmask/submodule.py",
    "content": "from __future__ import print_function\nimport torch\nimport torch.nn as nn\nimport torch.utils.data\nfrom torch.autograd import Variable\nimport torch.nn.functional as F\nimport math\nimport numpy as np\nimport pdb\nimport kornia\n\nclass residualBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channels, n_filters, stride=1, downsample=None,dilation=1,with_bn=True):\n        super(residualBlock, self).__init__()\n        if dilation > 1:\n            padding = dilation\n        else:\n            padding = 1\n\n        if with_bn:\n            self.convbnrelu1 = conv2DBatchNormRelu(in_channels, n_filters, 3,  stride, padding, dilation=dilation)\n            self.convbn2 = conv2DBatchNorm(n_filters, n_filters, 3, 1, 1)\n        else:\n            self.convbnrelu1 = conv2DBatchNormRelu(in_channels, n_filters, 3,  stride, padding, dilation=dilation,with_bn=False)\n            self.convbn2 = conv2DBatchNorm(n_filters, n_filters, 3, 1, 1, with_bn=False)\n        self.downsample = downsample\n        self.relu = nn.LeakyReLU(0.1, inplace=True)\n\n    def forward(self, x):\n        residual = x\n\n        out = self.convbnrelu1(x)\n        out = self.convbn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        return self.relu(out)\n\ndef conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):   \n    return nn.Sequential(\n            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, \n                        padding=padding, dilation=dilation, bias=True),\n            nn.BatchNorm2d(out_planes),\n            nn.LeakyReLU(0.1,inplace=True))\n\n\nclass conv2DBatchNorm(nn.Module):\n    def __init__(self, in_channels, n_filters, k_size,  stride, padding, dilation=1, with_bn=True):\n        super(conv2DBatchNorm, self).__init__()\n        bias = not with_bn\n\n        if dilation > 1:\n            conv_mod = 
nn.Conv2d(int(in_channels), int(n_filters), kernel_size=k_size,\n                                 padding=padding, stride=stride, bias=bias, dilation=dilation)\n\n        else:\n            conv_mod = nn.Conv2d(int(in_channels), int(n_filters), kernel_size=k_size,\n                                 padding=padding, stride=stride, bias=bias, dilation=1)\n\n\n        if with_bn:\n            self.cb_unit = nn.Sequential(conv_mod,\n                                         nn.BatchNorm2d(int(n_filters)),)\n        else:\n            self.cb_unit = nn.Sequential(conv_mod,)\n\n    def forward(self, inputs):\n        outputs = self.cb_unit(inputs)\n        return outputs\n\nclass conv2DBatchNormRelu(nn.Module):\n    def __init__(self, in_channels, n_filters, k_size,  stride, padding, dilation=1, with_bn=True):\n        super(conv2DBatchNormRelu, self).__init__()\n        bias = not with_bn\n        if dilation > 1:\n            conv_mod = nn.Conv2d(int(in_channels), int(n_filters), kernel_size=k_size, \n                                 padding=padding, stride=stride, bias=bias, dilation=dilation)\n\n        else:\n            conv_mod = nn.Conv2d(int(in_channels), int(n_filters), kernel_size=k_size, \n                                 padding=padding, stride=stride, bias=bias, dilation=1)\n\n        if with_bn:\n            self.cbr_unit = nn.Sequential(conv_mod,\n                                          nn.BatchNorm2d(int(n_filters)),\n                                          nn.LeakyReLU(0.1, inplace=True),)\n        else:\n            self.cbr_unit = nn.Sequential(conv_mod,\n                                          nn.LeakyReLU(0.1, inplace=True),)\n\n    def forward(self, inputs):\n        outputs = self.cbr_unit(inputs)\n        return outputs\n\nclass pyramidPooling(nn.Module):\n\n    def __init__(self, in_channels, with_bn=True, levels=4):\n        super(pyramidPooling, self).__init__()\n        self.levels = levels\n\n        self.paths = []\n        for i in 
range(levels):\n            self.paths.append(conv2DBatchNormRelu(in_channels, in_channels, 1, 1, 0, with_bn=with_bn))\n        self.path_module_list = nn.ModuleList(self.paths)\n        self.relu = nn.LeakyReLU(0.1, inplace=True)\n    \n    def forward(self, x):\n        h, w = x.shape[2:]\n\n        k_sizes = []\n        strides = []\n        for pool_size in np.linspace(1,min(h,w)//2,self.levels,dtype=int):\n            k_sizes.append((int(h/pool_size), int(w/pool_size)))\n            strides.append((int(h/pool_size), int(w/pool_size)))\n        k_sizes = k_sizes[::-1]\n        strides = strides[::-1]\n\n        pp_sum = x\n\n        for i, module in enumerate(self.path_module_list):\n            out = F.avg_pool2d(x, k_sizes[i], stride=strides[i], padding=0)\n            out = module(out)\n            out = F.upsample(out, size=(h,w), mode='bilinear')\n            pp_sum = pp_sum + 1./self.levels*out\n        pp_sum = self.relu(pp_sum/2.)\n\n        return pp_sum\n\nclass pspnet(nn.Module):\n    \"\"\"\n    Modified PSPNet.  
https://github.com/meetshah1995/pytorch-semseg/blob/master/ptsemseg/models/pspnet.py\n    \"\"\"\n    def __init__(self, is_proj=True,groups=1):\n        super(pspnet, self).__init__()\n        self.inplanes = 32\n        self.is_proj = is_proj\n\n        # Encoder\n        self.convbnrelu1_1 = conv2DBatchNormRelu(in_channels=3, k_size=3, n_filters=16,\n                                                 padding=1, stride=2)\n        self.convbnrelu1_2 = conv2DBatchNormRelu(in_channels=16, k_size=3, n_filters=16,\n                                                 padding=1, stride=1)\n        self.convbnrelu1_3 = conv2DBatchNormRelu(in_channels=16, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        # Vanilla Residual Blocks\n        self.res_block3 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block5 = self._make_layer(residualBlock,128,1,stride=2)\n        self.res_block6 = self._make_layer(residualBlock,128,1,stride=2)\n        self.res_block7 = self._make_layer(residualBlock,128,1,stride=2)\n        self.pyramid_pooling = pyramidPooling(128, levels=3)\n\n        # Iconvs\n        self.upconv6 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv5 = conv2DBatchNormRelu(in_channels=192, k_size=3, n_filters=128,\n                                                 padding=1, stride=1)\n        self.upconv5 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv4 = conv2DBatchNormRelu(in_channels=192, k_size=3, n_filters=128,\n                                                 padding=1, stride=1)\n        self.upconv4 = 
nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv3 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.upconv3 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1))\n        self.iconv2 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n\n        if self.is_proj:\n            self.proj6 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj5 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj4 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj3 = conv2DBatchNormRelu(in_channels=64, k_size=1,n_filters=64//groups, padding=0,stride=1)\n            self.proj2 = conv2DBatchNormRelu(in_channels=64, k_size=1,n_filters=64//groups, padding=0,stride=1)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n                if hasattr(m.bias,'data'):\n                    m.bias.data.zero_()\n       \n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(nn.Conv2d(self.inplanes, planes * block.expansion,\n                                                 kernel_size=1, stride=stride, bias=False),\n                                       nn.BatchNorm2d(planes * block.expansion),)\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        # H, W -> H/2, W/2\n        conv1 = self.convbnrelu1_1(x)\n        conv1 = self.convbnrelu1_2(conv1)\n        conv1 = self.convbnrelu1_3(conv1)\n\n        ## H/2, W/2 -> H/4, W/4\n        pool1 = F.max_pool2d(conv1, 3, 2, 1)\n\n        # H/4, W/4 -> H/16, W/16\n        rconv3 = self.res_block3(pool1)\n        conv4 = self.res_block5(rconv3)\n        conv5 = self.res_block6(conv4)\n        conv6 = self.res_block7(conv5)\n        conv6 = self.pyramid_pooling(conv6)\n\n        conv6x = F.upsample(conv6, [conv5.size()[2],conv5.size()[3]],mode='bilinear')\n        concat5 = torch.cat((conv5,self.upconv6[1](conv6x)),dim=1)\n        conv5 = self.iconv5(concat5) \n\n        conv5x = F.upsample(conv5, [conv4.size()[2],conv4.size()[3]],mode='bilinear')\n        concat4 = torch.cat((conv4,self.upconv5[1](conv5x)),dim=1)\n        conv4 = self.iconv4(concat4) \n\n        conv4x = F.upsample(conv4, [rconv3.size()[2],rconv3.size()[3]],mode='bilinear')\n        concat3 = torch.cat((rconv3,self.upconv4[1](conv4x)),dim=1)\n        conv3 = self.iconv3(concat3) \n\n        conv3x = F.upsample(conv3, [pool1.size()[2],pool1.size()[3]],mode='bilinear')\n        
concat2 = torch.cat((pool1,self.upconv3[1](conv3x)),dim=1)\n        conv2 = self.iconv2(concat2) \n\n        if self.is_proj:\n            proj6 = self.proj6(conv6)\n            proj5 = self.proj5(conv5)\n            proj4 = self.proj4(conv4)\n            proj3 = self.proj3(conv3)\n            proj2 = self.proj2(conv2)\n            return proj6,proj5,proj4,proj3,proj2\n        else:\n            return conv6, conv5, conv4, conv3, conv2\n\n\nclass pspnet_s(nn.Module):\n    \"\"\"\n    Modified PSPNet.  https://github.com/meetshah1995/pytorch-semseg/blob/master/ptsemseg/models/pspnet.py\n    \"\"\"\n    def __init__(self, is_proj=True,groups=1):\n        super(pspnet_s, self).__init__()\n        self.inplanes = 32\n        self.is_proj = is_proj\n\n        # Encoder\n        self.convbnrelu1_1 = conv2DBatchNormRelu(in_channels=3, k_size=3, n_filters=16,\n                                                 padding=1, stride=2)\n        self.convbnrelu1_2 = conv2DBatchNormRelu(in_channels=16, k_size=3, n_filters=16,\n                                                 padding=1, stride=1)\n        self.convbnrelu1_3 = conv2DBatchNormRelu(in_channels=16, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        # Vanilla Residual Blocks\n        self.res_block3 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block5 = self._make_layer(residualBlock,128,1,stride=2)\n        self.res_block6 = self._make_layer(residualBlock,128,1,stride=2)\n        self.res_block7 = self._make_layer(residualBlock,128,1,stride=2)\n        self.pyramid_pooling = pyramidPooling(128, levels=3)\n\n        # Iconvs\n        self.upconv6 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv5 = conv2DBatchNormRelu(in_channels=192, k_size=3, n_filters=128,\n       
                                          padding=1, stride=1)\n        self.upconv5 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv4 = conv2DBatchNormRelu(in_channels=192, k_size=3, n_filters=128,\n                                                 padding=1, stride=1)\n        self.upconv4 = nn.Sequential(nn.Upsample(scale_factor=2),\n                                     conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1))\n        self.iconv3 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        #self.upconv3 = nn.Sequential(nn.Upsample(scale_factor=2),\n        #                             conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n        #                                         padding=1, stride=1))\n        #self.iconv2 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=64,\n        #                                         padding=1, stride=1)\n\n        if self.is_proj:\n            self.proj6 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj5 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj4 = conv2DBatchNormRelu(in_channels=128,k_size=1,n_filters=128//groups, padding=0,stride=1)\n            self.proj3 = conv2DBatchNormRelu(in_channels=64, k_size=1,n_filters=64//groups, padding=0,stride=1)\n            #self.proj2 = conv2DBatchNormRelu(in_channels=64, k_size=1,n_filters=64//groups, padding=0,stride=1)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n    
            m.weight.data.normal_(0, math.sqrt(2. / n))\n                if hasattr(m.bias,'data'):\n                    m.bias.data.zero_()\n       \n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(nn.Conv2d(self.inplanes, planes * block.expansion,\n                                                 kernel_size=1, stride=stride, bias=False),\n                                       nn.BatchNorm2d(planes * block.expansion),)\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        # H, W -> H/2, W/2\n        conv1 = self.convbnrelu1_1(x)\n        conv1 = self.convbnrelu1_2(conv1)\n        conv1 = self.convbnrelu1_3(conv1)\n\n        ## H/2, W/2 -> H/4, W/4\n        pool1 = F.max_pool2d(conv1, 3, 2, 1)\n\n        # H/4, W/4 -> H/16, W/16\n        rconv3 = self.res_block3(pool1)\n        conv4 = self.res_block5(rconv3)\n        conv5 = self.res_block6(conv4)\n        conv6 = self.res_block7(conv5)\n        conv6 = self.pyramid_pooling(conv6)\n\n        conv6x = F.upsample(conv6, [conv5.size()[2],conv5.size()[3]],mode='bilinear')\n        concat5 = torch.cat((conv5,self.upconv6[1](conv6x)),dim=1)\n        conv5 = self.iconv5(concat5) \n\n        conv5x = F.upsample(conv5, [conv4.size()[2],conv4.size()[3]],mode='bilinear')\n        concat4 = torch.cat((conv4,self.upconv5[1](conv5x)),dim=1)\n        conv4 = self.iconv4(concat4) \n\n        conv4x = F.upsample(conv4, [rconv3.size()[2],rconv3.size()[3]],mode='bilinear')\n        concat3 = torch.cat((rconv3,self.upconv4[1](conv4x)),dim=1)\n        conv3 = self.iconv3(concat3) \n\n        #conv3x = F.upsample(conv3, 
[pool1.size()[2],pool1.size()[3]],mode='bilinear')\n        #concat2 = torch.cat((pool1,self.upconv3[1](conv3x)),dim=1)\n        #conv2 = self.iconv2(concat2) \n\n        if self.is_proj:\n            proj6 = self.proj6(conv6)\n            proj5 = self.proj5(conv5)\n            proj4 = self.proj4(conv4)\n            proj3 = self.proj3(conv3)\n        #    proj2 = self.proj2(conv2)\n        #    return proj6,proj5,proj4,proj3,proj2\n            return proj6,proj5,proj4,proj3\n        else:\n        #    return conv6, conv5, conv4, conv3, conv2\n            return conv6, conv5, conv4, conv3\n\nclass bfmodule(nn.Module):\n    def __init__(self, inplanes, outplanes):\n        super(bfmodule, self).__init__()\n        self.proj = conv2DBatchNormRelu(in_channels=inplanes,k_size=1,n_filters=64,padding=0,stride=1)\n        self.inplanes = 64\n        # Vanilla Residual Blocks\n        self.res_block3 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block5 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block6 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block7 = self._make_layer(residualBlock,128,1,stride=2)\n        self.pyramid_pooling = pyramidPooling(128, levels=3)\n        # Iconvs\n        self.upconv6 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.upconv5 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.upconv4 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.upconv3 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.iconv5 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                        
         padding=1, stride=1)\n        self.iconv4 = conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.iconv3 = conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.iconv2 = nn.Sequential(conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1),\n                                    nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True))\n\n        self.proj6 = nn.Conv2d(128, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj5 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj4 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj3 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n                if hasattr(m.bias,'data'):\n                    m.bias.data.zero_()\n       \n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(nn.Conv2d(self.inplanes, planes * block.expansion,\n                                                 kernel_size=1, stride=stride, bias=False),\n                                       nn.BatchNorm2d(planes * block.expansion),)\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        proj = self.proj(x) # 4x\n        rconv3 = self.res_block3(proj) #8x\n        conv4 = self.res_block5(rconv3) #16x\n        conv5 = self.res_block6(conv4) #32x\n        conv6 = self.res_block7(conv5) #64x\n        conv6 = self.pyramid_pooling(conv6) #64x\n        pred6 = self.proj6(conv6)\n\n        conv6u = F.upsample(conv6, [conv5.size()[2],conv5.size()[3]], mode='bilinear')\n        concat5 = torch.cat((conv5,self.upconv6(conv6u)),dim=1) \n        conv5 = self.iconv5(concat5) #32x\n        pred5 = self.proj5(conv5)\n\n        conv5u = F.upsample(conv5, [conv4.size()[2],conv4.size()[3]], mode='bilinear')\n        concat4 = torch.cat((conv4,self.upconv5(conv5u)),dim=1)\n        conv4 = self.iconv4(concat4) #16x\n        pred4 = self.proj4(conv4)\n\n        conv4u = F.upsample(conv4, [rconv3.size()[2],rconv3.size()[3]], mode='bilinear')\n        concat3 = torch.cat((rconv3,self.upconv4(conv4u)),dim=1)\n        conv3 = self.iconv3(concat3) # 8x\n        pred3 = self.proj3(conv3)\n\n        conv3u = F.upsample(conv3, [x.size()[2],x.size()[3]], mode='bilinear')\n        concat2 = torch.cat((proj,self.upconv3(conv3u)),dim=1)\n        pred2 = 
self.iconv2(concat2)  # 4x\n\n        return pred2, pred3, pred4, pred5, pred6\n\nclass bfmodule_feat(nn.Module):\n    def __init__(self, inplanes, outplanes):\n        super(bfmodule_feat, self).__init__()\n        self.proj = conv2DBatchNormRelu(in_channels=inplanes,k_size=1,n_filters=64,padding=0,stride=1)\n        self.inplanes = 64\n        # Vanilla Residual Blocks\n        self.res_block3 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block5 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block6 = self._make_layer(residualBlock,64,1,stride=2)\n        self.res_block7 = self._make_layer(residualBlock,128,1,stride=2)\n        self.pyramid_pooling = pyramidPooling(128, levels=3)\n        # Iconvs\n        self.upconv6 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.upconv5 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.upconv4 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.upconv3 = conv2DBatchNormRelu(in_channels=64, k_size=3, n_filters=32,\n                                                 padding=1, stride=1)\n        self.iconv5 = conv2DBatchNormRelu(in_channels=128, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.iconv4 = conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.iconv3 = conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n        self.iconv2 = conv2DBatchNormRelu(in_channels=96, k_size=3, n_filters=64,\n                                                 padding=1, stride=1)\n\n        
self.proj6 = nn.Conv2d(128, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj5 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj4 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj3 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n        self.proj2 = nn.Conv2d(64, outplanes,kernel_size=3, stride=1, padding=1, bias=True)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. / n))\n                if hasattr(m.bias,'data'):\n                    m.bias.data.zero_()\n       \n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(nn.Conv2d(self.inplanes, planes * block.expansion,\n                                                 kernel_size=1, stride=stride, bias=False),\n                                       nn.BatchNorm2d(planes * block.expansion),)\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        proj = self.proj(x) # 4x\n        rconv3 = self.res_block3(proj) #8x\n        conv4 = self.res_block5(rconv3) #16x\n        conv5 = self.res_block6(conv4) #32x\n        conv6 = self.res_block7(conv5) #64x\n        conv6 = self.pyramid_pooling(conv6) #64x\n        pred6 = self.proj6(conv6)\n\n        conv6u = F.upsample(conv6, [conv5.size()[2],conv5.size()[3]], mode='bilinear')\n        concat5 = torch.cat((conv5,self.upconv6(conv6u)),dim=1) \n        conv5 = self.iconv5(concat5) #32x\n     
   pred5 = self.proj5(conv5)\n\n        conv5u = F.upsample(conv5, [conv4.size()[2],conv4.size()[3]], mode='bilinear')\n        concat4 = torch.cat((conv4,self.upconv5(conv5u)),dim=1)\n        conv4 = self.iconv4(concat4) #16x\n        pred4 = self.proj4(conv4)\n\n        conv4u = F.upsample(conv4, [rconv3.size()[2],rconv3.size()[3]], mode='bilinear')\n        concat3 = torch.cat((rconv3,self.upconv4(conv4u)),dim=1)\n        conv3 = self.iconv3(concat3) # 8x\n        pred3 = self.proj3(conv3)\n\n        conv3u = F.upsample(conv3, [x.size()[2],x.size()[3]], mode='bilinear')\n        concat2 = torch.cat((proj,self.upconv3(conv3u)),dim=1)\n        conv2 = self.iconv2(concat2)  # 4x\n        pred2 = self.proj2(conv2)  # 4x\n        return pred2, conv2\n\n\ndef compute_geo_costs(rot, trans, Ex, Kinv, hp0, hp1, tau, Kinv_n=None):\n    if Kinv_n is None: Kinv_n = Kinv\n    R01 = kornia.angle_axis_to_rotation_matrix(rot)\n    H01 = Kinv.inverse().matmul(R01).matmul(Kinv_n)\n    comp_hp1 = H01.matmul(hp1.permute(0,2,1))\n    foe = (comp_hp1-tau*hp0.permute(0,2,1))\n    parallax3d = Kinv.matmul(foe)\n    p3dmag = parallax3d.norm(2,1)[:,np.newaxis]\n    parallax2d = (comp_hp1/comp_hp1[:,-1:]-hp0.permute(0,2,1))[:,:2]\n    p2dmag = parallax2d.norm(2,1)[:,np.newaxis]\n    p2dnorm = parallax2d / (1e-9+p2dmag)\n    foe_cam = Kinv.inverse().matmul(trans[:,:,np.newaxis])\n    foe_cam = foe_cam[:,:2] / (1e-9+foe_cam[:,-1:])\n    direct = foe_cam -hp0.permute(0,2,1)[:,:2]\n    directn = direct / (1e-9+direct.norm(2,1)[:,np.newaxis])\n\n    # cost metrics: 0) R-homography+symterr; 1) sampson 2) 2D angular (P+P) 3) 3D distance 4) 3D angular (P+P)\n    ##TODO validate\n    comp_hp0 = H01.inverse().matmul(hp0.permute(0,2,1))\n    mcost00 = parallax2d.norm(2,1)\n    mcost01 = (comp_hp0/comp_hp0[:,-1:] - hp1.permute(0,2,1))[:,:2].norm(2,1)\n    mcost1 = sampson_err(Kinv.matmul(hp0.permute(0,2,1)),\n                         Kinv_n.matmul(hp1.permute(0,2,1)),Ex.cuda().permute(0,2,1))  # 
variable K\n    mcost2 = -(trans[:,-1:,np.newaxis]).sign()*(directn*p2dnorm).sum(1,keepdims=True)\n    mcost4 = -(trans[:,:,np.newaxis]*parallax3d).sum(1,keepdims=True)/(p3dmag+1e-9)\n    mcost3 = torch.clamp(1-mcost4.pow(2),0,1).sqrt()*p3dmag*mcost4.sign()\n    mcost10 = torch.clamp(1-mcost2.pow(2),0,1).sqrt()*p2dmag*mcost2.sign()\n    return mcost00, mcost01, mcost1, mcost2, mcost3, mcost4, p3dmag, mcost10\n\ndef get_skew_mat(transx,rotx):\n    rot = kornia.angle_axis_to_rotation_matrix(rotx)\n    trans = -rot.permute(0,2,1).matmul(transx[:,:,np.newaxis])[:,:,0]\n    rot = rot.permute(0,2,1)\n    tx = torch.zeros(transx.shape[0],3,3)\n    tx[:,0,1] = -transx[:,2]\n    tx[:,0,2] = transx[:,1]\n    tx[:,1,0] = transx[:,2]\n    tx[:,1,2] = -transx[:,0]\n    tx[:,2,0] = -transx[:,1]\n    tx[:,2,1] = transx[:,0]\n    return rot.matmul(tx)\n\ndef sampson_err(x1h, x2h, F):\n    l2 = F.permute(0,2,1).matmul(x1h)\n    l1 = F.matmul(x2h)\n    algdis = (l1 * x1h).sum(1)\n    dis = algdis**2 /  (1e-9+l1[:,0]**2+l1[:,1]**2+l2[:,0]**2+l2[:,1]**2)\n    return dis\n\n\ndef get_intrinsics(intr, noise=False):\n    f =  intr[0].float()\n    cx = intr[1].float()\n    cy = intr[2].float()\n    if len(intr)>10:  # test time\n        dfx= intr[10].float()\n        dfy= intr[11].float()\n        dfx = 1.\n        dfy = 1.\n    else:  # train time\n        dfx = 1.\n        dfy = 1.\n    bs = f.shape[0]\n\n    delta = 1e-4\n    if noise:\n        fo = f.clone()\n        cxo = cx.clone()\n        cyo = cy.clone()\n        f = torch.Tensor(np.random.normal(loc=0., scale=delta,size=(bs,))).cuda().exp() * fo\n        cx = torch.Tensor(np.random.normal(loc=0.,scale=delta,size=(bs,))).cuda().exp() * cxo\n        cy = torch.Tensor(np.random.normal(loc=0.,scale=delta,size=(bs,))).cuda().exp() * cyo\n\n    #Kinv = torch.Tensor(np.eye(3)[np.newaxis]).cuda().repeat(bs,1,1)\n    #Kinv[:,2,2] *= f\n    #Kinv[:,0,2] -= cx\n    #Kinv[:,1,2] -= cy\n    #Kinv /= f[:,np.newaxis,np.newaxis] #4,3,3\n\n    
Kinv = torch.Tensor(np.eye(3)[np.newaxis]).cuda().repeat(bs,1,1)\n    Kinv[:,0,0] =  f/dfx\n    Kinv[:,1,1] =  f/dfy\n    Kinv[:,0,2] = cx/dfx\n    Kinv[:,1,2] = cy/dfy\n    Kinv = Kinv.inverse()\n\n    Taug = torch.cat(intr[4:10],-1).view(-1,bs).T # 4,6\n    Taug = torch.cat((Taug.view(bs,3,2).permute(0,2,1),Kinv[:,2:3]),1)\n    Kinv = Kinv.matmul(Taug)\n    if len(intr)>12:\n        Kinv_n = torch.Tensor(np.eye(3)[np.newaxis]).cuda().repeat(bs,1,1)\n        fn = intr[12].float()\n        #Kinv_n[:,2,2] *= fn\n        #Kinv_n[:,0,2] -= cx\n        #Kinv_n[:,1,2] -= cy\n        #Kinv_n /= fn[:,np.newaxis,np.newaxis] #4,3,3\n        Kinv_n = torch.Tensor(np.eye(3)[np.newaxis]).cuda().repeat(bs,1,1)\n        Kinv_n[:,0,0] = fn/dfx\n        Kinv_n[:,1,1] = fn/dfy\n        Kinv_n[:,0,2] = cx/dfx\n        Kinv_n[:,1,2] = cy/dfy\n        Kinv_n = Kinv_n.inverse()\n\n    elif noise:\n        f = torch.Tensor(np.random.normal(loc=0., scale=delta,size=(bs,))).cuda().exp() * fo\n        cx = torch.Tensor(np.random.normal(loc=0.,scale=delta,size=(bs,))).cuda().exp() * cxo\n        cy = torch.Tensor(np.random.normal(loc=0.,scale=delta,size=(bs,))).cuda().exp() * cyo\n\n        Kinv_n = torch.Tensor(np.eye(3)[np.newaxis]).cuda().repeat(bs,1,1)\n        Kinv_n[:,2,2] *= f\n        Kinv_n[:,0,2] -= cx\n        Kinv_n[:,1,2] -= cy\n        Kinv_n /= f[:,np.newaxis,np.newaxis] #4,3,3\n\n        Taug = torch.cat(intr[4:10],-1).view(-1,bs).T # 4,6\n        Taug = torch.cat((Taug.view(bs,3,2).permute(0,2,1),Kinv_n[:,2:3]),1)\n        Kinv_n = Kinv_n.matmul(Taug)\n    else:\n        Kinv_n = Kinv\n\n    return Kinv, Kinv_n\n\ndef testEss(K0,K1,R,T,p1,p2):\n    import cv2\n    testP = cv2.triangulatePoints(K0.dot(np.concatenate( (np.eye(3),np.zeros((3,1))), -1)), \n                          K1.dot(np.concatenate( (R,T), -1)), \n                          p1[:2],p2[:2])\n    Z1 = testP[2,:]/testP[-1,:]\n    Z2 = (R.dot(Z1*np.linalg.inv(K0).dot(p1))+T)[-1,:]\n    if ((Z1>0).sum() > 
(Z1<=0).sum()) and ((Z2>0).sum() > (Z2<=0).sum()):\n        #print(Z1)\n        #print(Z2)\n        return True\n    else:\n        return False\n"
  },
  {
    "path": "README.md",
    "content": "# DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments\r\n\r\n<p align=\"center\">\r\n    <a href=\"https://ieeexplore.ieee.org/abstract/document/10161306\"><img src=\"https://img.shields.io/badge/ICRA-2023-yellow?logo=ieee\"></a>\r\n    <a href=\"https://arxiv.org/abs/2209.08430\"><img src=\"https://img.shields.io/badge/arXiv-2209.08430-b31b1b\"></a>\r\n    <a href=\"https://youtu.be/6yO7RsZjSBQ\"><img src=\"https://img.shields.io/badge/Video-Demo-critical?logo=youtube\"></a>\r\n    <a href=\"https://github.com/castacks/DytanVO/blob/main/LICENSE\"><img src=\"https://img.shields.io/badge/License-BSD%203--Clause-blue.svg\"></a>\r\n</p>\r\n<p align=\"center\">\r\n\tDytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments (ICRA 2023)<br>\r\n  By\r\n  <a href=\"https://github.com/Geniussh/\">Shihao Shen</a>, \r\n  <a href=\"http://missinglight.github.io/\">Yilin Cai</a>, \r\n  <a href=\"http://www.wangwenshan.com/\">Wenshan Wang</a>, and \r\n  <a href=\"https://theairlab.org/team/sebastian/\">Sebastian Scherer</a>.\r\n</p>\r\n\r\n### What's new.\r\n\r\n- 01-17-2023: Our paper has been accepted to ICRA 2023!\r\n\r\n- 01-05-2023: Clean up and upload the codebase for _DytanVO_. Pretrained weights and datasets are also ready.\r\n\r\n- 09-20-2022: Remove _Dynamic Dense RGB-D SLAM with Learning-Based Visual Odometry_. The repo will be used to release codebase for the most recent ICRA 2023 submission.\r\n\r\n\r\n## Introduction\r\nDytanVO is a learning-based visual odometry (VO) based on its precursor, [TartanVO](https://github.com/castacks/tartanvo). It is the first supervised learning-based VO method that deals with dynamic environments. It takes two consecutive monocular frames in real-time and predicts camera ego-motion in an iterative fashion. 
It achieves an average improvement of 27.7% over state-of-the-art VO solutions in real-world dynamic environments, and even performs competitively among dynamic visual SLAM systems which optimize the trajectory on the backend. Experiments on plentiful unseen environments also demonstrate its generalizability.\r\n\r\n\r\n## Installation\r\nWe provide an environment file using [anaconda](https://www.anaconda.com/). The code has been tested on an RTX 2080Ti with CUDA 11.4.\r\n```bash\r\nconda env create -f environment.yml\r\nconda activate dytanvo\r\n```\r\n\r\nCompile [DCNv2](https://github.com/MatthewHowe/DCNv2).\r\n```\r\ncd Network/rigidmask/networks/DCNv2/; python setup.py install; cd -\r\n```\r\n\r\n\r\n## Models and Data\r\n\r\n### Pretrained weights\r\nDownload [here](https://drive.google.com/file/d/1ujYmKv5FHXYe1KETabTnSs-R2OE0KJV3/view?usp=share_link) and unzip it to the `models` folder. \r\n\r\n### KITTI dynamic sequences\r\nOriginal sequences in [KITTI Odometry](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) are trimmed into sub-sequences which contain moving pedestrians, vehicles and cyclists so that VO's robustness to dynamic objects can be explicitly evaluated. Download [DynaKITTI](https://drive.google.com/file/d/1BDnraRWzNf938UsfprWIkcqCSfOUyGt9/view?usp=share_link) and unzip it to the `data` folder. Please cite this paper if you find it useful in your work. \r\n\r\n### AirDOS-Shibuya\r\nFollow [tartanair-shibuya](https://github.com/haleqiu/tartanair-shibuya) and download it to the `data` folder.\r\n\r\n### (Optional) Scene Flow\r\nOne can also test the model on [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html), which was used to train both the VO and the segmentation networks. Scene Flow datasets have very challenging sequences with large areas of dynamic objects in image frames. 
\r\n\r\nYou can create symbolic links to wherever the datasets were downloaded in the `data` folder.\r\n\r\n```Shell\r\n├── data\r\n    ├── AirDOS_shibuya\r\n        ├── RoadCrossing03\r\n            ├── image_0\r\n            ├── ...\r\n            ├── gt_pose.txt\r\n        ├── RoadCrossing04\r\n        ├── ...\r\n    ├── DynaKITTI\r\n        ├── 00_1\r\n            ├── image_2\r\n            ├── ...\r\n            ├── pose_left.txt\r\n            ├── calib.txt\r\n        ├── 01_0\r\n        ├── ...\r\n    ├── SceneFlow\r\n        ├── FlyingThings3D\r\n            ├── frames_cleanpass\r\n            ├── frames_finalpass\r\n            ├── optical_flow\r\n            ├── camera_data\r\n        ├── Driving\r\n        ├── Monkaa\r\n    ├── ...\r\n```\r\n\r\n\r\n## Evaluation\r\nCreate a folder to save output flow, segmentation, or poses. \r\n```bash\r\nmkdir results\r\n```\r\n\r\n### Dynamic sequences in KITTI (loading the finetuned VO model at once)\r\n```bash\r\ntraj=00_1\r\npython -W ignore::UserWarning vo_trajectory_from_folder.py --vo-model-name vonet_ft.pkl  \\\r\n\t\t\t\t\t\t\t   --seg-model-name segnet-kitti.pth  \\\r\n\t\t\t\t\t\t\t   --kitti --kitti-intrinsics-file data/DynaKITTI/$traj/calib.txt  \\\r\n\t\t\t\t\t\t\t   --test-dir data/DynaKITTI/$traj/image_2  \\\r\n\t\t\t\t\t\t\t   --pose-file data/DynaKITTI/$traj/pose_left.txt \r\n```\r\n\r\n### AirDOS-Shibuya (loading FlowNet and PoseNet separately)\r\n```bash\r\ntraj=RoadCrossing03\r\npython -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl  \\\r\n\t\t\t\t\t\t\t   --pose-model-name posenet.pkl  \\\r\n\t\t\t\t\t\t\t   --seg-model segnet-sf.pth  \\\r\n\t\t\t\t\t\t\t   --airdos  \\\r\n\t\t\t\t\t\t\t   --test-dir data/AirDOS_shibuya/$traj/image_0  \\\r\n\t\t\t\t\t\t\t   --pose-file data/AirDOS_shibuya/$traj/gt_pose.txt \r\n```\r\n\r\n### Scene 
Flow\r\n```bash\r\nimg=Driving/frames_finalpass/15mm_focallength/scene_forwards/fast/left\r\npose=Driving/camera_data/15mm_focallength/scene_forwards/fast/camera_data.txt\r\npython -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl  \\\r\n\t\t\t\t\t\t\t   --pose-model-name posenet.pkl  \\\r\n\t\t\t\t\t\t\t   --seg-model segnet-sf.pth  \\\r\n\t\t\t\t\t\t\t   --sceneflow  \\\r\n\t\t\t\t\t\t\t   --test-dir data/SceneFlow/$img  \\\r\n\t\t\t\t\t\t\t   --pose-file data/SceneFlow/$pose\r\n```\r\n\r\nAdd `--save-flow` flag to save intermediate optical flow outputs into the `results` folder.\r\n\r\nAdjust the batch size and the worker number by `--batch-size 10`, `--worker-num 5`. \r\n\r\n\r\n## (Optional) Segmentation Mask Ground Truth\r\nIf your dataset has ground truth for camera motion, optical flow and disparity change across consecutive frames, we provide an example script that automatically generates ground-truth segmentation masks from these modalities based on pure geometry, using the [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) as an example. \r\n\r\n```bash\r\npython Datasets/segmask_gt.py --database data/SceneFlow --frames_pass clean --dataset FlyingThings3D\r\n```\r\n\r\nAdd `--debug` flag to save visualizations of the generated masks.\r\n\r\n## Citation\r\nIf you find our code, paper or dataset useful, please cite\r\n```bibtex\r\n@inproceedings{shen2023dytanvo,\r\n  title={Dytanvo: Joint refinement of visual odometry and motion segmentation in dynamic environments},\r\n  author={Shen, Shihao and Cai, Yilin and Wang, Wenshan and Scherer, Sebastian},\r\n  booktitle={2023 IEEE International Conference on Robotics and Automation (ICRA)},\r\n  pages={4048--4055},\r\n  year={2023},\r\n  organization={IEEE}\r\n}\r\n```\r\n\r\n## Acknowledgement\r\nWe built DytanVO on top of [TartanVO](https://github.com/castacks/tartanvo). 
We implemented the segmentation network by adapting [rigidmask](https://github.com/gengshan-y/rigidmask). We thank [Gengshan Yang](https://gengshan-y.github.io/) for his code and suggestions. \r\n\r\n## License\r\nThis software is BSD licensed.\r\n\r\nCopyright (c) 2020, Carnegie Mellon University All rights reserved.\r\n\r\nRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\r\n\r\nRedistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\r\n\r\nRedistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\r\n\r\nNeither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r\n"
  },
  {
    "path": "environment.yml",
    "content": "name: dytanvo\nchannels:\n  - pytorch3d\n  - pytorch\n  - conda-forge\n  - defaults\ndependencies:\n  - _libgcc_mutex=0.1=main\n  - blas=1.0=mkl\n  - bzip2=1.0.8=h516909a_3\n  - ca-certificates=2022.5.18.1=ha878542_0\n  - cairo=1.14.12=h8948797_3\n  - certifi=2022.5.18.1=py38h578d9bd_0\n  - colorama=0.4.4=pyh9f0ad1d_0\n  - cudatoolkit=10.2.89=hfd86e86_1\n  - cudatoolkit-dev=11.4.0=py38h497a2fe_1\n  - cycler=0.10.0=py38_0\n  - dbus=1.13.18=hb2f20db_0\n  - expat=2.2.10=he6710b0_2\n  - ffmpeg=4.0.2=ha0c5888_2\n  - fontconfig=2.13.0=h9420a91_0\n  - freeglut=3.2.1=h58526e2_0\n  - freetype=2.10.4=h5ab3b9f_0\n  - glib=2.63.1=h5a9c865_0\n  - gmp=6.1.2=hf484d3e_1000\n  - gnutls=3.5.19=h2a4e5f8_1\n  - graphite2=1.3.13=h58526e2_1001\n  - gst-plugins-base=1.14.0=hbbd80ab_1\n  - gstreamer=1.14.0=hb453b48_1\n  - harfbuzz=1.8.8=hffaf4a1_0\n  - hdf5=1.10.2=hc401514_3\n  - icu=58.2=he6710b0_3\n  - intel-openmp=2020.2=254\n  - jasper=2.0.14=h07fcdf6_1\n  - jpeg=9b=h024ee3a_2\n  - kiwisolver=1.3.0=py38h2531618_0\n  - kornia=0.5.3=pyhd8ed1ab_0\n  - lcms2=2.11=h396b838_0\n  - ld_impl_linux-64=2.33.1=h53a641e_7\n  - libedit=3.1.20191231=h14c3975_1\n  - libffi=3.2.1=hf484d3e_1007\n  - libgcc-ng=9.1.0=hdf63c60_0\n  - libgfortran=3.0.0=1\n  - libgfortran-ng=7.3.0=hdf63c60_0\n  - libglu=9.0.0=he1b5a44_1001\n  - libiconv=1.16=h516909a_0\n  - libopencv=3.4.2=hb342d67_1\n  - libpng=1.6.37=hbc83047_0\n  - libstdcxx-ng=9.1.0=hdf63c60_0\n  - libtiff=4.1.0=h2733197_1\n  - libuuid=1.0.3=h1bed415_2\n  - libuv=1.40.0=h7b6447c_0\n  - libxcb=1.14=h7b6447c_0\n  - libxml2=2.9.10=hb55368b_3\n  - lz4-c=1.9.2=heb0550a_3\n  - matplotlib=3.3.2=0\n  - matplotlib-base=3.3.2=py38h817c723_0\n  - mkl=2020.2=256\n  - mkl-service=2.3.0=py38he904b0f_0\n  - mkl_fft=1.2.0=py38h23d657b_0\n  - mkl_random=1.1.1=py38h0573a6f_0\n  - ncurses=6.2=he6710b0_1\n  - nettle=3.3=0\n  - ninja=1.10.2=py38hff7bd54_0\n  - olefile=0.46=py_0\n  - openh264=1.8.0=hdbcaa40_1000\n  - openssl=1.1.1h=h516909a_0\n  - 
pcre=8.44=he6710b0_0\n  - pillow=8.0.1=py38he98fc37_0\n  - pip=20.3=py38h06a4308_0\n  - pixman=0.40.0=h36c2ea0_0\n  - pyparsing=2.4.7=py_0\n  - pyqt=5.9.2=py38h05f1152_4\n  - python=3.8.3=cpython_he5300dc_0\n  - python-dateutil=2.8.2=pyhd8ed1ab_0\n  - python_abi=3.8=1_cp38\n  - pytorch=1.7.0=py3.8_cuda10.2.89_cudnn7.6.5_0\n  - pytorch3d=0.3.0=py38_cu102_pyt170\n  - pyyaml=5.3.1=py38h8df0ef7_1\n  - qt=5.9.7=h5867ecd_1\n  - readline=8.0=h7b6447c_0\n  - scipy=1.5.2=py38h0b6359f_0\n  - setuptools=50.3.2=py38h06a4308_2\n  - sip=4.19.13=py38he6710b0_0\n  - six=1.15.0=py38h06a4308_0\n  - sqlite=3.33.0=h62c20be_0\n  - termcolor=1.1.0=py_2\n  - tk=8.6.10=hbc83047_0\n  - torchaudio=0.7.0=py38\n  - torchvision=0.8.1=py38_cu102\n  - tornado=6.1=py38h27cfd23_0\n  - typing_extensions=4.3.0=pyha770c72_0\n  - wheel=0.36.0=pyhd3eb1b0_0\n  - x264=1!152.20180806=h14c3975_0\n  - xorg-fixesproto=5.0=h14c3975_1002\n  - xorg-inputproto=2.3.2=h14c3975_1002\n  - xorg-kbproto=1.0.7=h14c3975_1002\n  - xorg-libx11=1.6.12=h516909a_0\n  - xorg-libxau=1.0.9=h14c3975_0\n  - xorg-libxext=1.3.4=h516909a_0\n  - xorg-libxfixes=5.0.3=h516909a_1004\n  - xorg-libxi=1.7.10=h516909a_0\n  - xorg-xextproto=7.3.0=h14c3975_1002\n  - xorg-xproto=7.0.31=h14c3975_1007\n  - xz=5.2.5=h7b6447c_0\n  - yacs=0.1.8=pyhd8ed1ab_0\n  - yaml=0.2.5=h516909a_0\n  - zlib=1.2.11=h7b6447c_3\n  - zstd=1.4.5=h9ceee32_0\n  - pip:\n    - absl-py==0.11.0\n    - antlr4-python3-runtime==4.9.3\n    - appdirs==1.4.4\n    - beautifulsoup4==4.11.1\n    - black==21.4b2\n    - cachetools==4.1.1\n    - chardet==3.0.4\n    - charset-normalizer==2.1.1\n    - cloudpickle==1.6.0\n    - cupy-cuda102==11.1.0\n    - cython==0.29.21\n    - data==0.4\n    - dataclasses==0.6\n    - dcnv2==0.1\n    - decorator==5.1.1\n    - detectron2==0.5+cu102\n    - fastrlock==0.8\n    - filelock==3.8.0\n    - funcsigs==1.0.2\n    - future==0.18.2\n    - fvcore==0.1.2.post20201122\n    - gdown==4.5.1\n    - google-auth==1.23.0\n    - google-auth-oauthlib==0.4.2\n    
- grpcio==1.34.0\n    - hydra-core==1.2.0\n    - idna==2.10\n    - imageio==2.9.0\n    - importlib-resources==5.9.0\n    - iopath==0.1.8\n    - joblib==0.17.0\n    - jsonpatch==1.32\n    - jsonpointer==2.3\n    - latex==0.7.0\n    - lxml==4.9.1\n    - markdown==3.3.3\n    - mypy-extensions==0.4.3\n    - ngransac==0.0.0\n    - numpy==1.23.2\n    - oauthlib==3.1.0\n    - omegaconf==2.2.3\n    - opencv-python==4.4.0.46\n    - packaging==21.3\n    - pathspec==0.10.1\n    - portalocker==2.0.0\n    - protobuf==3.14.0\n    - pyasn1==0.4.8\n    - pyasn1-modules==0.2.8\n    - pycocotools==2.0.4\n    - pydot==1.4.1\n    - pypng==0.0.20\n    - pysocks==1.7.1\n    - pytransform3d==1.14.0\n    - pyzmq==23.2.1\n    - regex==2022.8.17\n    - requests==2.25.0\n    - requests-oauthlib==1.3.0\n    - rsa==4.6\n    - shutilwhich==1.1.0\n    - soupsieve==2.3.2.post1\n    - splines==0.2.0\n    - tabulate==0.8.7\n    - tempdir==0.7.1\n    - tensorboard==2.4.0\n    - tensorboard-data-server==0.6.1\n    - tensorboard-plugin-wit==1.7.0\n    - timm==0.6.7\n    - toml==0.10.2\n    - torchfile==0.1.0\n    - tqdm==4.54.0\n    - trimesh==3.9.3\n    - urllib3==1.26.2\n    - visdom==0.1.8.9\n    - websocket-client==1.4.0\n    - werkzeug==1.0.1\n    - workflow==1.0\n    - zipp==3.8.1\nprefix: /home/shihao/miniconda3/envs/rigidmask_v0\n"
  },
  {
    "path": "evaluator/__init__.py",
    "content": ""
  },
  {
    "path": "evaluator/evaluate_ate_scale.py",
    "content": "#!/usr/bin/python\n\n# Modified by Wenshan Wang\n# Modified by Raul Mur-Artal\n# Automatically compute the optimal scale factor for monocular VO/SLAM.\n\n# Software License Agreement (BSD License)\n#\n# Copyright (c) 2013, Juergen Sturm, TUM\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions\n# are met:\n#\n#  * Redistributions of source code must retain the above copyright\n#    notice, this list of conditions and the following disclaimer.\n#  * Redistributions in binary form must reproduce the above\n#    copyright notice, this list of conditions and the following\n#    disclaimer in the documentation and/or other materials provided\n#    with the distribution.\n#  * Neither the name of TUM nor the names of its\n#    contributors may be used to endorse or promote products derived\n#    from this software without specific prior written permission.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE\n# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n# POSSIBILITY OF SUCH DAMAGE.\n#\n# Requirements: \n# sudo apt-get install python-argparse\n\n\"\"\"\nThis script computes the absolute trajectory error from the ground truth\ntrajectory and the estimated trajectory.\n\"\"\"\n\nimport numpy\n\ndef align(model,data,calc_scale=False):\n    \"\"\"Align two trajectories using the method of Horn (closed-form).\n    \n    Input:\n    model -- first trajectory (3xn)\n    data -- second trajectory (3xn)\n    \n    Output:\n    rot -- rotation matrix (3x3)\n    trans -- translation vector (3x1)\n    trans_error -- translational error per point (1xn)\n    \n    \"\"\"\n    numpy.set_printoptions(precision=3,suppress=True)\n    model_zerocentered = model - model.mean(1)\n    data_zerocentered = data - data.mean(1)\n    \n    W = numpy.zeros( (3,3) )\n    for column in range(model.shape[1]):\n        W += numpy.outer(model_zerocentered[:,column],data_zerocentered[:,column])\n    U,d,Vh = numpy.linalg.linalg.svd(W.transpose())\n    S = numpy.matrix(numpy.identity( 3 ))\n    if(numpy.linalg.det(U) * numpy.linalg.det(Vh)<0):\n        S[2,2] = -1\n    rot = U*S*Vh\n\n    if calc_scale:\n        rotmodel = rot*model_zerocentered\n        dots = 0.0\n        norms = 0.0\n        for column in range(data_zerocentered.shape[1]):\n            dots += numpy.dot(data_zerocentered[:,column].transpose(),rotmodel[:,column])\n            normi = numpy.linalg.norm(model_zerocentered[:,column])\n            norms += normi*normi\n        # s = 
float(dots/norms)  \n        s = float(norms/dots)\n    else:\n        s = 1.0  \n\n    # trans = data.mean(1) - s*rot * model.mean(1)\n    # model_aligned = s*rot * model + trans\n    # alignment_error = model_aligned - data\n\n    # scale the est to the gt, otherwise the ATE could be very small if the est scale is small\n    trans = s*data.mean(1) - rot * model.mean(1)\n    model_aligned = rot * model + trans\n    data_alingned = s * data\n    alignment_error = model_aligned - data_alingned\n    \n    trans_error = numpy.sqrt(numpy.sum(numpy.multiply(alignment_error,alignment_error),0)).A[0]\n        \n    return rot,trans,trans_error, s\n\ndef plot_traj(ax,stamps,traj,style,color,label):\n    \"\"\"\n    Plot a trajectory using matplotlib. \n    \n    Input:\n    ax -- the plot\n    stamps -- time stamps (1xn)\n    traj -- trajectory (3xn)\n    style -- line style\n    color -- line color\n    label -- plot legend\n    \n    \"\"\"\n    stamps.sort()\n    interval = numpy.median([s-t for s,t in zip(stamps[1:],stamps[:-1])])\n    x = []\n    y = []\n    last = stamps[0]\n    for i in range(len(stamps)):\n        if stamps[i]-last < 2*interval:\n            x.append(traj[i][0])\n            y.append(traj[i][1])\n        elif len(x)>0:\n            ax.plot(x,y,style,color=color,label=label)\n            label=\"\"\n            x=[]\n            y=[]\n        last= stamps[i]\n    if len(x)>0:\n        ax.plot(x,y,style,color=color,label=label)\n            \n\n"
  },
  {
    "path": "evaluator/evaluate_kitti.py",
    "content": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please see the LICENSE file in the root directory.\n# This is a python reinplementation of the KITTI metric: http://www.cvlibs.net/datasets/kitti/eval_odometry.php\n# Cridit: Xiangwei Wang https://github.com/TimingSpace\n\nimport numpy as np\nimport sys\n\ndef trajectory_distances(poses):\n    distances = []\n    distances.append(0)\n    for i in range(1,len(poses)):\n        p1 = poses[i-1]\n        p2 = poses[i]\n        delta = p1[0:3,3] - p2[0:3,3]\n        distances.append(distances[i-1]+np.linalg.norm(delta))\n    return distances\n\ndef last_frame_from_segment_length(dist,first_frame,length):\n    for i in range(first_frame,len(dist)):\n        if dist[i]>dist[first_frame]+length:\n            return i\n    return -1\n\ndef rotation_error(pose_error):\n    a = pose_error[0,0]\n    b = pose_error[1,1]\n    c = pose_error[2,2]\n    d = 0.5*(a+b+c-1)\n    rot_error = np.arccos(max(min(d,1.0),-1.0))\n    return rot_error\n\ndef translation_error(pose_error):\n    dx = pose_error[0,3]\n    dy = pose_error[1,3]\n    dz = pose_error[2,3]\n    return np.sqrt(dx*dx+dy*dy+dz*dz)\n\n# def line2matrix(pose_line):\n#     pose_line = np.array(pose_line)\n#     pose_m = np.matrix(np.eye(4))\n#     pose_m[0:3,:] = pose_line.reshape(3,4)\n#     return pose_m\n    \ndef calculate_sequence_error(poses_gt,poses_result,lengths=[10,20,30,40,50,60,70,80]):\n    # error_vetor\n    errors = []\n\n    # paramet\n    step_size = 1 #10; # every second\n    num_lengths = len(lengths)\n\n    # import ipdb;ipdb.set_trace()\n    # pre-compute distances (from ground truth as reference)\n    dist = trajectory_distances(poses_gt)\n    # for all start positions do\n    for  first_frame in range(0, len(poses_gt), step_size):\n    # for all segment lengths do\n        for i in range(0,num_lengths):\n            #  current length\n            length = lengths[i];\n\n  
          # compute last frame\n            last_frame = last_frame_from_segment_length(dist,first_frame,length);\n            # continue, if sequence not long enough\n            if (last_frame==-1):\n                continue;\n\n            # compute rotational and translational errors\n            pose_delta_gt     = np.linalg.inv(poses_gt[first_frame]).dot(poses_gt[last_frame])\n            pose_delta_result = np.linalg.inv(poses_result[first_frame]).dot(poses_result[last_frame])\n            pose_error        = np.linalg.inv(pose_delta_result).dot(pose_delta_gt)\n            r_err = rotation_error(pose_error);\n            t_err = translation_error(pose_error);\n\n            # compute speed\n            num_frames = (float)(last_frame-first_frame+1);\n            speed = length/(0.1*num_frames);\n\n            # write to file\n            error = [first_frame,r_err/length,t_err/length,length,speed]\n            errors.append(error)\n            # return error vector\n    return errors\n\ndef calculate_ave_errors(errors,lengths=[10,20,30,40,50,60,70,80]):\n    rot_errors=[]\n    tra_errors=[]\n    for length in lengths:\n        rot_error_each_length =[]\n        tra_error_each_length =[]\n        for error in errors:\n            if abs(error[3]-length)<0.1:\n                rot_error_each_length.append(error[1])\n                tra_error_each_length.append(error[2])\n\n        if len(rot_error_each_length)==0:\n            # import ipdb;ipdb.set_trace()\n            continue\n        else:\n            rot_errors.append(sum(rot_error_each_length)/len(rot_error_each_length))\n            tra_errors.append(sum(tra_error_each_length)/len(tra_error_each_length))\n    return np.array(rot_errors)*180/np.pi, tra_errors\n\ndef evaluate(gt, data,kittitype=True):\n    if kittitype:\n        lens =  [100,200,300,400,500,600,700,800] #\n    else:\n        lens = [5,10,15,20,25,30,35,40] #[1,2,3,4,5,6] # \n    errors = calculate_sequence_error(gt, data, lengths=lens)\n  
  rot,tra = calculate_ave_errors(errors, lengths=lens)\n    return np.mean(rot), np.mean(tra)\n\ndef  main():\n    # usage: python main.py path_to_ground_truth path_to_predict_pose\n    # load and preprocess data\n    ground_truth_data  = np.loadtxt(sys.argv[1])\n    predict_pose__data = np.loadtxt(sys.argv[2])\n    errors = calculate_sequence_error(ground_truth_data,predict_pose__data)\n    rot,tra = calculate_ave_errors(errors)\n    print(rot,'\\n',tra)\n    #print(error)\n    # evaluate the vo result\n    # save and visualization the evaluatation result\n\nif __name__ == \"__main__\":\n    main()\n\n\n"
  },
  {
    "path": "evaluator/evaluate_rpe.py",
    "content": "#!/usr/bin/python\n# Software License Agreement (BSD License)\n#\n# Modified by Wenshan Wang\n# Copyright (c) 2013, Juergen Sturm, TUM\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions\n# are met:\n#\n#  * Redistributions of source code must retain the above copyright\n#    notice, this list of conditions and the following disclaimer.\n#  * Redistributions in binary form must reproduce the above\n#    copyright notice, this list of conditions and the following\n#    disclaimer in the documentation and/or other materials provided\n#    with the distribution.\n#  * Neither the name of TUM nor the names of its\n#    contributors may be used to endorse or promote products derived\n#    from this software without specific prior written permission.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE\n# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n# POSSIBILITY OF SUCH DAMAGE.\n\n\"\"\"\nThis script computes the relative pose error from the ground truth trajectory\nand the estimated trajectory.\n\"\"\"\n\nimport random\nimport numpy as np\nimport sys\n\ndef ominus(a,b):\n    \"\"\"\n    Compute the relative 3D transformation between a and b.\n    \n    Input:\n    a -- first pose (homogeneous 4x4 matrix)\n    b -- second pose (homogeneous 4x4 matrix)\n    \n    Output:\n    Relative 3D transformation from a to b.\n    \"\"\"\n    return np.dot(np.linalg.inv(a),b)\n\ndef compute_distance(transform):\n    \"\"\"\n    Compute the distance of the translational component of a 4x4 homogeneous matrix.\n    \"\"\"\n    return np.linalg.norm(transform[0:3,3])\n\ndef compute_angle(transform):\n    \"\"\"\n    Compute the rotation angle from a 4x4 homogeneous matrix.\n    \"\"\"\n    # an invitation to 3-d vision, p 27\n    return np.arccos( min(1,max(-1, (np.trace(transform[0:3,0:3]) - 1)/2) ))\n\ndef distances_along_trajectory(traj):\n    \"\"\"\n    Compute the translational distances along a trajectory. 
\n    \"\"\"\n    motion = [ominus(traj[i+1],traj[i]) for i in range(len(traj)-1)]\n    distances = [0]\n    sum = 0\n    for t in motion:\n        sum += compute_distance(t)\n        distances.append(sum)\n    return distances\n    \n\ndef evaluate_trajectory(traj_gt, traj_est, param_max_pairs=10000, param_fixed_delta=False,\n                        param_delta=1.00):\n    \"\"\"\n    Compute the relative pose error between two trajectories.\n    \n    Input:\n    traj_gt -- the first trajectory (ground truth)\n    traj_est -- the second trajectory (estimated trajectory)\n    param_max_pairs -- number of relative poses to be evaluated\n    param_fixed_delta -- false: evaluate over all possible pairs\n                         true: only evaluate over pairs with a given distance (delta)\n    param_delta -- distance between the evaluated pairs\n    param_delta_unit -- unit for comparison:\n                        \"s\": seconds\n                        \"m\": meters\n                        \"rad\": radians\n                        \"deg\": degrees\n                        \"f\": frames\n    param_offset -- time offset between two trajectories (to model the delay)\n    param_scale -- scale to be applied to the second trajectory\n    \n    Output:\n    list of compared poses and the resulting translation and rotation error\n    \"\"\"\n    \n    if not param_fixed_delta:\n        if(param_max_pairs==0 or len(traj_est)<np.sqrt(param_max_pairs)):\n            pairs = [(i,j) for i in range(len(traj_est)) for j in range(len(traj_est))]\n        else:\n            pairs = [(random.randint(0,len(traj_est)-1),random.randint(0,len(traj_est)-1)) for i in range(param_max_pairs)]\n    else:\n        pairs = []\n        for i in range(len(traj_est)):\n            j = i + param_delta\n            if j < len(traj_est): \n                pairs.append((i,j))\n        if(param_max_pairs!=0 and len(pairs)>param_max_pairs):\n            pairs = random.sample(pairs,param_max_pairs)\n     
   \n    result = []\n    for i,j in pairs:\n        \n        error44 = ominus(  ominus( traj_est[j], traj_est[i] ),\n                           ominus( traj_gt[j], traj_gt[i] ) )\n        \n        trans = compute_distance(error44)\n        rot = compute_angle(error44)\n        \n        result.append([i,j,trans,rot])\n        \n    if len(result)<2:\n        raise Exception(\"Couldn't find pairs between groundtruth and estimated trajectory!\")\n        \n    return result\n\n"
  },
  {
    "path": "evaluator/evaluator_base.py",
    "content": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please see the LICENSE file in the root directory.\n\nimport numpy as np\nfrom .trajectory_transform import trajectory_transform, rescale\nfrom .transformation import pos_quats2SE_matrices, SE2pos_quat\nfrom .evaluate_ate_scale import align, plot_traj\nfrom .evaluate_rpe import evaluate_trajectory\nfrom .evaluate_kitti import evaluate as kittievaluate\n\nnp.set_printoptions(suppress=True, precision=2, threshold=100000)\n\ndef transform_trajs(gt_traj, est_traj, cal_scale):\n    gt_traj, est_traj = trajectory_transform(gt_traj, est_traj)\n    if cal_scale :\n        est_traj, s = rescale(gt_traj, est_traj)\n        print('  Scale, {}'.format(s))\n    else:\n        s = 1.0\n    return gt_traj, est_traj, s\n\ndef quats2SEs(gt_traj, est_traj):\n    gt_SEs = pos_quats2SE_matrices(gt_traj)\n    est_SEs = pos_quats2SE_matrices(est_traj)\n    return gt_SEs, est_SEs\n\ndef per_frame_scale_alignment(gt_motions, est_motions):\n    dist_gt = np.linalg.norm(gt_motions[:,:3], axis=1)\n    # scale the output frame by frame\n    motions_scale = est_motions.copy()\n    dist = np.linalg.norm(motions_scale[:,:3],axis=1)\n    scale_gt = dist_gt/dist\n    motions_scale[:,:3] = est_motions[:,:3] * scale_gt.reshape(-1,1)\n\n    return motions_scale\n\nclass ATEEvaluator(object):\n    def __init__(self):\n        super(ATEEvaluator, self).__init__()\n\n\n    def evaluate(self, gt_traj, est_traj, scale):\n        gt_xyz = np.matrix(gt_traj[:,0:3].transpose())\n        est_xyz = np.matrix(est_traj[:, 0:3].transpose())\n\n        rot, trans, trans_error, s = align(gt_xyz, est_xyz, scale)\n        print('  ATE scale: {}'.format(s))\n        error = np.sqrt(np.dot(trans_error,trans_error) / len(trans_error))\n\n        # align two trajs \n        est_SEs = pos_quats2SE_matrices(est_traj)\n        T = np.eye(4) \n        T[:3,:3] = rot\n        T[:3,3:] = trans \n   
     T = np.linalg.inv(T)\n        est_traj_aligned = []\n        for se in est_SEs:\n            se[:3,3] = se[:3,3] * s\n            se_new = T.dot(se)\n            se_new = SE2pos_quat(se_new)\n            est_traj_aligned.append(se_new)\n\n        est_traj_aligned = np.array(est_traj_aligned)\n        return error, gt_traj, est_traj_aligned\n\n# =======================\n\n\nclass RPEEvaluator(object):\n    def __init__(self):\n        super(RPEEvaluator, self).__init__()\n\n\n    def evaluate(self, gt_SEs, est_SEs):\n        result = evaluate_trajectory(gt_SEs, est_SEs)\n        \n        trans_error = np.array(result)[:,2]\n        rot_error = np.array(result)[:,3]\n\n        trans_error_mean = np.mean(trans_error)\n        rot_error_mean = np.mean(rot_error)\n\n        # import ipdb;ipdb.set_trace()\n\n        return (rot_error_mean, trans_error_mean)\n\n# =======================\n\n\nclass KittiEvaluator(object):\n    def __init__(self):\n        super(KittiEvaluator, self).__init__()\n\n    # return rot_error, tra_error\n    def evaluate(self, gt_SEs, est_SEs, kittitype):\n        # trajectory_scale(est_SEs, 0.831984631412)\n        error = kittievaluate(gt_SEs, est_SEs, kittitype=kittitype)\n        return error\n"
  },
  {
    "path": "evaluator/tartanair_evaluator.py",
    "content": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please see the LICENSE file in the root directory.\n\nimport numpy as np\nfrom .evaluator_base import ATEEvaluator, RPEEvaluator, KittiEvaluator, transform_trajs, quats2SEs\nfrom os.path import isdir, isfile\n\n# from trajectory_transform import timestamp_associate\n\nclass TartanAirEvaluator:\n    def __init__(self, scale = False, round=1):\n        self.ate_eval = ATEEvaluator()\n        self.rpe_eval = RPEEvaluator()\n        self.kitti_eval = KittiEvaluator()\n        \n    def evaluate_one_trajectory(self, gt_traj, est_traj, scale=False, kittitype=True):\n        \"\"\"\n        scale = True: calculate a global scale\n        \"\"\"\n        # load trajectories\n        try:\n            gt_traj = np.loadtxt(gt_traj)\n            est_traj = np.loadtxt(est_traj)\n        except:\n            pass\n\n        if gt_traj.shape[0] != est_traj.shape[0]:\n            raise Exception(\"POSEFILE_LENGTH_ILLEGAL\")\n        if gt_traj.shape[1] != 7 or est_traj.shape[1] != 7:\n            raise Exception(\"POSEFILE_FORMAT_ILLEGAL\")\n\n        # transform and scale\n        gt_traj_trans, est_traj_trans, s = transform_trajs(gt_traj, est_traj, scale)\n        gt_SEs, est_SEs = quats2SEs(gt_traj_trans, est_traj_trans)\n\n        ate_score, gt_ate_aligned, est_ate_aligned = self.ate_eval.evaluate(gt_traj, est_traj, scale)\n        rpe_score = self.rpe_eval.evaluate(gt_SEs, est_SEs)\n        kitti_score = self.kitti_eval.evaluate(gt_SEs, est_SEs, kittitype=kittitype)\n\n        return {'ate_score': ate_score, \n                'rpe_score': rpe_score, \n                'kitti_score': kitti_score,\n                'gt_aligned': gt_ate_aligned, \n                'est_aligned': est_ate_aligned}\n\nif __name__ == \"__main__\":\n    \n    # scale = True for monocular track, scale = False for stereo track\n    aicrowd_evaluator = TartanAirEvaluator()\n   
 result = aicrowd_evaluator.evaluate_one_trajectory('pose_gt.txt', 'pose_est.txt', scale=True)\n    print(result)\n"
  },
  {
    "path": "evaluator/trajectory_transform.py",
    "content": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please see the LICENSE file in the root directory.\n\nimport numpy as np\nfrom .transformation import pos_quats2SE_matrices, SE2pos_quat, pose2motion, motion2pose\n\ndef shift0(traj): \n    '''\n    Traj: a list of [t + quat]\n    Return: translate and rotate the traj\n    '''\n    traj_ses = pos_quats2SE_matrices(np.array(traj))\n    traj_init = traj_ses[0]\n    traj_init_inv = np.linalg.inv(traj_init)\n    new_traj = []\n    for tt in traj_ses:\n        ttt=traj_init_inv.dot(tt)\n        new_traj.append(SE2pos_quat(ttt))\n    return np.array(new_traj)\n\ndef ned2cam(traj):\n    '''\n    transfer a ned traj to camera frame traj\n    '''\n    T = np.array([[0,1,0,0],\n                  [0,0,1,0],\n                  [1,0,0,0],\n                  [0,0,0,1]], dtype=np.float32) \n    T_inv = np.linalg.inv(T)\n    new_traj = []\n    traj_ses = pos_quats2SE_matrices(np.array(traj))\n\n    for tt in traj_ses:\n        ttt=T.dot(tt).dot(T_inv)\n        new_traj.append(SE2pos_quat(ttt))\n        \n    return np.array(new_traj)\n\ndef cam2ned(traj):\n    '''\n    transfer a camera traj to ned frame traj\n    '''\n    T = np.array([[0,0,1,0],\n                  [1,0,0,0],\n                  [0,1,0,0],\n                  [0,0,0,1]], dtype=np.float32) \n    T_inv = np.linalg.inv(T)\n    new_traj = []\n    traj_ses = pos_quats2SE_matrices(np.array(traj))\n\n    for tt in traj_ses:\n        ttt=T.dot(tt).dot(T_inv)\n        new_traj.append(SE2pos_quat(ttt))\n        \n    return np.array(new_traj)\n\n\ndef trajectory_transform(gt_traj, est_traj):\n    '''\n    1. center the start frame to the axis origin\n    2. 
align the GT frame (NED) with estimation frame (camera)\n    '''\n    gt_traj_trans = shift0(gt_traj)\n    est_traj_trans = shift0(est_traj)\n\n    # gt_traj_trans = ned2cam(gt_traj_trans)\n    # est_traj_trans = cam2ned(est_traj_trans)\n\n    return gt_traj_trans, est_traj_trans\n\ndef rescale_bk(poses_gt, poses):\n    motion_gt = pose2motion(poses_gt)\n    motion    = pose2motion(poses)\n    \n    speed_square_gt = np.sum(motion_gt[:,0:3,3]*motion_gt[:,0:3,3],1)\n    speed_gt = np.sqrt(speed_square_gt)\n    speed_square    = np.sum(motion[:,0:3,3]*motion[:,0:3,3],1)\n    speed = np.sqrt(speed_square)\n    # when the speed is small, the scale could become very large\n    # import ipdb;ipdb.set_trace()\n    mask = (speed_gt>0.0001) # * (speed>0.00001)\n    scale = np.mean((speed[mask])/speed_gt[mask])\n    scale = 1.0/scale\n    motion[:,0:3,3] = motion[:,0:3,3]*scale\n    pose_update = motion2pose(motion)\n    return  pose_update, scale\n\ndef pose2trans(pose_data):\n    data_size = len(pose_data)\n    trans = []\n    for i in range(0,data_size-1):\n        tran = np.array(pose_data[i+1][:3]) - np.array(pose_data[i][:3]) # np.linalg.inv(data[i]).dot(data[i+1])\n        trans.append(tran)\n\n    return np.array(trans) # N x 3\n\n\ndef rescale(poses_gt, poses):\n    '''\n    similar to rescale\n    poses_gt/poses: N x 7 poselist in quaternion format\n    '''\n    trans_gt = pose2trans(poses_gt)\n    trans    = pose2trans(poses)\n    \n    speed_square_gt = np.sum(trans_gt*trans_gt,1)\n    speed_gt = np.sqrt(speed_square_gt)\n    speed_square    = np.sum(trans*trans,1)\n    speed = np.sqrt(speed_square)\n    # when the speed is small, the scale could become very large\n    # import ipdb;ipdb.set_trace()\n    mask = (speed_gt>0.0001) # * (speed>0.00001)\n    scale = np.mean((speed[mask])/speed_gt[mask])\n    scale = 1.0/scale\n    poses[:,0:3] = poses[:,0:3]*scale\n    return  poses, scale\n\ndef trajectory_scale(traj, scale):\n    for ttt in traj:\n        ttt[0:3,3] 
= ttt[0:3,3]*scale\n    return traj\n \ndef timestamp_associate(first_list, second_list, max_difference):\n    \"\"\"\n    Associate two trajectories of [stamp,data]. As the time stamps never match exactly, we aim \n    to find the closest match for every input tuple.\n    \n    Input:\n    first_list -- first list of (stamp,data)\n    second_list -- second list of (stamp,data)\n    max_difference -- search radius for candidate generation\n\n    Output:\n    first_res: matched data from the first list\n    second_res: matched data from the second list\n    \n    \"\"\"\n    first_dict = dict([(l[0],l[1:]) for l in first_list if len(l)>1])\n    second_dict = dict([(l[0],l[1:]) for l in second_list if len(l)>1])\n\n    first_keys = first_dict.keys()\n    second_keys = second_dict.keys()\n    potential_matches = [(abs(a - b ), a, b) \n                         for a in first_keys \n                         for b in second_keys \n                         if abs(a - b) < max_difference]\n    potential_matches.sort()\n    matches = []\n    for diff, a, b in potential_matches:\n        if a in first_keys and b in second_keys:\n            first_keys.remove(a)\n            second_keys.remove(b)\n            matches.append((a, b))\n    \n    matches.sort()\n\n    first_res = []\n    second_res = []\n    for t1, t2 in matches:\n        first_res.append(first_dict[t1])\n        second_res.append(second_dict[t2])\n    return np.array(first_res), np.array(second_res)\n"
  },
  {
    "path": "evaluator/transformation.py",
    "content": "# Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang <wenshanw@andrew.cmu.edu>\n# For License information please see the LICENSE file in the root directory.\n# Credit: Xiangwei Wang https://github.com/TimingSpace\n\nimport numpy as np\nfrom scipy.spatial.transform import Rotation as R\n\ndef line2mat(line_data):\n    '''\n    12 -> 4 x 4\n    '''\n    mat = np.eye(4)\n    mat[0:3,:] = line_data.reshape(3,4)\n    return np.matrix(mat)\n\ndef mat2line(mat_data):\n    '''\n    4 x 4 -> 12\n    '''\n    line_data = np.zeros(12)\n    line_data[:]=mat_data[:3,:].reshape((12))\n    return line_data\n\ndef motion2pose(data):\n    '''\n    data: N x 12\n    all_pose: (N+1) x 12\n    '''\n    data_size = data.shape[0]\n    all_pose = np.zeros((data_size+1,12))\n    temp = np.eye(4,4).reshape(1,16)\n    all_pose[0,:] = temp[0,0:12]\n    pose = np.matrix(np.eye(4,4))\n    for i in range(0,data_size):\n        data_mat = line2mat(data[i,:])\n        pose = pose*data_mat\n        pose_line = np.array(pose[0:3,:]).reshape(1,12)\n        all_pose[i+1,:] = pose_line\n    return all_pose\n\ndef pose2motion(data, skip=0):\n    '''\n    data: N x 12\n    all_motion (N-1-skip) x 12\n    '''\n    data_size = data.shape[0]\n    all_motion = np.zeros((data_size-1,12))\n    for i in range(0,data_size-1-skip):\n        pose_curr = line2mat(data[i,:])\n        pose_next = line2mat(data[i+1+skip,:])\n        motion = pose_curr.I*pose_next\n        motion_line = np.array(motion[0:3,:]).reshape(1,12)\n        all_motion[i,:] = motion_line\n    return all_motion\n\ndef SE2se(SE_data):\n    result = np.zeros((6))\n    result[0:3] = np.array(SE_data[0:3,3].T)\n    result[3:6] = SO2so(SE_data[0:3,0:3]).T\n    return result\n    \ndef SO2so(SO_data):\n    return R.from_matrix(SO_data).as_rotvec()\n\ndef so2SO(so_data):\n    return R.from_rotvec(so_data).as_matrix()\n\ndef se2SE(se_data):\n    result_mat = np.matrix(np.eye(4))\n    result_mat[0:3,0:3] = so2SO(se_data[3:6])\n  
  result_mat[0:3,3]   = np.matrix(se_data[0:3]).T\n    return result_mat\n### can get wrong result\ndef se_mean(se_datas):\n    all_SE = np.matrix(np.eye(4))\n    for i in range(se_datas.shape[0]):\n        se = se_datas[i,:]\n        SE = se2SE(se)\n        all_SE = all_SE*SE\n    all_se = SE2se(all_SE)\n    mean_se = all_se/se_datas.shape[0]\n    return mean_se\n\ndef ses_mean(se_datas):\n    se_datas = np.array(se_datas)\n    se_datas = np.transpose(se_datas.reshape(se_datas.shape[0],se_datas.shape[1],se_datas.shape[2]*se_datas.shape[3]),(0,2,1))\n    se_result = np.zeros((se_datas.shape[0],se_datas.shape[2]))\n    for i in range(0,se_datas.shape[0]):\n        mean_se = se_mean(se_datas[i,:,:])\n        se_result[i,:] = mean_se\n    return se_result\n\ndef ses2poses(data):\n    data_size = data.shape[0]\n    all_pose = np.zeros((data_size+1,12))\n    temp = np.eye(4,4).reshape(1,16)\n    all_pose[0,:] = temp[0,0:12]\n    pose = np.matrix(np.eye(4,4))\n    for i in range(0,data_size):\n        data_mat = se2SE(data[i,:])\n        pose = pose*data_mat\n        pose_line = np.array(pose[0:3,:]).reshape(1,12)\n        all_pose[i+1,:] = pose_line\n    return all_pose\n\ndef ses2poses_quat(data):\n    '''\n    ses: N x 6\n    '''\n    data_size = data.shape[0]\n    all_pose_quat = np.zeros((data_size+1,7))\n    all_pose_quat[0,:] = np.array([0., 0., 0., 0., 0., 0., 1.])\n    pose = np.matrix(np.eye(4,4))\n    for i in range(0,data_size):\n        data_mat = se2SE(data[i,:])\n        pose = pose*data_mat\n        quat = SO2quat(pose[0:3,0:3])\n        all_pose_quat[i+1,:3] = np.array([pose[0,3], pose[1,3], pose[2,3]])\n        all_pose_quat[i+1,3:] = quat      \n    return all_pose_quat\n\ndef SEs2ses(motion_data):\n    data_size = motion_data.shape[0]\n    ses = np.zeros((data_size,6))\n    for i in range(0,data_size):\n        SE = np.matrix(np.eye(4))\n        SE[0:3,:] = motion_data[i,:].reshape(3,4)\n        ses[i,:] = SE2se(SE)\n    return ses\n\ndef 
so2quat(so_data):\n    so_data = np.array(so_data)\n    theta = np.sqrt(np.sum(so_data*so_data))\n    axis = so_data/theta\n    quat=np.zeros(4)\n    quat[0:3] = np.sin(theta/2)*axis\n    quat[3] = np.cos(theta/2)\n    return quat\n\ndef quat2so(quat_data):\n    quat_data = np.array(quat_data)\n    sin_half_theta = np.sqrt(np.sum(quat_data[0:3]*quat_data[0:3]))\n    axis = quat_data[0:3]/sin_half_theta\n    cos_half_theta = quat_data[3]\n    theta = 2*np.arctan2(sin_half_theta,cos_half_theta)\n    so = theta*axis\n    return so\n\n# input so_datas batch*channel*height*width\n# return quat_datas batch*numner*channel\ndef sos2quats(so_datas,mean_std=[[1],[1]]):\n    so_datas = np.array(so_datas)\n    so_datas = so_datas.reshape(so_datas.shape[0],so_datas.shape[1],so_datas.shape[2]*so_datas.shape[3])\n    so_datas = np.transpose(so_datas,(0,2,1))\n    quat_datas = np.zeros((so_datas.shape[0],so_datas.shape[1],4))\n    for i_b in range(0,so_datas.shape[0]):\n        for i_p in range(0,so_datas.shape[1]):\n            so_data = so_datas[i_b,i_p,:]\n            quat_data = so2quat(so_data)\n            quat_datas[i_b,i_p,:] = quat_data\n    return quat_datas\n\ndef SO2quat(SO_data):\n    rr = R.from_matrix(SO_data)\n    return rr.as_quat()\n\ndef quat2SO(quat_data):\n    return R.from_quat(quat_data).as_matrix()\n\n\ndef pos_quat2SE(quat_data):\n    SO = R.from_quat(quat_data[3:7]).as_matrix()\n    SE = np.matrix(np.eye(4))\n    SE[0:3,0:3] = np.matrix(SO)\n    SE[0:3,3]   = np.matrix(quat_data[0:3]).T\n    SE = np.array(SE[0:3,:]).reshape(1,12)\n    return SE\n\n\ndef pos_quats2SEs(quat_datas):\n    data_len = quat_datas.shape[0]\n    SEs = np.zeros((data_len,12))\n    for i_data in range(0,data_len):\n        SE = pos_quat2SE(quat_datas[i_data,:])\n        SEs[i_data,:] = SE\n    return SEs\n\n\ndef pos_quats2SE_matrices(quat_datas):\n    data_len = quat_datas.shape[0]\n    SEs = []\n    for quat in quat_datas:\n        SO = R.from_quat(quat[3:7]).as_matrix()\n        
SE = np.eye(4)\n        SE[0:3,0:3] = SO\n        SE[0:3,3]   = quat[0:3]\n        SEs.append(SE)\n    return SEs\n\ndef SE2pos_quat(SE_data):\n    pos_quat = np.zeros(7)\n    pos_quat[3:] = SO2quat(SE_data[0:3,0:3])\n    pos_quat[:3] = SE_data[0:3,3].T\n    return pos_quat\n\ndef SEs2ses(data):\n    '''\n    data: N x 12\n    ses: N x 6\n    '''\n    data_size = data.shape[0]\n    ses = np.zeros((data_size,6))\n    for i in range(0,data_size):\n        ses[i,:] = SE2se(line2mat(data[i]))\n    return ses\n\ndef ses2SEs(data):\n    '''\n    data: N x 6\n    SEs: N x 12\n    '''\n    data_size = data.shape[0]\n    SEs = np.zeros((data_size,12))\n    for i in range(0,data_size):\n        SEs[i,:] = mat2line(se2SE(data[i]))\n    return SEs\n\ndef SE2quat(SE_data):\n    '''\n    SE_data: 4 x 4\n    quat: 7\n    '''\n    pos_quat = np.zeros(7)\n    pos_quat[3:] = SO2quat(SE_data[0:3,0:3])\n    pos_quat[:3] = SE_data[0:3,3].T\n    return pos_quat\n\ndef quat2SE(quat_data):\n    '''\n    quat_data: 7\n    SE: 4 x 4\n    '''\n    SO = R.from_quat(quat_data[3:7]).as_matrix()\n    SE = np.matrix(np.eye(4))\n    SE[0:3,0:3] = np.matrix(SO)\n    SE[0:3,3]   = np.matrix(quat_data[0:3]).T\n    return SE\n\ndef SEs2quats(SEs_data):\n    '''\n    SE_data: N x 12\n    quat: N x 7\n    '''\n    data_len = SEs_data.shape[0]\n    all_quats = np.zeros((data_len,7))\n    for i in range(0,data_len):\n        SE = line2mat(SEs_data[i])\n        all_quats[i] = SE2quat(SE)\n    return all_quats\n\ndef quats2SEs(quat_datas):\n    '''\n    pos_quats: N x 7\n    SEs: N x 12\n    '''\n    data_len = quat_datas.shape[0]\n    SEs = np.zeros((data_len,12))\n    for i_data in range(0,data_len):\n        SE = quat2SE(quat_datas[i_data,:])\n        SEs[i_data,:] = mat2line(SE)\n    return SEs\n\ndef motion_ses2pose_quats(data):\n    '''\n    data: N x 6 motion data\n    poses_quat: (N+1) x 7 pose data\n    '''\n    motions_SEs = ses2SEs(data) # N x 6 -> N x 12\n    poses_SEs  = 
motion2pose(motions_SEs) # N x 12 -> (N + 1) x 12\n    poses_quat = SEs2quats(poses_SEs) # (N + 1) x 12 -> (N+1) x 7\n    return poses_quat\n\ndef pose_quats2motion_ses(data):\n    '''\n    data: N x 7 pose list\n    motions: (N-1-skip) x 6 se3 list\n    '''\n    poses_SEs = quats2SEs(data) # N x 7 -> N x 12\n    matrix = pose2motion(poses_SEs) # N x 12 -> (N-1-skip) x 12\n    motions = SEs2ses(matrix).astype(np.float32) # (N-1-skip) x 12 -> (N-1-skip) x 6\n    return motions\n\ndef kitti2tartan(traj):\n    '''\n    traj: in kitti style, N x 12 numpy array, in camera frame\n    output: in TartanAir style, N x 7 numpy array, in NED frame\n    '''\n    T = np.array([[0,0,1,0],\n                  [1,0,0,0],\n                  [0,1,0,0],\n                  [0,0,0,1]], dtype=np.float32) \n    T_inv = np.linalg.inv(T)\n    new_traj = []\n\n    for pose in traj:\n        tt = np.eye(4)\n        tt[:3,:] = pose.reshape(3,4)\n        ttt=T.dot(tt).dot(T_inv)\n        new_traj.append(SE2pos_quat(ttt))\n        \n    return np.array(new_traj)\n\ndef tartan2kitti(traj):\n    T = np.array([[0,1,0,0],\n                  [0,0,1,0],\n                  [1,0,0,0],\n                  [0,0,0,1]], dtype=np.float32) \n    T_inv = np.linalg.inv(T)\n    new_traj = []\n\n    for pose in traj:\n        tt = np.eye(4)\n        tt[:3,:] = pos_quat2SE(pose).reshape(3,4)\n        ttt=T.dot(tt).dot(T_inv)\n        new_traj.append(ttt[:3,:].reshape(12))\n        \n    return np.array(new_traj)"
  },
  {
    "path": "vo_trajectory_from_folder.py",
    "content": "from torch.utils.data import DataLoader\nfrom Datasets.utils import ToTensor, Compose, CropCenter, ResizeData, dataset_intrinsics, DownscaleFlow\nfrom Datasets.utils import plot_traj, visflow, load_kiiti_intrinsics, load_sceneflow_extrinsics\nfrom Datasets.tartanTrajFlowDataset import TrajFolderDataset\nfrom evaluator.transformation import pose_quats2motion_ses, motion_ses2pose_quats\nfrom evaluator.tartanair_evaluator import TartanAirEvaluator\nfrom evaluator.evaluator_base import per_frame_scale_alignment\nfrom DytanVO import DytanVO\n\nimport argparse\nimport numpy as np\nimport cv2\nfrom os import mkdir\nfrom os.path import isdir\n\ndef get_args():\n    parser = argparse.ArgumentParser(description='Inference code of DytanVO')\n\n    parser.add_argument('--batch-size', type=int, default=1,\n                        help='batch size (default: 1)')\n    parser.add_argument('--worker-num', type=int, default=1,\n                        help='data loader worker number (default: 1)')\n    parser.add_argument('--image-width', type=int, default=640,\n                        help='image width (default: 640)')\n    parser.add_argument('--image-height', type=int, default=448,\n                        help='image height (default: 448)')\n    parser.add_argument('--vo-model-name', default='',\n                        help='name of pretrained VO model (default: \"\")')\n    parser.add_argument('--flow-model-name', default='',\n                        help='name of pretrained flow model (default: \"\")')\n    parser.add_argument('--pose-model-name', default='',\n                        help='name of pretrained pose model (default: \"\")')\n    parser.add_argument('--seg-model-name', default='',\n                        help='name of pretrained segmentation model (default: \"\")')\n    parser.add_argument('--airdos', action='store_true', default=False,\n                        help='airdos test (default: False)')\n    parser.add_argument('--rs_d435', 
action='store_true', default=False,\n                        help='realsense d435i test (default: False)')\n    parser.add_argument('--sceneflow', action='store_true', default=False,\n                        help='sceneflow test (default: False)')\n    parser.add_argument('--kitti', action='store_true', default=False,\n                        help='kitti test (default: False)')\n    parser.add_argument('--commaai', action='store_true', default=False,\n                        help='commaai test (default: False)')\n    parser.add_argument('--kitti-intrinsics-file',  default='',\n                        help='kitti intrinsics file calib.txt (default: )')\n    parser.add_argument('--test-dir', default='',\n                        help='test trajectory folder where the RGB images are (default: \"\")')\n    parser.add_argument('--pose-file', default='',\n                        help='test trajectory gt pose file, used for scale calculation, and visualization (default: \"\")')\n    parser.add_argument('--save-flow', action='store_true', default=False,\n                        help='save optical flow (default: False)')\n    parser.add_argument('--seg-thresh', type=float, default=0.7,\n                        help='threshold for motion segmentation')\n    parser.add_argument('--iter-num', type=int, default=2,\n                        help='number of iterations')\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = get_args()\n\n    testvo = DytanVO(args.vo_model_name, args.seg_model_name, args.image_height, args.image_width, \n                    args.kitti, args.flow_model_name, args.pose_model_name)\n\n    # load trajectory data from a folder\n    if args.kitti:\n        datastr = 'kitti'\n    elif args.airdos:\n        datastr = 'airdos'\n    elif args.rs_d435:\n        datastr = 'rs_d435'\n    elif args.sceneflow:\n        datastr = 'sceneflow'\n    elif args.commaai:\n        datastr = 'commaai'\n    else:\n        datastr = 
'tartanair'\n    focalx, focaly, centerx, centery, baseline = dataset_intrinsics(datastr, '15mm' in args.test_dir) \n    if args.kitti_intrinsics_file.endswith('.txt') and datastr == 'kitti':\n        focalx, focaly, centerx, centery, baseline = load_kiiti_intrinsics(args.kitti_intrinsics_file)\n\n    if datastr == 'kitti':\n        transform = Compose([ResizeData((args.image_height, 1226)), CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()])\n    else:\n        transform = Compose([CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()])\n\n    testDataset = TrajFolderDataset(args.test_dir, transform=transform, \n                                        focalx=focalx, focaly=focaly, centerx=centerx, centery=centery)\n    testDataloader = DataLoader(testDataset, batch_size=args.batch_size, \n                                        shuffle=False, num_workers=args.worker_num)\n    testDataiter = iter(testDataloader)\n\n    motionlist = []\n    testname = datastr + '_' + args.vo_model_name.split('.')[0] + '_' + args.test_dir.split('/')[-1]\n    if args.save_flow:\n        flowdir = 'results/'+testname+'_flow'\n        if not isdir(flowdir):\n            mkdir(flowdir)\n        flowcount = 0\n    while True:\n        try:\n            sample = testDataiter.next()\n        except StopIteration:\n            break\n\n        motion, flow = testvo.test_batch(sample, [focalx, centerx, centery, baseline], args.seg_thresh, args.iter_num)\n        motionlist.append(motion)\n\n        if args.save_flow:\n            for k in range(flow.shape[0]):\n                flowk = flow[k].transpose(1,2,0)\n                np.save(flowdir+'/'+str(flowcount).zfill(6)+'.npy',flowk)\n                flow_vis = visflow(flowk)\n                cv2.imwrite(flowdir+'/'+str(flowcount).zfill(6)+'.png',flow_vis)\n                flowcount += 1\n\n    motions = np.array(motionlist)\n\n    # calculate ATE, RPE, KITTI-RPE\n    if 
args.pose_file.endswith('.txt'):\n        if datastr == 'sceneflow':\n            gtposes = load_sceneflow_extrinsics(args.pose_file)\n        else:\n            gtposes = np.loadtxt(args.pose_file)\n            if datastr == 'airdos':\n                gtposes = gtposes[:,1:]  # remove the first column of timestamps\n        \n        gtmotions = pose_quats2motion_ses(gtposes)\n        estmotion_scale = per_frame_scale_alignment(gtmotions, motions)\n        estposes = motion_ses2pose_quats(estmotion_scale)\n\n        evaluator = TartanAirEvaluator()\n        results = evaluator.evaluate_one_trajectory(gtposes, estposes, scale=True, kittitype=(datastr=='kitti'))\n        \n        print(\"==> ATE: %.4f,\\t KITTI-R/t: %.4f, %.4f\" %(results['ate_score'], results['kitti_score'][0], results['kitti_score'][1]))\n\n        # save results and visualization\n        plot_traj(results['gt_aligned'], results['est_aligned'], vis=False, savefigname='results/'+testname+'.png', title='ATE %.4f' %(results['ate_score']))\n        np.savetxt('results/'+testname+'.txt',results['est_aligned'])\n    else:\n        np.savetxt('results/'+testname+'.txt', motion_ses2pose_quats(motions))"
  }
]