[
  {
    "path": "README.md",
    "content": "# Neural Points\n[Code of CVPR 2022 paper] Neural Points: Point Cloud Representation with Neural Fields for Arbitrary Upsampling.\n\n- Paper address: [https://arxiv.org/abs/2112.04148](https://arxiv.org/abs/2112.04148)\n- Project webpage: [https://wanquanf.github.io/NeuralPoints.html](https://wanquanf.github.io/NeuralPoints.html)\n\n\n![avatar](./utils/Pipeline_v5.png)\n\n## Prerequisite Installation\nThe code has been tested on Ubuntu 18, with Python 3.8, PyTorch 1.6 and CUDA 10.2:\n\n    conda create --name NePs\n    \n    conda activate NePs\n    \n    conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.2 -c pytorch\n    \n    conda install -c conda-forge igl\n    \nBefore running the code, you need to build the CUDA & C++ extensions of PyTorch:\n\n    cd [ProjectPath]/model/model_for_supp/pointnet2\n    \n    python setup.py install\n\n    \n## How to use the code: \nDownload our dataset: [dataset](https://pan.baidu.com/s/1BLFobnIkuLqrXsdAAVqA0g) (extraction code: qiqq). Put the 'Sketchfab2' folder into: [ProjectPath]/data.\n\nFirstly, you need to change the working directory: \n\n    cd [ProjectPath]/model/conpu_v6\n\nTo obtain the testing results of the testing set, run:\n\n    python train_script101_test.py\n\nTo train our network, run:\n\n    python train_script101.py\n\n\n## Citation\nPlease cite this paper with the following BibTeX:\n\n    @inproceedings{feng2022np,\n        author    = {Wanquan Feng and Jin Li and Hongrui Cai and Xiaonan Luo and Juyong Zhang},\n        title     = {Neural Points: Point Cloud Representation with Neural Fields for Arbitrary Upsampling},\n        booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition (CVPR)},\n        year      = {2022}\n    }\n\n\n## Acknowledgement\nIn this repo, we borrowed the backbone structure from [DGCNN](https://github.com/WangYueFt/dgcnn).\n"
  },
  {
    "path": "code/colormap.py",
    "content": "rb_colormap_list =[ 0,         0,    0.5625,\n         0,         0,    0.6250,\n         0,         0,    0.6875,\n         0,         0,    0.7500,\n         0,         0,    0.8125,\n         0,         0,    0.8750,\n         0,         0,    0.9375,\n         0,         0,    1.0000,\n         0,    0.0625,    1.0000,\n         0,    0.1250,    1.0000,\n         0,    0.1875,    1.0000,\n         0,    0.2500,    1.0000,\n         0,    0.3125,    1.0000,\n         0,    0.3750,    1.0000,\n         0,    0.4375,    1.0000,\n         0,    0.5000,    1.0000,\n         0,    0.5625,    1.0000,\n         0,    0.6250,    1.0000,\n         0,    0.6875,    1.0000,\n         0,    0.7500,    1.0000,\n         0,    0.8125,    1.0000,\n         0,    0.8750,    1.0000,\n         0,    0.9375,    1.0000,\n         0,    1.0000,    1.0000,\n    0.0625,    1.0000,    0.9375,\n    0.1250,    1.0000,    0.8750,\n    0.1875,    1.0000,    0.8125,\n    0.2500,    1.0000,    0.7500,\n    0.3125,    1.0000,    0.6875,\n    0.3750,    1.0000,    0.6250,\n    0.4375,    1.0000,    0.5625,\n    0.5000,    1.0000,    0.5000,\n    0.5625,    1.0000,    0.4375,\n    0.6250,    1.0000,    0.3750,\n    0.6875,    1.0000,    0.3125,\n    0.7500,    1.0000,    0.2500,\n    0.8125,    1.0000,    0.1875,\n    0.8750,    1.0000,    0.1250,\n    0.9375,    1.0000,    0.0625,\n    1.0000,    1.0000,         0,\n    1.0000,    0.9375,         0,\n    1.0000,    0.8750,         0,\n    1.0000,    0.8125,         0,\n    1.0000,    0.7500,         0,\n    1.0000,    0.6875,         0,\n    1.0000,    0.6250,         0,\n    1.0000,    0.5625,         0,\n    1.0000,    0.5000,         0,\n    1.0000,    0.4375,         0,\n    1.0000,    0.3750,         0,\n    1.0000,    0.3125,         0,\n    1.0000,    0.2500,         0,\n    1.0000,    0.1875,         0,\n    1.0000,    0.1250,         0,\n    1.0000,    0.0625,         0,\n    1.0000,         0,         0,\n    0.9375,  
       0,         0,\n    0.8750,         0,         0,\n    0.8125,         0,         0,\n    0.7500,         0,         0,\n    0.6875,         0,         0,\n    0.6250,         0,         0,\n    0.5625,         0,         0,\n    0.5000,         0,         0]\n    \nrb_colormap_list_little = [ 0,         0,    0.5625,\n         0,    0.1250,    1.0000,\n         0,    0.6875,    1.0000,\n    0.2500,    1.0000,    0.7500,\n    0.8125,    1.0000,    0.1875,\n    1.0000,    0.6250,         0,\n    1.0000,    0.0625,         0,\n    0.5000,         0,         0]\n"
  },
  {
    "path": "code/mesh_operations.py",
    "content": "#### Author : Wanquan Feng (University of Science and Technology of China)\n#### Description : Some operations of the mesh/pc based on the numpy array\n#### Data : 2021-10-16\n\nimport os\nimport sys\nimport numpy\nimport igl\n\n\n#  off format\ndef read_off_(off_file_name):\n    v,f,_ = igl.read_off(off_file_name)\n    return v,f\ndef write_off_(off_file_name,v,face_=numpy.zeros((1))):\n    fout = open(off_file_name,'w')\n    fout.write('OFF\\n')\n    fout.write(str(v.shape[0])+' '+str(face_.shape[0])+' 0\\n')\n    for i in range(v.shape[0]):\n        fout.write(str(v[i][0])+' '+str(v[i][1])+' '+str(v[i][2])+'\\n')\n    if face_.shape[0]<2:return None\n    for i in range(face_.shape[0]):\n        fout.write('3 '+str(face_[i][0])+' '+str(face_[i][1])+' '+str(face_[i][2])+'\\n')\n    fout.close()\n    return None\n\n# obj format\ndef write_obj_(obj_write_name,v,face_=numpy.zeros((1)),color_=numpy.zeros((1)),normal_=numpy.zeros((1))):\n    f=open(obj_write_name,'w')\n    vnum = v.shape[0]\n    for vid in range(vnum):\n        f.write('v '+str(v[vid][0])+' '+str(v[vid][1])+' '+str(v[vid][2]))\n        if color_.shape[0]<vnum: f.write('\\n')\n        else:f.write(' '+str(color_[vid][0])+' '+str(color_[vid][1])+' '+str(color_[vid][2])+'\\n')\n        if normal_.shape[0]==vnum:\n            f.write('vn '+str(normal_[vid][0])+' '+str(normal_[vid][1])+' '+str(normal_[vid][2])+'\\n')\n    if face_.shape[0]<2:\n        f.close()\n        return None\n    fnum = face_.shape[0]\n    for fid in range(fnum):\n        f.write('f '+str(face_[fid][0]+1)+' '+str(face_[fid][1]+1)+' '+str(face_[fid][2]+1)+'\\n')\n    f.close()\n    return None\ndef read_obj_(obj_write_name):\n    v, _, n, f, _, _ = igl.read_obj(obj_write_name)\n    return v, f, n\n\n# xyz format\ndef write_xyz_(xyz_write_name,v,normal_=numpy.zeros((1))):\n    f = open(xyz_write_name, 'w')\n    vnum = v.shape[0]\n    for i in range(vnum):\n        f.write(str(v[i][0])+' '+str(v[i][1])+' 
'+str(v[i][2]))\n        if normal_.shape[0]<vnum: f.write('\\n')\n        else:f.write(' '+str(normal_[i][0])+' '+str(normal_[i][1])+' '+str(normal_[i][2])+'\\n')\n    f.close()\n    return None\ndef read_xyz_(xyz_name):\n    v_ = []\n    n_ = []\n    ff = open(xyz_name)\n    lines = ff.readlines()\n    for i, aline in enumerate(lines):\n        words = aline.split(' ')\n        x,y,z = float(words[0]), float(words[1]), float(words[2])\n        v_.append([x,y,z])\n        if len(words)>=6:\n            nx,ny,nz = float(words[3]), float(words[4]), float(words[5])\n            n_.append([nx,ny,nz])\n    v_ = numpy.array(v_).astype(numpy.float32)\n    n_ = numpy.array(n_).astype(numpy.float32)\n    if n_.shape[0] < v_.shape[0]:\n        n_ = None\n    return v_, n_\n\n# format converting\ndef convert_obj_to_off_(obj_path_in, off_path_out):\n    v,face_,_ = read_obj_(obj_path_in)\n    write_off_(off_path_out, v, face_)\n    return None\n    \n# normalize the points to sphere\ndef normalize_points_to_sphere_(v_in):\n    v_out = v_in.copy()\n    center = numpy.mean(v_out,axis=0,keepdims=True)\n    v_out = v_out-center\n    factor = numpy.sum(v_out*v_out, axis=-1, keepdims=True).max()**0.5\n    v_out /= factor\n    return v_out, center, factor\n\n# normalize the points to sphere with given center and factor\ndef normalize_points_to_sphere_with_given_center_and_factor_(v_in, center, factor):\n    v_out = v_in.copy()\n    v_out = v_out-center\n    v_out /= factor\n    return v_out, center, factor\n                \n"
  },
  {
    "path": "code/torch_tensor_functions.py",
    "content": "#### Author : Wanquan Feng (University of Science and Technology of China)\n#### Description : Some operations of the point cloud based on the pytorch tensor\n#### Data : 2021-10-16\n\n\nimport os\nimport sys\nimport torch\nimport numpy\nimport mesh_operations\n\n\n\ndef compute_sqrdis_map(points_x, points_y):\n    ## The shape of the input and output ##\n    # points_x : batchsize * M * 3\n    # points_y : batchsize * N * 3\n    # output   : batchsize * M * N\n    thisbatchsize = points_x.size()[0]\n    pn_x = points_x.size()[1]\n    pn_y = points_y.size()[1]\n    x_sqr = torch.sum(torch.mul(points_x, points_x), dim=-1).view(thisbatchsize, pn_x, 1).expand(-1,-1,pn_y)\n    y_sqr = torch.sum(torch.mul(points_y, points_y), dim=-1).view(thisbatchsize, 1, pn_y).expand(-1,pn_x,-1)\n    inner = torch.bmm(points_x, points_y.transpose(1,2))\n    sqrdis = x_sqr + y_sqr - 2*inner\n    return sqrdis\n\ndef draw_tensor_point_xyz_with_normal(save_path, torch_tensor_points, torch_tensor_normals=torch.ones([1])):\n    ## The shape of the input ##\n    # torch_tensor_points : M * 3\n    # torch_tensor_normals (optional) : M * 3 \n    if len(torch_tensor_points.size())!=2:\n        print('The size of the point tensor should be 2. Exit here.')\n        exit()\n    if torch_tensor_points.size()[1]!=3:\n        print('The dim of the point tensor is not correct. 
It should be (num_point, 3).')\n        exit()\n    numpy_points = torch_tensor_points.cpu().numpy()\n    numpy_normals = torch_tensor_normals.cpu().numpy()\n    mesh_operations.write_xyz_(save_path, numpy_points, numpy_normals)\n\n\ndef draw_tensor_point_xyz_with_normal_by_threshold(save_path, torch_tensor_points, torch_anchor, torch_tensor_normals=torch.ones([1]), threshold=0.95, ):\n    ## The shape of the input ##\n    # torch_tensor_points : M * 3\n    # torch_tensor_normals (optional) : M * 3 \n    # threshold : a float value < 1\n    if len(torch_tensor_points.size())!=2:\n        print('The size of the point tensor should be 2. Exit here.')\n        exit()\n    if torch_tensor_points.size()[1]!=3:\n        print('The dim of the point tensor is not correct. It should be (num_point, 3).')\n        exit()\n    torch_tensor_points_norm = torch.sum(torch.mul(torch_tensor_points, torch_tensor_points), dim=1)\n\n    numpy_points = torch_tensor_points.cpu().numpy()\n    numpy_normals = torch_tensor_normals.cpu().numpy()\n    mesh_operations.write_xyz_(save_path, numpy_points, numpy_normals)\n\ndef draw_tensor_point_obj_with_color(save_path, torch_tensor_points, torch_tensor_color=torch.ones([1])):\n    ## The shape of the input ##\n    # torch_tensor_points : M * 3\n    # torch_tensor_color (optional) : M * 3 \n    if len(torch_tensor_points.size())!=2:\n        print('The size of the point tensor should be 2. Exit here.')\n        exit()\n    if torch_tensor_points.size()[1]!=3:\n        print('The dim of the point tensor is not correct. 
It should be (num_point, 3).')\n        exit()\n    numpy_points = torch_tensor_points.cpu().numpy()\n    numpy_color = torch_tensor_color.cpu().numpy()\n    mesh_operations.write_obj_(save_path, numpy_points, color_=torch_tensor_color.cpu().numpy())\n\n\ndef draw_tensor_point_batch_xyz_with_normal(save_batch_path, torch_tensor_points_batch, torch_tensor_normals_batch=torch.ones([1])):\n    ## The shape of the input ##\n    # torch_tensor_points : B * M * 3\n    # torch_tensor_normals (optional) : B * M * 3 \n    if not os.path.exists(save_batch_path):os.mkdir(save_batch_path)\n    thisbatchsize = len(torch_tensor_points_batch)\n    for bi in range(thisbatchsize):\n        bi_path = save_batch_path+'/'+str(bi)+'.xyz'\n        torch_tensor_points = torch_tensor_points_batch[bi]\n        if len(torch_tensor_normals_batch.size())==1: torch_tensor_normals = torch.ones([1])\n        else:torch_tensor_normals = torch_tensor_normals_batch[bi]\n        draw_tensor_point_xyz_with_normal(bi_path, torch_tensor_points, torch_tensor_normals)\n    \n\ndef euler2rot(euler_angle):\n    batch_size = euler_angle.shape[0]\n    one = torch.ones(batch_size, 1, 1).to(euler_angle.device)\n    zero = torch.zeros(batch_size, 1, 1).to(euler_angle.device)\n    theta = euler_angle[:, 0].reshape(-1, 1, 1)\n    phi = euler_angle[:, 1].reshape(-1, 1, 1)\n    psi = euler_angle[:, 2].reshape(-1, 1, 1)\n    rot_x = torch.cat((\n        torch.cat((one, zero, zero), 1),\n        torch.cat((zero, theta.cos(), theta.sin()), 1),\n        torch.cat((zero, -theta.sin(), theta.cos()), 1),\n    ), 2)\n    rot_y = torch.cat((\n        torch.cat((phi.cos(), zero, -phi.sin()), 1),\n        torch.cat((zero, one, zero), 1),\n        torch.cat((phi.sin(), zero, phi.cos()), 1),\n    ), 2)\n    rot_z = torch.cat((\n        torch.cat((psi.cos(), -psi.sin(), zero), 1),\n        torch.cat((psi.sin(), psi.cos(), zero), 1),\n        torch.cat((zero, zero, one), 1)\n    ), 2)\n    return torch.bmm(rot_z, torch.bmm(rot_y, 
rot_x))\n\n\n\ndef get_neighbor_index(vertices: \"(bs, vertice_num, 3)\",  neighbor_num: int):\n    # Return: (bs, vertice_num, neighbor_num)\n    bs, v, _ = vertices.size()\n    device = vertices.device\n    inner = torch.bmm(vertices, vertices.transpose(1, 2)) #(bs, v, v)\n    quadratic = torch.sum(vertices**2, dim= 2) #(bs, v)\n    distance = inner * (-2) + quadratic.unsqueeze(1) + quadratic.unsqueeze(2)\n    neighbor_index = torch.topk(distance, k= neighbor_num + 1, dim= -1, largest= False)[1]\n    neighbor_index = neighbor_index[:, :, 1:]\n    return neighbor_index\n\n\ndef indexing_neighbor(tensor: \"(bs, vertice_num, dim)\", index: \"(bs, query_vertice_num, neighbor_num)\" ):\n    # Return: (bs, query_vertice_num, neighbor_num, dim)\n    bs, v, n = index.size()\n    id_0 = torch.arange(bs).view(-1, 1, 1)\n    tensor_indexed = tensor[id_0, index]\n    return tensor_indexed\n\n\ndef indexing_by_id(tensor: \"(bs, vertice_num, dim)\", index: \"(bs, query_num, neighbor_num)\" ):\n    # Return: (bs, query_num, neighbor_num, dim)\n    bs, v, n = index.size()\n    id_0 = torch.arange(bs).view(-1, 1, 1)\n    tensor_indexed = tensor[id_0, index]\n    return tensor_indexed"
  },
  {
    "path": "model/conpu_v6/chamfer_distance/__init__.py",
    "content": "from .chamfer_distance import ChamferDistance\n"
  },
  {
    "path": "model/conpu_v6/chamfer_distance/chamfer_distance.cpp",
    "content": "#include <torch/torch.h>\n\n// CUDA forward declarations\nint ChamferDistanceKernelLauncher(\n    const int b, const int n,\n    const float* xyz,\n    const int m,\n    const float* xyz2,\n    float* result,\n    int* result_i,\n    float* result2,\n    int* result2_i);\n\nint ChamferDistanceGradKernelLauncher(\n    const int b, const int n,\n    const float* xyz1,\n    const int m,\n    const float* xyz2,\n    const float* grad_dist1,\n    const int* idx1,\n    const float* grad_dist2,\n    const int* idx2,\n    float* grad_xyz1,\n    float* grad_xyz2);\n\n\nvoid chamfer_distance_forward_cuda(\n    const at::Tensor xyz1, \n    const at::Tensor xyz2, \n    const at::Tensor dist1, \n    const at::Tensor dist2, \n    const at::Tensor idx1, \n    const at::Tensor idx2) \n{\n//    std::cout<<\"here\"<<std::endl;\n//    std::cout<<xyz1.size(0)<<std::endl;\n//    std::cout<<xyz1.size(1)<<std::endl;\n//    std::cout<<dist2.device()<<std::endl;\n    \n    ChamferDistanceKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data<float>(),\n                                            xyz2.size(1), xyz2.data<float>(),\n                                            dist1.data<float>(), idx1.data<int>(),\n                                            dist2.data<float>(), idx2.data<int>());\n}\n\nvoid chamfer_distance_backward_cuda(\n    const at::Tensor xyz1,\n    const at::Tensor xyz2, \n    at::Tensor gradxyz1, \n    at::Tensor gradxyz2, \n    at::Tensor graddist1, \n    at::Tensor graddist2, \n    at::Tensor idx1, \n    at::Tensor idx2)\n{\n    ChamferDistanceGradKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data<float>(),\n                                           xyz2.size(1), xyz2.data<float>(),\n                                           graddist1.data<float>(), idx1.data<int>(),\n                                           graddist2.data<float>(), idx2.data<int>(),\n                                           gradxyz1.data<float>(), 
gradxyz2.data<float>());\n}\n\n\nvoid nnsearch(\n    const int b, const int n, const int m,\n    const float* xyz1,\n    const float* xyz2,\n    float* dist,\n    int* idx)\n{\n    for (int i = 0; i < b; i++) {\n        for (int j = 0; j < n; j++) {\n            const float x1 = xyz1[(i*n+j)*3+0];\n            const float y1 = xyz1[(i*n+j)*3+1];\n            const float z1 = xyz1[(i*n+j)*3+2];\n            double best = 0;\n            int besti = 0;\n            for (int k = 0; k < m; k++) {\n                const float x2 = xyz2[(i*m+k)*3+0] - x1;\n                const float y2 = xyz2[(i*m+k)*3+1] - y1;\n                const float z2 = xyz2[(i*m+k)*3+2] - z1;\n                const double d=x2*x2+y2*y2+z2*z2;\n                if (k==0 || d < best){\n                    best = d;\n                    besti = k;\n                }\n            }\n            dist[i*n+j] = best;\n            idx[i*n+j] = besti;\n        }\n    }\n}\n\n\nvoid chamfer_distance_forward(\n    const at::Tensor xyz1, \n    const at::Tensor xyz2, \n    const at::Tensor dist1, \n    const at::Tensor dist2, \n    const at::Tensor idx1, \n    const at::Tensor idx2) \n{\n    const int batchsize = xyz1.size(0);\n    const int n = xyz1.size(1);\n    const int m = xyz2.size(1);\n\n    const float* xyz1_data = xyz1.data<float>();\n    const float* xyz2_data = xyz2.data<float>();\n    float* dist1_data = dist1.data<float>();\n    float* dist2_data = dist2.data<float>();\n    int* idx1_data = idx1.data<int>();\n    int* idx2_data = idx2.data<int>();\n\n    nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data);\n    nnsearch(batchsize, m, n, xyz2_data, xyz1_data, dist2_data, idx2_data);\n}\n\n\nvoid chamfer_distance_backward(\n    const at::Tensor xyz1, \n    const at::Tensor xyz2, \n    at::Tensor gradxyz1, \n    at::Tensor gradxyz2, \n    at::Tensor graddist1, \n    at::Tensor graddist2, \n    at::Tensor idx1, \n    at::Tensor idx2) \n{\n    const int b = xyz1.size(0);\n    const 
int n = xyz1.size(1);\n    const int m = xyz2.size(1);\n\n    const float* xyz1_data = xyz1.data<float>();\n    const float* xyz2_data = xyz2.data<float>();\n    float* gradxyz1_data = gradxyz1.data<float>();\n    float* gradxyz2_data = gradxyz2.data<float>();\n    float* graddist1_data = graddist1.data<float>();\n    float* graddist2_data = graddist2.data<float>();\n    const int* idx1_data = idx1.data<int>();\n    const int* idx2_data = idx2.data<int>();\n\n    for (int i = 0; i < b*n*3; i++)\n        gradxyz1_data[i] = 0;\n    for (int i = 0; i < b*m*3; i++)\n        gradxyz2_data[i] = 0;\n    for (int i = 0;i < b; i++) {\n        for (int j = 0; j < n; j++) {\n            const float x1 = xyz1_data[(i*n+j)*3+0];\n            const float y1 = xyz1_data[(i*n+j)*3+1];\n            const float z1 = xyz1_data[(i*n+j)*3+2];\n            const int j2 = idx1_data[i*n+j];\n\n            const float x2 = xyz2_data[(i*m+j2)*3+0];\n            const float y2 = xyz2_data[(i*m+j2)*3+1];\n            const float z2 = xyz2_data[(i*m+j2)*3+2];\n            const float g = graddist1_data[i*n+j]*2;\n\n            gradxyz1_data[(i*n+j)*3+0] += g*(x1-x2);\n            gradxyz1_data[(i*n+j)*3+1] += g*(y1-y2);\n            gradxyz1_data[(i*n+j)*3+2] += g*(z1-z2);\n            gradxyz2_data[(i*m+j2)*3+0] -= (g*(x1-x2));\n            gradxyz2_data[(i*m+j2)*3+1] -= (g*(y1-y2));\n            gradxyz2_data[(i*m+j2)*3+2] -= (g*(z1-z2));\n        }\n        for (int j = 0; j < m; j++) {\n            const float x1 = xyz2_data[(i*m+j)*3+0];\n            const float y1 = xyz2_data[(i*m+j)*3+1];\n            const float z1 = xyz2_data[(i*m+j)*3+2];\n            const int j2 = idx2_data[i*m+j];\n            const float x2 = xyz1_data[(i*n+j2)*3+0];\n            const float y2 = xyz1_data[(i*n+j2)*3+1];\n            const float z2 = xyz1_data[(i*n+j2)*3+2];\n            const float g = graddist2_data[i*m+j]*2;\n            gradxyz2_data[(i*m+j)*3+0] += g*(x1-x2);\n            
gradxyz2_data[(i*m+j)*3+1] += g*(y1-y2);\n            gradxyz2_data[(i*m+j)*3+2] += g*(z1-z2);\n            gradxyz1_data[(i*n+j2)*3+0] -= (g*(x1-x2));\n            gradxyz1_data[(i*n+j2)*3+1] -= (g*(y1-y2));\n            gradxyz1_data[(i*n+j2)*3+2] -= (g*(z1-z2));\n        }\n    }\n}\n\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n    m.def(\"forward\", &chamfer_distance_forward, \"ChamferDistance forward\");\n    m.def(\"forward_cuda\", &chamfer_distance_forward_cuda, \"ChamferDistance forward (CUDA)\");\n    m.def(\"backward\", &chamfer_distance_backward, \"ChamferDistance backward\");\n    m.def(\"backward_cuda\", &chamfer_distance_backward_cuda, \"ChamferDistance backward (CUDA)\");\n}\n"
  },
  {
    "path": "model/conpu_v6/chamfer_distance/chamfer_distance.cu",
    "content": "#include <ATen/ATen.h>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n__global__ \nvoid ChamferDistanceKernel(\n\tint b,\n\tint n,\n\tconst float* xyz,\n\tint m,\n\tconst float* xyz2,\n\tfloat* result,\n\tint* result_i)\n{\n\tconst int batch=512;\n\t__shared__ float buf[batch*3];\n\tfor (int i=blockIdx.x;i<b;i+=gridDim.x){\n\t\tfor (int k2=0;k2<m;k2+=batch){\n\t\t\tint end_k=min(m,k2+batch)-k2;\n\t\t\tfor (int j=threadIdx.x;j<end_k*3;j+=blockDim.x){\n\t\t\t\tbuf[j]=xyz2[(i*m+k2)*3+j];\n\t\t\t}\n\t\t\t__syncthreads();\n\t\t\tfor (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){\n\t\t\t\tfloat x1=xyz[(i*n+j)*3+0];\n\t\t\t\tfloat y1=xyz[(i*n+j)*3+1];\n\t\t\t\tfloat z1=xyz[(i*n+j)*3+2];\n\t\t\t\tint best_i=0;\n\t\t\t\tfloat best=0;\n\t\t\t\tint end_ka=end_k-(end_k&3);\n\t\t\t\tif (end_ka==batch){\n\t\t\t\t\tfor (int k=0;k<batch;k+=4){\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+0]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+1]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+2]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (k==0 || d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+3]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+4]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+5]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+1;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+6]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+7]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+8]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+2;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+9]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+10]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+11]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif 
(d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+3;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}else{\n\t\t\t\t\tfor (int k=0;k<end_ka;k+=4){\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+0]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+1]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+2]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (k==0 || d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+3]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+4]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+5]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+1;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+6]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+7]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+8]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+2;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tfloat x2=buf[k*3+9]-x1;\n\t\t\t\t\t\t\tfloat y2=buf[k*3+10]-y1;\n\t\t\t\t\t\t\tfloat z2=buf[k*3+11]-z1;\n\t\t\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\t\t\tif (d<best){\n\t\t\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\t\t\tbest_i=k+k2+3;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tfor (int k=end_ka;k<end_k;k++){\n\t\t\t\t\tfloat x2=buf[k*3+0]-x1;\n\t\t\t\t\tfloat y2=buf[k*3+1]-y1;\n\t\t\t\t\tfloat z2=buf[k*3+2]-z1;\n\t\t\t\t\tfloat d=x2*x2+y2*y2+z2*z2;\n\t\t\t\t\tif (k==0 || d<best){\n\t\t\t\t\t\tbest=d;\n\t\t\t\t\t\tbest_i=k+k2;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tif (k2==0 || result[(i*n+j)]>best){\n\t\t\t\t\tresult[(i*n+j)]=best;\n\t\t\t\t\tresult_i[(i*n+j)]=best_i;\n\t\t\t\t}\n\t\t\t}\n\t\t\t__syncthreads();\n\t\t}\n\t}\n}\n\nvoid ChamferDistanceKernelLauncher(\n    const int b, const int n,\n    const float* xyz,\n    const int m,\n    const float* xyz2,\n    float* result,\n    int* 
result_i,\n    float* result2,\n    int* result2_i)\n{\n\tChamferDistanceKernel<<<dim3(32,16,1),512>>>(b, n, xyz, m, xyz2, result, result_i);\n\tChamferDistanceKernel<<<dim3(32,16,1),512>>>(b, m, xyz2, n, xyz, result2, result2_i);\n\n\tcudaError_t err = cudaGetLastError();\n\tif (err != cudaSuccess)\n\t    printf(\"error in chamfer distance updateOutput: %s\\n\", cudaGetErrorString(err));\n}\n\n\n__global__ \nvoid ChamferDistanceGradKernel(\n\tint b, int n,\n\tconst float* xyz1,\n\tint m,\n\tconst float* xyz2,\n\tconst float* grad_dist1,\n\tconst int* idx1,\n\tfloat* grad_xyz1,\n\tfloat* grad_xyz2)\n{\n\tfor (int i = blockIdx.x; i<b; i += gridDim.x) {\n\t\tfor (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x*gridDim.y) {\n\t\t\tfloat x1=xyz1[(i*n+j)*3+0];\n\t\t\tfloat y1=xyz1[(i*n+j)*3+1];\n\t\t\tfloat z1=xyz1[(i*n+j)*3+2];\n\t\t\tint j2=idx1[i*n+j];\n\t\t\tfloat x2=xyz2[(i*m+j2)*3+0];\n\t\t\tfloat y2=xyz2[(i*m+j2)*3+1];\n\t\t\tfloat z2=xyz2[(i*m+j2)*3+2];\n\t\t\tfloat g=grad_dist1[i*n+j]*2;\n\t\t\tatomicAdd(&(grad_xyz1[(i*n+j)*3+0]),g*(x1-x2));\n\t\t\tatomicAdd(&(grad_xyz1[(i*n+j)*3+1]),g*(y1-y2));\n\t\t\tatomicAdd(&(grad_xyz1[(i*n+j)*3+2]),g*(z1-z2));\n\t\t\tatomicAdd(&(grad_xyz2[(i*m+j2)*3+0]),-(g*(x1-x2)));\n\t\t\tatomicAdd(&(grad_xyz2[(i*m+j2)*3+1]),-(g*(y1-y2)));\n\t\t\tatomicAdd(&(grad_xyz2[(i*m+j2)*3+2]),-(g*(z1-z2)));\n\t\t}\n\t}\n}\n\nvoid ChamferDistanceGradKernelLauncher(\n    const int b, const int n,\n    const float* xyz1,\n    const int m,\n    const float* xyz2,\n    const float* grad_dist1,\n    const int* idx1,\n    const float* grad_dist2,\n    const int* idx2,\n    float* grad_xyz1,\n    float* grad_xyz2)\n{\n\tcudaMemset(grad_xyz1, 0, b*n*3*4);\n\tcudaMemset(grad_xyz2, 0, b*m*3*4);\n\tChamferDistanceGradKernel<<<dim3(1,16,1), 256>>>(b, n, xyz1, m, xyz2, grad_dist1, idx1, grad_xyz1, grad_xyz2);\n\tChamferDistanceGradKernel<<<dim3(1,16,1), 256>>>(b, m, xyz2, n, xyz1, grad_dist2, idx2, grad_xyz2, grad_xyz1);\n\n\tcudaError_t 
err = cudaGetLastError();\n  \tif (err != cudaSuccess)\n\t    printf(\"error in chamfer distance get grad: %s\\n\", cudaGetErrorString(err));\n}\n"
  },
  {
    "path": "model/conpu_v6/chamfer_distance/chamfer_distance.py",
    "content": "\nimport torch\n\nfrom torch.utils.cpp_extension import load\ncd = load(name=\"cd\",\n          sources=[\"chamfer_distance/chamfer_distance.cpp\",\n                   \"chamfer_distance/chamfer_distance.cu\"],\n                   extra_cflags=['-g'])\n\nclass ChamferDistanceFunction(torch.autograd.Function):\n    @staticmethod\n    def forward(ctx, xyz1, xyz2):\n        batchsize, n, _ = xyz1.size()\n        _, m, _ = xyz2.size()\n        xyz1 = xyz1.contiguous()\n        xyz2 = xyz2.contiguous()\n        dist1 = torch.zeros(batchsize, n)\n        dist2 = torch.zeros(batchsize, m)\n\n        idx1 = torch.zeros(batchsize, n, dtype=torch.int)\n        idx2 = torch.zeros(batchsize, m, dtype=torch.int)\n        if not xyz1.is_cuda:\n            cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)\n        else:\n            dist1 = dist1.to(xyz1.device)\n            dist2 = dist2.to(xyz1.device)\n            idx1 = idx1.to(xyz1.device)\n            idx2 = idx2.to(xyz1.device)\n            \n            cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2)\n        ctx.save_for_backward(xyz1, xyz2, idx1, idx2)\n\n        return dist1, dist2\n\n    @staticmethod\n    def backward(ctx, graddist1, graddist2):\n        xyz1, xyz2, idx1, idx2 = ctx.saved_tensors\n\n        graddist1 = graddist1.contiguous()\n        graddist2 = graddist2.contiguous()\n\n        gradxyz1 = torch.zeros(xyz1.size())\n        gradxyz2 = torch.zeros(xyz2.size())\n\n        if not graddist1.is_cuda:\n            cd.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2)\n        else:\n            gradxyz1 = gradxyz1.to(graddist1.device)\n            gradxyz2 = gradxyz2.to(graddist1.device)\n            cd.backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2)\n\n        return gradxyz1, gradxyz2\n\n\nclass ChamferDistance(torch.nn.Module):\n    def forward(self, xyz1, xyz2):\n        return ChamferDistanceFunction.apply(xyz1, xyz2)\n"
  },
  {
    "path": "model/conpu_v6/chamfer_distance/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='chamferdis',\n    ext_modules=[\n        CUDAExtension('chamferdis', [\n            'chamfer_distance.cpp',\n            'chamfer_distance.cu',\n        ],\n                extra_compile_args=['-g']),\n    ],\n    cmdclass={\n        'build_ext': BuildExtension\n    })\n"
  },
  {
    "path": "model/conpu_v6/loss.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\nimport scipy.ndimage\nimport sys\nimport time\nsys.path.append('../')\nsys.path.append('../../')\nsys.path.append('../../../')\nsys.path.append('../../code/')\nimport cv2 as cv\nfrom PIL import Image\nfrom chamfer_distance import ChamferDistance\nchamfer_dist = ChamferDistance()\nimport glob\nimport trimesh\nimport random\nimport numpy as np\nimport math\nfrom math import ceil\nimport time\nimport cv2\nfrom PIL import Image\n#from options import TestOptions\n#import trimesh\nimport struct\nimport pickle\nfrom pointnet2 import pointnet2_utils as pn2_utils\n\n\nimport torch_tensor_functions\n\nclass Loss(nn.Module):\n    def __init__(self, args):\n        super(Loss, self).__init__()\n        self.args = args\n        \n    def loss_on_cd(self, deformation_p, p1):\n        thisbatchsize = deformation_p.size()[0]\n        output = 0\n        dist1, dist2 = chamfer_dist(deformation_p, p1)\n        output += (torch.sum(dist1) + torch.sum(dist2))*0.5\n        return output/thisbatchsize\n    \n    def loss_on_proj(self, p0, p1):\n        # p0 : B, M, 3\n        # p1 : B, N, 3\n        thisbatchsize = p0.size()[0]\n        output = 0\n        dis_map = torch_tensor_functions.compute_sqrdis_map(p0, p1)   # B, M, N\n\n        neighbour_id_01 = torch.topk(dis_map, k=5, dim=-1, largest= False)[1]\n        neighbour_dis_01 = torch.topk(dis_map, k=5, dim=-1, largest= False)[0]\n        neighbour_id_01 = neighbour_id_01[:,:,1:]\n        neighbour_coor_01 = torch_tensor_functions.indexing_neighbor(p1, neighbour_id_01)\n        neighbour_dis_01 = neighbour_dis_01[:,:,1:]\n        neighbour_weight_01 = neighbour_dis_01.detach() * 1000\n        neighbour_weight_01 = torch.exp(-1*neighbour_weight_01)\n        neighbour_weight_01 = neighbour_weight_01/(torch.sum(neighbour_weight_01, dim=-1, keepdim=True)+0.00001)\n        dis_01 = p0.view(thisbatchsize,-1,1,3) - 
neighbour_coor_01\n        dis_01 = torch.sum(torch.mul(dis_01, dis_01), dim=-1, keepdim=False)\n        pro_dis_01 = torch.mul(neighbour_weight_01, dis_01)\n        output += 0.5 * torch.sum(pro_dis_01)\n\n        neighbour_id_10 = torch.topk(dis_map, k=5, dim=1, largest= False)[1].transpose(2,1)\n        neighbour_dis_10 = torch.topk(dis_map, k=5, dim=1, largest= False)[0].transpose(2,1)\n        neighbour_id_10 = neighbour_id_10[:,:,1:]\n        neighbour_coor_10 = torch_tensor_functions.indexing_neighbor(p0, neighbour_id_10)\n        neighbour_dis_10 = neighbour_dis_10[:,:,1:]\n        neighbour_weight_10 = neighbour_dis_10.detach() * 1000\n        neighbour_weight_10 = torch.exp(-1*neighbour_weight_10)\n        neighbour_weight_10 = neighbour_weight_10/(torch.sum(neighbour_weight_10, dim=-1, keepdim=True)+0.00001)\n        dis_10 = p1.view(thisbatchsize,-1,1,3) - neighbour_coor_10\n        dis_10 = torch.sum(torch.mul(dis_10, dis_10), dim=-1, keepdim=False)\n        pro_dis_10 = torch.mul(neighbour_weight_10, dis_10)\n        output += 0.5 * torch.sum(pro_dis_10)\n\n        return output/thisbatchsize\n\n    \n    def loss_on_normal(self, p0, p1, n0, n1):\n        # p0 : B, M, 3 ; n0 : B, M, 3\n        # p1 : B, N, 3 ; n1 : B, N, 3\n        thisbatchsize = p0.size()[0]\n        output = 0\n        dis_map = torch_tensor_functions.compute_sqrdis_map(p0, p1)   # B, M, N\n\n        neighbour_id_01 = torch.topk(dis_map, k=5, dim=-1, largest= False)[1]\n        neighbour_dis_01 = torch.topk(dis_map, k=5, dim=-1, largest= False)[0]\n        neighbour_id_01 = neighbour_id_01[:,:,1:]\n        neighbour_normal_01 = torch_tensor_functions.indexing_neighbor(n1, neighbour_id_01)\n        neighbour_dis_01 = neighbour_dis_01[:,:,1:]\n        neighbour_weight_01 = neighbour_dis_01.detach() * 1000\n        neighbour_weight_01 = torch.exp(-1*neighbour_weight_01)\n        neighbour_weight_01 = neighbour_weight_01/(torch.sum(neighbour_weight_01,   dim=-1, 
keepdim=True)+0.00001)\n        dis_01 = n0.view(thisbatchsize,-1,1,3) - neighbour_normal_01\n        dis_01 = torch.sum(torch.mul(dis_01, dis_01), dim=-1, keepdim=False)\n        dis_01_ = n0.view(thisbatchsize,-1,1,3) + neighbour_normal_01\n        dis_01_ = torch.sum(torch.mul(dis_01_, dis_01_), dim=-1, keepdim=False)\n        bar_ = torch.sign(dis_01 - dis_01_)\n        dis_01_min = torch.mul((bar_+1)*0.5, dis_01_) + torch.mul((1-bar_)*0.5, dis_01)\n        dis_01_min = torch.mul(neighbour_weight_01, dis_01_min)\n        output += 0.5 * torch.sum(dis_01_min)\n\n        return output/thisbatchsize\n    \n    def loss_on_reg(self, gen_points_batch, train_points_sparse_batch):\n        thisbatchsize = gen_points_batch.size()[0]\n        output = 0\n        up_ratio_here = gen_points_batch.size()[1]//train_points_sparse_batch.size()[1]\n        gen_points_batch_ = gen_points_batch.view(thisbatchsize,-1,up_ratio_here,3)\n        train_points_sparse_batch_ = train_points_sparse_batch.view(thisbatchsize,-1,1,3)\n        dis = train_points_sparse_batch_ - gen_points_batch_\n        squdis = torch.sum(torch.mul(dis,dis),dim=-1,keepdim=True)\n        squdis_bar = squdis.detach()*0+0.04\n        squdis_sign = torch.sign(squdis.detach() - squdis_bar)*0.5+1\n        squdis = torch.mul(squdis,squdis_sign)\n        output += torch.sum(squdis)\n        return output/thisbatchsize\n    \n    def loss_on_arap(self, gen_points_batch, uv_sampling_coors):\n        thisbatchsize = gen_points_batch.size()[0]\n        output = 0\n        gen_points_batch_ = gen_points_batch.reshape(thisbatchsize*self.args.num_point, -1 ,3)\n        uv_sampling_coors_ = uv_sampling_coors.reshape(thisbatchsize*self.args.num_point, -1 ,2).detach()\n        uv_sampling_coors_ = torch.cat((uv_sampling_coors_, uv_sampling_coors_[:,:,:1]),dim=-1)\n        uv_sampling_coors_[:,:,2:]*=0\n        neighbour_indexes = torch_tensor_functions.get_neighbor_index(uv_sampling_coors_, 4) \n        uv_neibour_points_ = 
torch_tensor_functions.indexing_neighbor(uv_sampling_coors_, neighbour_indexes)\n        gen_neibour_points_ = torch_tensor_functions.indexing_neighbor(gen_points_batch_, neighbour_indexes)\n        uv_dis = uv_neibour_points_ - uv_sampling_coors_.view(thisbatchsize*self.args.num_point, -1 ,1, 3)\n        gen_dis = gen_neibour_points_ - gen_points_batch_.view(thisbatchsize*self.args.num_point, -1 ,1, 3)\n        uv_squ_dis = torch.sqrt( torch.sum(torch.mul(uv_dis, uv_dis),dim=-1) + 0.00000001 )\n        gen_squ_dis = torch.sqrt( torch.sum(torch.mul(gen_dis, gen_dis),dim=-1) + 0.00000001 )\n        uv_sum_dis = torch.sum(uv_squ_dis)\n        gen_sum_dis = torch.sum(gen_squ_dis).detach()\n        uv_squ_dis *= gen_sum_dis / uv_sum_dis\n        delta = uv_squ_dis - gen_squ_dis\n        output += torch.sum(torch.mul(delta, delta))\n        return output/thisbatchsize\n\n    def loss_on_overlap(self, gen_points_batch, train_points_sparse_batch):\n        thisbatchsize = gen_points_batch.size()[0]\n        output = 0\n        gen_points_batch_ = gen_points_batch.reshape(thisbatchsize*self.args.num_point, -1 ,3)\n        neighbour_indexes = torch_tensor_functions.get_neighbor_index(train_points_sparse_batch, 6) \n        sparse_neibour_points_ = torch_tensor_functions.indexing_neighbor(train_points_sparse_batch, neighbour_indexes)\n        sparse_neibour_points_ = sparse_neibour_points_.reshape(thisbatchsize*self.args.num_point, -1, 3)\n        cross_dis = torch_tensor_functions.compute_sqrdis_map(sparse_neibour_points_, gen_points_batch_)\n        dis = torch.sum(torch.min(cross_dis,dim=-1)[0])\n        output += dis\n        return output/thisbatchsize\n\n\n    def loss_on_ndirection(self, gen_points_batch, uv_sampling_coors, gen_normals_batch):\n        thisbatchsize = gen_points_batch.size()[0]\n        output = 0\n        # gen_points_batch_ = gen_points_batch.reshape(thisbatchsize*self.args.num_point, -1 ,3)\n        gen_normals_batch_ = 
gen_normals_batch.reshape(thisbatchsize*self.args.num_point, -1 ,3)\n        uv_sampling_coors_ = uv_sampling_coors.reshape(thisbatchsize*self.args.num_point, -1 ,2).detach()\n        uv_sampling_coors_ = torch.cat((uv_sampling_coors_, uv_sampling_coors_[:,:,:1]),dim=-1)\n        uv_sampling_coors_[:,:,2:]*=0\n        neighbour_indexes = torch_tensor_functions.get_neighbor_index(uv_sampling_coors_, 4) \n        uv_neibour_points_ = torch_tensor_functions.indexing_neighbor(uv_sampling_coors_, neighbour_indexes)\n        # gen_neibour_points_ = torch_tensor_functions.indexing_neighbor(gen_points_batch_, neighbour_indexes)\n        gen_neibour_normals_ = torch_tensor_functions.indexing_neighbor(gen_normals_batch_, neighbour_indexes)\n        gen_normals_batch_ = gen_normals_batch_.view(thisbatchsize*self.args.num_point, -1 ,1, 3)\n        gen_neibour_normals_delta_ = gen_neibour_normals_ - gen_normals_batch_\n        gen_neibour_normals_delta_squ = torch.mul(gen_neibour_normals_delta_, gen_neibour_normals_delta_)\n\n        normals_delta_squ_bar = gen_neibour_normals_delta_squ.detach()*0+1\n        normals_delta_squ_sign = torch.sign(gen_neibour_normals_delta_squ.detach() - normals_delta_squ_bar)*0.5+1\n        gen_neibour_normals_delta_squ = torch.mul(gen_neibour_normals_delta_squ, normals_delta_squ_sign)\n\n        output += torch.sum(gen_neibour_normals_delta_squ)\n        \n        return output/thisbatchsize\n\n\n    \n    def forward(self, gen_points_batch, gen_normals_batch, uv_sampling_coors, train_points_sparse_batch, train_normals_sparse_batch, train_points_dense_batch, train_normals_dense_batch):\n        thisbatchsize = gen_points_batch.size()[0]\n        loss = torch.mean(torch.zeros((1),dtype = torch.float, device=gen_points_batch.device))\n        zero_tensor = torch.mean(torch.zeros((1),dtype = torch.float, device=gen_points_batch.device))\n        loss_stages=[]\n        \n        if self.args.weight_cd > 0:\n            # L^{cd}  # n*3, n*3\n         
   loss_cd = 0 \n            loss_cd += self.loss_on_cd(gen_points_batch, train_points_dense_batch)\n            loss += loss_cd * self.args.weight_cd\n            loss_stages.append(loss_cd)\n        else:\n            loss_stages.append(zero_tensor)    \n\n        if self.args.weight_reg > 0:\n            # L^{reg}  # n*3, n*3\n            loss_reg = 0 \n            loss_reg += self.loss_on_reg(gen_points_batch, train_points_sparse_batch)\n            loss += loss_reg * self.args.weight_reg\n            loss_stages.append(loss_reg)\n        else:\n            loss_stages.append(zero_tensor) \n\n        if self.args.weight_arap > 0:\n            # L^{arap}  # \n            loss_arap = 0 \n            loss_arap += self.loss_on_arap(gen_points_batch, uv_sampling_coors)\n            loss += loss_arap * self.args.weight_arap\n            loss_stages.append(loss_arap)\n        else:\n            loss_stages.append(zero_tensor) \n\n\n        if self.args.weight_overlap > 0:\n            # L^{overlap}  # \n            loss_overlap = 0 \n            loss_overlap += self.loss_on_overlap(gen_points_batch, train_points_sparse_batch)\n            loss += loss_overlap * self.args.weight_overlap\n            loss_stages.append(loss_overlap)\n        else:\n            loss_stages.append(zero_tensor) \n           \n        \n        if self.args.weight_proj > 0:\n            # L^{proj}  # \n            loss_proj = 0 \n            loss_proj += self.loss_on_proj(gen_points_batch, train_points_dense_batch)\n            loss += loss_proj * self.args.weight_proj\n            loss_stages.append(loss_proj)\n        else:\n            loss_stages.append(zero_tensor) \n\n        if self.args.weight_normal > 0:\n            # L^{normal}  # \n            loss_normal = 0 \n            loss_normal += self.loss_on_normal(gen_points_batch, train_points_dense_batch, gen_normals_batch, train_normals_dense_batch)\n            loss += loss_normal * self.args.weight_normal\n            
loss_stages.append(loss_normal)\n        else:\n            loss_stages.append(zero_tensor) \n\n\n        if self.args.weight_ndirection > 0:\n            # L^{ndirection}  # \n            loss_ndirection = 0 \n            loss_ndirection += self.loss_on_ndirection(gen_points_batch, uv_sampling_coors, gen_normals_batch)\n            loss += loss_ndirection * self.args.weight_ndirection\n            loss_stages.append(loss_ndirection)\n        else:\n            loss_stages.append(zero_tensor) \n           \n            \n        return loss, loss_stages\n"
  },
  {
    "path": "model/conpu_v6/network.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\nfrom  torch.utils.data import DataLoader\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.autograd import grad\nimport math\nimport numpy as np\nimport torch.nn.init as init\nimport struct\nimport os\nimport sys\nimport glob\nimport h5py\nimport copy\nsys.path.append('../')\nsys.path.append('../../')\nsys.path.append('../../code')\nimport igl\nfrom torch_scatter import scatter\nfrom torch_geometric.utils import to_dense_batch\nimport torch_tensor_functions\nimport mesh_operations\nfrom pointnet2 import pointnet2_utils as pn2_utils\n#from chamfer_distance import ChamferDistance\n#chamfer_dist = ChamferDistance()\n\n\n######## TODO: START PART: FUNCTIONS ABOUT DGCNN. IT IS USED AS THE FEATURE EXTRACTOR IN OUR FRAMEWORK. ########\n#### The DGCNN network ####\nclass DGCNN_multi_knn_c5(nn.Module):\n    def __init__(self, emb_dims=512, args=None):\n        super(DGCNN_multi_knn_c5, self).__init__()\n        self.args = args\n        self.conv1 = nn.Conv2d(6, 64, kernel_size=1, bias=False)\n        init.xavier_normal_(self.conv1.weight, gain=1.0)\n        self.conv2 = nn.Conv2d(64*2, 64, kernel_size=1, bias=False)\n        init.xavier_normal_(self.conv2.weight, gain=1.0)\n        self.conv3 = nn.Conv2d(64*2, 128, kernel_size=1, bias=False)\n        init.xavier_normal_(self.conv3.weight, gain=1.0)\n        self.conv4 = nn.Conv2d(128*2, 256, kernel_size=1, bias=False)\n        init.xavier_normal_(self.conv4.weight, gain=1.0)\n        self.conv5 = nn.Conv2d(512, emb_dims, kernel_size=1, bias=False)\n        init.xavier_normal_(self.conv5.weight, gain=1.0)\n        self.bn1 = nn.BatchNorm2d(64)\n        self.bn2 = nn.BatchNorm2d(64)\n        self.bn3 = nn.BatchNorm2d(128)\n        self.bn4 = nn.BatchNorm2d(256)\n        self.bn5 = nn.BatchNorm2d(emb_dims)\n    def forward(self, x, if_relu_atlast = 
False):\n        batch_size, num_dims, num_points = x.size()\n        x = get_graph_feature(x) # This sub model get the graph-based features for the following 2D convs\n        # The x is similar with 2D image\n        if self.args.if_bn == True: x = F.relu(self.bn1(self.conv1(x)))\n        else: x = F.relu(self.conv1(x))\n        x1 = x.max(dim=-1, keepdim=False)[0]\n        x = get_graph_feature(x1)\n        if self.args.if_bn == True: x = F.relu(self.bn2(self.conv2(x))) \n        else: x = F.relu(self.conv2(x))\n        x2 = x.max(dim=-1, keepdim=False)[0]\n        x = get_graph_feature(x2)\n        if self.args.if_bn == True: x = F.relu(self.bn3(self.conv3(x))) \n        else: x = F.relu(self.conv3(x))\n        x3 = x.max(dim=-1, keepdim=False)[0]\n        x = get_graph_feature(x3)\n        if self.args.if_bn == True: x = F.relu(self.bn4(self.conv4(x))) \n        else: x = F.relu(self.conv4(x))\n        x4 = x.max(dim=-1, keepdim=False)[0]\n        x = torch.cat((x1, x2, x3, x4), dim=1).unsqueeze(3)\n        if if_relu_atlast == False:\n            return torch.tanh(self.conv5(x)).view(batch_size, -1, num_points)\n        x = F.relu(self.conv5(x)).view(batch_size, -1, num_points)\n        return x\n#### The knn function used in graph_feature ####\ndef knn(x, k):\n    inner = -2 * torch.matmul(x.transpose(2, 1).contiguous(), x)\n    xx = torch.sum(x ** 2, dim=1, keepdim=True)\n    pairwise_distance = -xx - inner - xx.transpose(2, 1).contiguous()\n    idx = pairwise_distance.topk(k=k, dim=-1)[1]  # (batch_size, num_points, k)\n    return idx\n#### The edge_feature used in DGCNN ####\ndef get_graph_feature(x, k=4):\n    idx = knn(x, k=k)  # (batch_size, num_points, k)\n    batch_size, num_points, _ = idx.size()\n    device = torch.device('cuda')\n    idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points\n    idx = idx + idx_base\n    idx = idx.view(-1)\n    _, num_dims, _ = x.size()\n    x = x.transpose(2,1).contiguous()  # (batch_size, 
num_points, num_dims)  -> (batch_size*num_points, num_dims) #   batch_size * num_points * k + range(0, batch_size*num_points)\n    feature = x.view(batch_size * num_points, -1)[idx, :]\n    feature = feature.view(batch_size, num_points, k, num_dims)\n    x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)\n    feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2)\n    return feature\n######## TODO: END PART: FUNCTIONS ABOUT DGCNN. IT IS USED AS THE FEATURE EXTRACTOR IN OUR FRAMEWORK. ########\n\n######## TODO: START PART: NEURAL IMPLICIT FUNCTION, MLP with ReLU. ########\n#### Construct the neural implicit function. ####\nclass MLPNet_relu(torch.nn.Module):\n    \"\"\" Multi-layer perception.\n        [B, Cin, N] -> [B, Cout, N] or\n        [B, Cin] -> [B, Cout]\n    \"\"\"\n    def __init__(self, nch_input, nch_layers, b_shared=True, bn_momentum=0.1, dropout=0.0, if_bn = True):\n        super().__init__()\n        list_layers = mlp_layers_relu(nch_input, nch_layers, b_shared, bn_momentum, dropout, if_bn)\n        self.layers = torch.nn.Sequential(*list_layers)\n    def forward(self, inp):\n        out = self.layers(inp)\n        return out\n#### Construct the mlp_layers of the neural implicit function. 
####\ndef mlp_layers_relu(nch_input, nch_layers, b_shared=True, bn_momentum=0.1, dropout=0.0, if_bn=True):\n    \"\"\" [B, Cin, N] -> [B, Cout, N] or\n        [B, Cin] -> [B, Cout]\n    \"\"\"\n    layers = []\n    last = nch_input\n    for i, outp in enumerate(nch_layers):\n        if b_shared:\n            weights = torch.nn.Conv1d(last, outp, 1)\n            init.xavier_normal_(weights.weight, gain=1.0)\n            # if i==0: init.uniform_(weights.weight, a=-(6/last)**0.5*30, b=(6/last)**0.5*30)\n            # else: init.uniform_(weights.weight, a=-(6/last)**0.5, b=(6/last)**0.5)\n        else:\n            weights = torch.nn.Linear(last, outp)\n            init.xavier_normal_(weights.weight, gain=1.0)\n        layers.append(weights)\n        if if_bn==True:\n            layers.append(torch.nn.BatchNorm1d(outp, momentum=bn_momentum))\n        layers.append(torch.nn.ReLU())\n        # layers.append(Sine())\n        if b_shared == False and dropout > 0.0:\n            layers.append(torch.nn.Dropout(dropout))\n        last = outp\n    return layers\n######## TODO: END PART: NEURAL IMPLICIT FUNCTION, MLP with ReLU. ########\n\n\n######## TODO: START PART: NEURAL IMPLICIT FUNCTION, MLP with SIREN. ########\n#### Construct the neural implicit function. ####\nclass MLPNet(torch.nn.Module):\n    \"\"\" Multi-layer perception.\n        [B, Cin, N] -> [B, Cout, N] or\n        [B, Cin] -> [B, Cout]\n    \"\"\"\n    def __init__(self, nch_input, nch_layers, b_shared=True, bn_momentum=0.1, dropout=0.0, if_bn = True):\n        super().__init__()\n        list_layers = mlp_layers(nch_input, nch_layers, b_shared, bn_momentum, dropout, if_bn)\n        self.layers = torch.nn.Sequential(*list_layers)\n    def forward(self, inp):\n        out = self.layers(inp)\n        return out\n#### Construct the mlp_layers of the neural implicit function. 
####\ndef mlp_layers(nch_input, nch_layers, b_shared=True, bn_momentum=0.1, dropout=0.0, if_bn=True):\n    \"\"\" [B, Cin, N] -> [B, Cout, N] or\n        [B, Cin] -> [B, Cout]\n    \"\"\"\n    layers = []\n    last = nch_input\n    for i, outp in enumerate(nch_layers):\n        if b_shared:\n            weights = torch.nn.Conv1d(last, outp, 1)\n            #init.xavier_normal_(weights.weight, gain=1.0)\n            if i==0: init.uniform_(weights.weight, a=-(6/last)**0.5*30, b=(6/last)**0.5*30)\n            else: init.uniform_(weights.weight, a=-(6/last)**0.5, b=(6/last)**0.5)\n        else:\n            weights = torch.nn.Linear(last, outp)\n            init.xavier_normal_(weights.weight, gain=1.0)\n        layers.append(weights)\n        if if_bn==True:\n            layers.append(torch.nn.BatchNorm1d(outp, momentum=bn_momentum))\n        #layers.append(torch.nn.ReLU())\n        layers.append(Sine())\n        if b_shared == False and dropout > 0.0:\n            layers.append(torch.nn.Dropout(dropout))\n        last = outp\n    return layers\n#### The nn.Moudle Sine, as the activation function, used in the nearal implicit function. ####\nclass Sine(nn.Module):\n    def __init(self):\n        super().__init__()\n    def forward(self, input):\n        return torch.sin(input)\n######## TODO: END PART: NEURAL IMPLICIT FUNCTION, MLP with SIREN. 
########\n\n\n######## TODO: START PART: OUR OWN NETWORK ########\n#### The main network ####\nclass Net_conpu_v7(nn.Module):\n    def __init__(self, args):\n        super(Net_conpu_v7, self).__init__()\n        # basic settings\n        self.args = args # the args\n        self.emb_dims = args.emb_dims # the dim of the embedded feture\n        self.up_ratio = -1 # the upsampling factor\n        self.over_sampling_up_ratio = -1 # the scale of over-sampling\n        self.mlp_fitting_str = self.args.mlp_fitting_str\n        self.mlp_fitting = convert_str_2_list(self.mlp_fitting_str) # the channels of the layers in the MLP\n        ######################## START PART : LAYERS #########################\n        ## 1. The point-wise feature extraction, DGCNN.\n        self.emb_nn_sparse = DGCNN_multi_knn_c5(emb_dims=self.emb_dims, args=self.args) # the DGCNN backbone, which is shared by all the local parts\n        ## 2. The Neural Field, MLP.\n        if self.args.if_use_siren==True: self.fitting_mlp = MLPNet(2*self.emb_dims+(self.args.pe_out_L*4+2), self.mlp_fitting, b_shared=True, if_bn =False).layers\n        else: self.fitting_mlp = MLPNet_relu(2*self.emb_dims+(self.args.pe_out_L*4+2), self.mlp_fitting, b_shared=True, if_bn =False).layers   \n        self.reconstruct_out_p = torch.nn.Conv1d(self.mlp_fitting[-1], 3, 1)\n        init.xavier_normal_(self.reconstruct_out_p.weight, gain=1.0)\n        self.convert_feature_to_point_2to3 = torch.nn.Sequential(self.fitting_mlp, self.reconstruct_out_p)   # the Neural Field Fuction (MLP) \n        ######################## END PART : LAYERS #########################\n    \n    def forward(self, points_sparse):\n        # The input [points_sparse] should be in shape (thisbatchsize, self.args.num_point, 3)\n        thisbatchsize = points_sparse.size()[0]\n        neighbour_indexes_ = torch_tensor_functions.get_neighbor_index(points_sparse, self.args.feature_unfolding_nei_num)   # thisbatchsize, self.args.num_point, 
neighbor_num\n        ######### How to set the uv_sampling_coors ?\n        #### We DON'T NEED TO give the network the uv_sampling_coors, it would be computed automatically. And the up_ratio should be training_up_ratio/testing_up_ratio, depending on self.training.\n        uv_sampling_coors=torch.ones([1]).float().cuda()\n        if self.training == True : self.up_ratio = self.args.training_up_ratio\n        else : self.up_ratio = self.args.testing_up_ratio\n        self.over_sampling_up_ratio = int(self.up_ratio * self.args.over_sampling_scale)\n        if self.args.if_fix_sample == True: uv_sampling_coors = fix_sample(thisbatchsize, self.args.num_point, self.over_sampling_up_ratio)\n        else: \n            uv_sampling_coors_1 = uniform_random_sample(thisbatchsize, self.args.num_point, self.over_sampling_up_ratio-4)\n            uv_sampling_coors_2 = fix_sample(thisbatchsize, self.args.num_point, 4)\n            uv_sampling_coors_ = torch.cat((uv_sampling_coors_1, uv_sampling_coors_2), dim=2) \n            uv_sampling_coors = copy.deepcopy(uv_sampling_coors_.detach())\n        uv_sampling_coors = uv_sampling_coors.detach().contiguous()   # thisbatchsize, self.args.num_point, self.over_sampling_up_ratio, 2\n        uv_sampling_coors.requires_grad=True\n        ######### Set the uv_sampling_coors, Done.\n\n        # compute the point-wise feature, updated with local pooling\n        neighbour_indexes_feature_extract = torch_tensor_functions.get_neighbor_index(points_sparse, self.args.neighbor_k)   # bs, vertice_num, neighbor_num\n        points_in_local_patch_form = torch_tensor_functions.indexing_by_id(points_sparse,neighbour_indexes_feature_extract)\n        points_in_local_patch_form = points_in_local_patch_form - points_sparse.view(thisbatchsize,self.args.num_point,1,3)\n        points_in_local_patch_form = points_in_local_patch_form.view(thisbatchsize*self.args.num_point, self.args.neighbor_k, 3)\n        sparse_embedding = 
self.emb_nn_sparse(points_in_local_patch_form.transpose(1,2))  # B*num_point, self.emb_dims, self.neighbor_k\n        sparse_embedding = torch.max(sparse_embedding,dim=-1,keepdim=False)[0].view(thisbatchsize,self.args.num_point,-1).permute(0,2,1)\n        local_features_pooling = torch_tensor_functions.indexing_neighbor(sparse_embedding.transpose(1,2), neighbour_indexes_).permute(0,3,2,1)\n        local_features_pooling = torch.max(local_features_pooling, dim=2, keepdim=False)[0]\n        sparse_embedding = torch.cat((sparse_embedding,local_features_pooling),dim=1)\n        sparse_embedding = sparse_embedding.permute(0,2,1)  # thisbatchsize, self.args.num_point, self.emb_dims*2\n        \n\n        # get the uv_sampling_coors_id_in_sparse\n        uv_sampling_coors_id_in_sparse = torch.arange(self.args.num_point).view(1,-1,1).long()\n        uv_sampling_coors_id_in_sparse = uv_sampling_coors_id_in_sparse.expand(thisbatchsize,-1,self.over_sampling_up_ratio).reshape(thisbatchsize,-1,1)\n        upsampled_p, upsampled_np = self.convert_uv_to_xyzn(uv_sampling_coors.reshape(thisbatchsize,-1,2), uv_sampling_coors_id_in_sparse, sparse_embedding, points_sparse) # thisbatchsize, self.args.num_point*self.over_sampling_up_ratio, 3\n        \n\n        upsampled_p_fps_id = pn2_utils.furthest_point_sample(upsampled_p.contiguous(), self.up_ratio*self.args.num_point)\n        querying_points_3d = pn2_utils.gather_operation(upsampled_p.permute(0, 2, 1).contiguous(), upsampled_p_fps_id)\n        querying_points_n_3d = pn2_utils.gather_operation(upsampled_np.permute(0, 2, 1).contiguous(), upsampled_p_fps_id)\n        querying_points_3d = querying_points_3d.permute(0,2,1).contiguous()\n        querying_points_n_3d = querying_points_n_3d.permute(0,2,1).contiguous()\n\n        # Get the final upsampled points from the 3D querying points\n        glued_points, glued_normals = self.project_3d_query_point_to_patches(querying_points_3d, querying_points_n_3d, points_sparse, upsampled_p, 
upsampled_np)\n\n        \n\n        \n\n        # Notice that the returned uv_sampling_coors is not differentiable, just used to compute the loss.\n        return upsampled_p, upsampled_np, uv_sampling_coors, querying_points_3d, querying_points_n_3d, glued_points, glued_normals\n\n    def project_3d_query_point_to_patches(self, querying_points_3d, querying_points_n_3d, points_sparse, upsampled_p, upsampled_np):\n        # All3dQueryPointNum = self.args.num_point * self.up_ratio\n        # All2dQueryPointNum = self.args.num_point * self.over_sampling_up_ratio\n        # querying_points_3d     | should be in size : thisbatchsize, All3dQueryPointNum, 3\n        # querying_points_n_3d   | should be in size : thisbatchsize, All3dQueryPointNum, 3\n        # points_sparse          | should be in size : thisbatchsize, self.args.num_point, 3\n        # upsampled_p            | should be in size : thisbatchsize, All2dQueryPointNum, 3\n        # upsampled_np           | should be in size : thisbatchsize, All2dQueryPointNum, 3\n        \n        thisbatchsize = querying_points_3d.size()[0]\n        All3dQueryPointNum = querying_points_3d.size()[1]\n        All2dQueryPointNum = upsampled_p.size()[1]\n        #### Distribute the 3d querying points to the center points. ####\n        # 1. compute the distance map bwtween 3d querying points and center points. \n        querying_points_3d__center_p__dismap = torch_tensor_functions.compute_sqrdis_map(querying_points_3d, points_sparse)    # thisbatchsize, All3dQueryPointNum, self.args.num_point\n        # 2. find the neighbour ID from the 3d querying points to the center points.\n        querying_points_3d_distribute_to_centers_nei_id = torch.topk(querying_points_3d__center_p__dismap, k=self.args.glue_neighbor, dim=2, largest=False)[1] # thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor\n        # 3. 
find the nearest distance from the 3d querying points to the center points.\n        querying_points_3d_distribute_to_centers_nei_dis = torch.topk(querying_points_3d__center_p__dismap, k=self.args.glue_neighbor, dim=2, largest=False)[0].detach() # thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor \n        # 4. find the nearest points coordinates from the 3d querying points to the center points.\n        querying_points_3d_distribute_to_centers_nei_coor = torch_tensor_functions.indexing_by_id(points_sparse, querying_points_3d_distribute_to_centers_nei_id) # thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 3 \n        # 5. compute the weight of the 3d querying points distributed to their neighbour center points. \n        Alpha_glue = 1.0/torch.mean(querying_points_3d_distribute_to_centers_nei_dis) \n        querying_points_3d_distribute_to_centers_nei_weight = torch.exp( -1 * Alpha_glue * querying_points_3d_distribute_to_centers_nei_dis )\n        querying_points_3d_distribute_to_centers_nei_weight = querying_points_3d_distribute_to_centers_nei_weight / (torch.sum(querying_points_3d_distribute_to_centers_nei_weight,dim=-1,keepdim=True)+0.0000001)  # thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor. The last dim should sum up to 1.\n\n        #### Project the 3d querying points to their neighbour patches. ####\n        #### In this part, we can get a (thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 3)-shaped tensor, which should be multiplied with the weight above.\n        # For each 3d querying point's each neighbour patch , find the projection points in the patch.  
\n        querying_points_3d_ = querying_points_3d.view(thisbatchsize, All3dQueryPointNum, 1, 3)\n        querying_points_3d_ = querying_points_3d_.expand(-1, -1, self.args.glue_neighbor, -1)\n        querying_points_3d_ = querying_points_3d_.reshape(thisbatchsize, All3dQueryPointNum*self.args.glue_neighbor, 1, 3)  \n\n        upsampled_p_ = upsampled_p.view(thisbatchsize, self.args.num_point, -1, 3)\n        upsampled_np_ = upsampled_np.view(thisbatchsize, self.args.num_point, -1, 3)\n        up_ratio_here = upsampled_p_.size()[2]\n        upsampled_p_ = upsampled_p_.reshape(thisbatchsize, self.args.num_point, -1)\n        upsampled_np_ = upsampled_np_.reshape(thisbatchsize, self.args.num_point, -1)\n        all_queried_patches = torch_tensor_functions.indexing_by_id(upsampled_p_, querying_points_3d_distribute_to_centers_nei_id)\n        all_queried_patchesn = torch_tensor_functions.indexing_by_id(upsampled_np_, querying_points_3d_distribute_to_centers_nei_id)\n        all_queried_patches = all_queried_patches.view(thisbatchsize, All3dQueryPointNum*self.args.glue_neighbor, up_ratio_here, 3)\n        all_queried_patchesn = all_queried_patchesn.view(thisbatchsize, All3dQueryPointNum*self.args.glue_neighbor, up_ratio_here, 3)\n        \n\n        dis_from_3d_querying_points_to_its_corresponidng_patch = querying_points_3d_ - all_queried_patches\n        dis_from_3d_querying_points_to_its_corresponidng_patch = torch.sum( torch.mul(dis_from_3d_querying_points_to_its_corresponidng_patch, dis_from_3d_querying_points_to_its_corresponidng_patch) , dim = -1, keepdim = False)\n        nei_id_from_3d_querying_points_to_its_corresponidng_patch = torch.topk(dis_from_3d_querying_points_to_its_corresponidng_patch, dim =-1, k=self.args.proj_neighbor,largest=False)[1].reshape(thisbatchsize*All3dQueryPointNum*self.args.glue_neighbor, 1, self.args.proj_neighbor)\n        nei_dis_from_3d_querying_points_to_its_corresponidng_patch = 
torch.topk(dis_from_3d_querying_points_to_its_corresponidng_patch, dim =-1, k=self.args.proj_neighbor,largest=False)[0].reshape(thisbatchsize*All3dQueryPointNum*self.args.glue_neighbor, 1, self.args.proj_neighbor)\n        all_queried_patches_ = all_queried_patches.view(thisbatchsize*All3dQueryPointNum*self.args.glue_neighbor, up_ratio_here, 3)\n        all_queried_patchesn_ = all_queried_patchesn.view(thisbatchsize*All3dQueryPointNum*self.args.glue_neighbor, up_ratio_here, 3)\n        nei_coor_from_3d_querying_points_to_its_corresponidng_patch = torch_tensor_functions.indexing_by_id(all_queried_patches_, nei_id_from_3d_querying_points_to_its_corresponidng_patch)\n        nei_ncoor_from_3d_querying_points_to_its_corresponidng_patch = torch_tensor_functions.indexing_by_id(all_queried_patchesn_, nei_id_from_3d_querying_points_to_its_corresponidng_patch)\n        nei_weight_from_3d_querying_points_to_its_corresponidng_patch = torch.exp( -1000 * nei_dis_from_3d_querying_points_to_its_corresponidng_patch)\n        nei_weight_from_3d_querying_points_to_its_corresponidng_patch = nei_weight_from_3d_querying_points_to_its_corresponidng_patch / (torch.sum(nei_weight_from_3d_querying_points_to_its_corresponidng_patch, dim=-1, keepdim=True) +0.0000001 )\n        nei_weight_from_3d_querying_points_to_its_corresponidng_patch = nei_weight_from_3d_querying_points_to_its_corresponidng_patch.view(thisbatchsize*All3dQueryPointNum*self.args.glue_neighbor, 1, self.args.proj_neighbor,1)\n        projected_points = torch.sum(nei_weight_from_3d_querying_points_to_its_corresponidng_patch * nei_coor_from_3d_querying_points_to_its_corresponidng_patch, dim =2, keepdim=False )\n        projected_pointsn = torch.sum(nei_weight_from_3d_querying_points_to_its_corresponidng_patch * nei_ncoor_from_3d_querying_points_to_its_corresponidng_patch, dim =2, keepdim=False )\n        projected_points = projected_points.view(thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 3)  # thisbatchsize, 
All3dQueryPointNum, self.args.glue_neighbor, 3\n        projected_pointsn = projected_pointsn.view(thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 3)  # thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 3\n        \n        projected_pointsn_sign = projected_pointsn.detach()\n        projected_pointsn_sign_ref = projected_pointsn_sign[:,:,0:1,:].expand(-1,-1,self.args.glue_neighbor,-1)\n        projected_pointsn_sign = torch.sum(torch.mul(projected_pointsn_sign, projected_pointsn_sign_ref) ,dim=-1, keepdim=True ).expand(-1,-1,-1,3)\n        projected_pointsn_sign = torch.sign(projected_pointsn_sign+0.1)\n        \n        # correct the direction of the normals.\n        projected_pointsn = torch.mul(projected_pointsn, projected_pointsn_sign)\n        #### Glue the 3d upsampled points. ####\n        glued_points = torch.sum( projected_points * querying_points_3d_distribute_to_centers_nei_weight.view(thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 1), dim = 2 , keepdim=False)\n        glued_normals = torch.sum( projected_pointsn * querying_points_3d_distribute_to_centers_nei_weight.view(thisbatchsize, All3dQueryPointNum, self.args.glue_neighbor, 1), dim = 2 , keepdim=False)\n        return glued_points, glued_normals\n    \n    \n    def convert_uv_to_xyzn(self, uv_coor, uv_coor_idx_in_sparse, sparse_embedding, points_sparse):\n        # uv_coor                | should be in size : thisbatchsize, All2dQueryPointNum, 2\n        # uv_coor_idx_in_sparse  | should be in size : thisbatchsize, All2dQueryPointNum, 1\n        # sparse_embedding       | should be in size : thisbatchsize, sparse_point_num, embedding_dim\n        # points_sparse          | should be in size : thisbatchsize, sparse_point_num, 3\n        thisbatchsize = uv_coor.size()[0]\n        All2dQueryPointNum = uv_coor.size()[1]\n        converted2to3_p = self.convert_uv_to_xyz(uv_coor, uv_coor_idx_in_sparse, sparse_embedding, points_sparse)\n        \n        
converted2to3_p_x = converted2to3_p[:,:,0:1].reshape(thisbatchsize*All2dQueryPointNum,1)\n        grad_x_uv = cal_grad(uv_coor, converted2to3_p_x).reshape(thisbatchsize*All2dQueryPointNum,2,1)\n        converted2to3_p_y = converted2to3_p[:,:,1:2].reshape(thisbatchsize*All2dQueryPointNum,1)\n        grad_y_uv = cal_grad(uv_coor, converted2to3_p_y).reshape(thisbatchsize*All2dQueryPointNum,2,1)\n        converted2to3_p_z = converted2to3_p[:,:,2:3].reshape(thisbatchsize*All2dQueryPointNum,1)\n        grad_z_uv = cal_grad(uv_coor, converted2to3_p_z).reshape(thisbatchsize*All2dQueryPointNum,2,1)\n\n        grad_uv = torch.cat((grad_x_uv, grad_y_uv, grad_z_uv), dim=-1)\n        grad_u = grad_uv[:,0:1,:].view(-1,3)\n        grad_v = grad_uv[:,1:2,:].view(-1,3)\n\n        converted2to3_np = torch.cross(grad_u.reshape(-1,3), grad_v.reshape(-1,3))\n        converted2to3_np_norm = torch.norm(converted2to3_np, dim=1).view(-1,1) +0.000001\n        converted2to3_np = converted2to3_np/converted2to3_np_norm\n        converted2to3_np = converted2to3_np.view(thisbatchsize,-1,3)\n\n        return converted2to3_p, converted2to3_np\n\n\n    def convert_uv_to_xyz(self, uv_coor, uv_coor_idx_in_sparse, sparse_embedding, points_sparse):\n        # uv_coor                | should be in size : thisbatchsize, All2dQueryPointNum, 2\n        # uv_coor_idx_in_sparse  | should be in size : thisbatchsize, All2dQueryPointNum, 1\n        # sparse_embedding       | should be in size : thisbatchsize, sparse_point_num, embedding_dim\n        # points_sparse          | should be in size : thisbatchsize, sparse_point_num, 3\n        thisbatchsize = uv_coor.size()[0]\n        All2dQueryPointNum = uv_coor.size()[1]\n        coding_dim = 4*self.args.pe_out_L + 2\n        uv_encoded = position_encoding(uv_coor.reshape(-1,2).contiguous(), self.args.pe_out_L).view(thisbatchsize, All2dQueryPointNum, coding_dim).permute(0,2,1) # bs, coding_dim, All2dQueryPointNum\n        indexed_sparse_feature = 
torch_tensor_functions.indexing_by_id(sparse_embedding, uv_coor_idx_in_sparse)  # bs, All2dQueryPointNum, 1, embedding_num \n        indexed_sparse_feature = indexed_sparse_feature.view(thisbatchsize, All2dQueryPointNum, -1).transpose(2,1)  # bs, embedding_num, All2dQueryPointNum\n        coding_with_feature = torch.cat((indexed_sparse_feature, uv_encoded), dim=1)\n        out_p = self.convert_feature_to_point_2to3(coding_with_feature).view(thisbatchsize, -1, All2dQueryPointNum).permute(0,2,1)\n        indexed_center_points = torch_tensor_functions.indexing_by_id(points_sparse, uv_coor_idx_in_sparse).view(thisbatchsize, All2dQueryPointNum, 3)\n        out_p = out_p + indexed_center_points\n        return out_p\n    \n    def convert_xyz_to_uv(self, xyz_coor, xyz_coor_idx_in_sparse, sparse_embedding, points_sparse):\n        # xyz_coor               | should be in size : thisbatchsize, All2dQueryPointNum, 3\n        # uv_coor_idx_in_sparse  | should be in size : thisbatchsize, All2dQueryPointNum, 1\n        # sparse_embedding       | should be in size : thisbatchsize, sparse_point_num, embedding_dim\n        # points_sparse          | should be in size : thisbatchsize, sparse_point_num, 3\n        # return : out_uv        | should be in size : thisbatchsize, All2dQueryPointNum, 2\n        thisbatchsize = xyz_coor.size()[0]\n        All2dQueryPointNum = xyz_coor.size()[1]\n        coding_dim = 6*self.args.pe_out_L + 3\n        indexed_center_points = torch_tensor_functions.indexing_by_id(points_sparse, xyz_coor_idx_in_sparse).view(thisbatchsize, All2dQueryPointNum, 3)\n        xyz_coor_remove_center = xyz_coor - indexed_center_points\n        xyz_encoded = position_encoding(xyz_coor.reshape(-1,3), self.args.pe_out_L).view(thisbatchsize, All2dQueryPointNum, coding_dim).permute(0,2,1) # bs, coding_dim, All2dQueryPointNum\n        indexed_sparse_feature = torch_tensor_functions.indexing_by_id(sparse_embedding, xyz_coor_idx_in_sparse)  # bs, All2dQueryPointNum, 1, 
embedding_num \n        indexed_sparse_feature = indexed_sparse_feature.view(thisbatchsize, All2dQueryPointNum, -1).transpose(2,1)  # bs, embedding_num, All2dQueryPointNum\n        coding_with_feature = torch.cat((xyz_encoded, indexed_sparse_feature), dim = 1)\n        out_uv = self.convert_feature_to_point_3to2(coding_with_feature).view(thisbatchsize, -1, All2dQueryPointNum).permute(0,2,1)\n        return out_uv\n        \n\n#### Convert a string to num_list ####      \ndef convert_str_2_list(str_):\n    words = str_.split(' ')\n    trt = [int(x) for x in words]\n    return trt\n#### Compute the position code for uv or xyz. ####\ndef position_encoding(input_uv, pe_out_L):\n    ## The input_uv should be with shape (-1, X)\n    ## The returned tensor should be with shape (-1, X+2*X*L)\n    ## X = 2/3 if the input is uv/xyz.\n    trt = input_uv\n    for i in range(pe_out_L):\n        trt = torch.cat((trt, torch.sin(input_uv*(2**i)*(3.14159265))) , dim=-1 )\n        trt = torch.cat((trt, torch.cos(input_uv*(2**i)*(3.14159265))) , dim=-1 )\n    return trt\n#### Sample uv by a fixed manner. 
#### \ndef fix_sample(thisbatchsize, num_point, up_ratio, if_random=False):\n    if if_random==True: \n        print('Random sampling mode is not supported right now.')\n        exit()\n    if up_ratio == 4:\n        one_point_fixed = [ [ [0,0] for i in range(2)] for j in range(2) ]\n        for i in range(2):\n            for j in range(2):\n                one_point_fixed[i][j][0] = (i/1) *2 -1\n                one_point_fixed[i][j][1] = (j/1) *2 -1\n        one_point_fixed = np.array(one_point_fixed).reshape(4,2)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_point_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_batch_uv2d_random_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.tile(one_batch_uv2d_random_fixed,[thisbatchsize, num_point, 1,1])\n        one_batch_uv2d_random_fixed_tensor = torch.from_numpy(one_batch_uv2d_random_fixed).cuda().float()\n        return one_batch_uv2d_random_fixed_tensor\n    if up_ratio == 9:\n        one_point_fixed = [ [ [0,0] for i in range(3)] for j in range(3) ]\n        for i in range(3):\n            for j in range(3):\n                one_point_fixed[i][j][0] = (i/2) *2 -1\n                one_point_fixed[i][j][1] = (j/2) *2 -1\n        one_point_fixed = np.array(one_point_fixed).reshape(9,2)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_point_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_batch_uv2d_random_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.tile(one_batch_uv2d_random_fixed,[thisbatchsize, num_point, 1,1])\n        one_batch_uv2d_random_fixed_tensor = torch.from_numpy(one_batch_uv2d_random_fixed).cuda().float()\n        return one_batch_uv2d_random_fixed_tensor\n    if up_ratio == 16:\n        one_point_fixed = [ [ [0,0] for i in range(4)] for j in range(4) ]\n        for i in range(4):\n            for j in range(4):\n                one_point_fixed[i][j][0] = (i/3) *2 -1\n                one_point_fixed[i][j][1] = (j/3) *2 
-1\n        one_point_fixed = np.array(one_point_fixed).reshape(16,2)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_point_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_batch_uv2d_random_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.tile(one_batch_uv2d_random_fixed,[thisbatchsize, num_point, 1,1])\n        one_batch_uv2d_random_fixed_tensor = torch.from_numpy(one_batch_uv2d_random_fixed).cuda().float()\n        return one_batch_uv2d_random_fixed_tensor\n    if up_ratio == 64:\n        one_point_fixed = [ [ [0,0] for i in range(8)] for j in range(8) ]\n        for i in range(8):\n            for j in range(8):\n                one_point_fixed[i][j][0] = (i/7) *2 -1\n                one_point_fixed[i][j][1] = (j/7) *2 -1\n        one_point_fixed = np.array(one_point_fixed).reshape(64,2)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_point_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_batch_uv2d_random_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.tile(one_batch_uv2d_random_fixed,[thisbatchsize, num_point, 1,1])\n        one_batch_uv2d_random_fixed_tensor = torch.from_numpy(one_batch_uv2d_random_fixed).cuda().float()\n        return one_batch_uv2d_random_fixed_tensor\n    if up_ratio == 1024:\n        one_point_fixed = [ [ [0,0] for i in range(32)] for j in range(32) ]\n        for i in range(32):\n            for j in range(32):\n                one_point_fixed[i][j][0] = (i/31) *2 -1\n                one_point_fixed[i][j][1] = (j/31) *2 -1\n        one_point_fixed = np.array(one_point_fixed).reshape(1024,2)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_point_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.expand_dims(one_batch_uv2d_random_fixed,axis=0)\n        one_batch_uv2d_random_fixed = np.tile(one_batch_uv2d_random_fixed,[thisbatchsize, num_point, 1,1])\n        one_batch_uv2d_random_fixed_tensor = 
torch.from_numpy(one_batch_uv2d_random_fixed).cuda().float()\n        return one_batch_uv2d_random_fixed_tensor\n    else:\n        print('This up_ratio ('+str(up_ratio)+') is not supported now. You can try the random mode!')\n        exit()\n#### Sample uv uniformly in (-1,1). #### \ndef uniform_random_sample(thisbatchsize, num_point, up_ratio):\n    # return : randomly and uniformly sampled uv_coors   |   Its shape should be : thisbatchsize, num_point, up_ratio, 2\n    res_ = torch.rand(thisbatchsize*num_point, 4*up_ratio, 3)*2-1\n    res_ = res_.cuda()\n    res_[:,:,2:]*=0\n    furthest_point_index = pn2_utils.furthest_point_sample(res_,up_ratio)\n    uniform_res_ = pn2_utils.gather_operation(res_.permute(0, 2, 1).contiguous(), furthest_point_index)\n    uniform_res_ = uniform_res_.permute(0,2,1).contiguous()\n    uniform_res_ = uniform_res_[:,:,:2].view(thisbatchsize, num_point, up_ratio, 2)\n    return uniform_res_\n#### Compute the grad ####\ndef cal_grad(inputs, outputs):\n    d_points = torch.ones_like(outputs, requires_grad = False, device = outputs.device)\n    points_grad = grad(\n        outputs = outputs,\n        inputs = inputs,\n        grad_outputs = d_points,\n        create_graph = True,\n        retain_graph = True,\n        only_inputs = True)[0]\n    return points_grad\n\n\n\n######## TODO: END PART: OUR OWN NETWORK ########\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/__init__.py",
    "content": ""
  },
  {
    "path": "model/conpu_v6/pointnet2/pointnet2_modules.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom . import pointnet2_utils\nfrom . import pytorch_utils as pt_utils\nfrom typing import List\n\n\nclass _PointnetSAModuleBase(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n        self.npoint = None\n        self.groupers = None\n        self.mlps = None\n        self.pool_method = 'max_pool'\n\n    def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, npoint=None, new_xyz=None) -> (torch.Tensor, torch.Tensor):\n        \"\"\"\n        :param xyz: (B, N, 3) tensor of the xyz coordinates of the features\n        :param features: (B, N, C) tensor of the descriptors of the the features\n        :param new_xyz:\n        :return:\n            new_xyz: (B, npoint, 3) tensor of the new features' xyz\n            new_features: (B, npoint, \\sum_k(mlps[k][-1])) tensor of the new_features descriptors\n        \"\"\"\n        if npoint is not None:\n            self.npoint = npoint\n        new_features_list = []\n\n        xyz_flipped = xyz.transpose(1, 2).contiguous()\n        if new_xyz is None:\n            new_xyz = pointnet2_utils.gather_operation(\n                xyz_flipped,\n                pointnet2_utils.furthest_point_sample(xyz, self.npoint)\n            ).transpose(1, 2).contiguous() if self.npoint is not None else None\n\n        for i in range(len(self.groupers)):\n            new_features = self.groupers[i](xyz, new_xyz, features)  # (B, C, npoint, nsample)\n            new_features = self.mlps[i](new_features)  # (B, mlp[-1], npoint, nsample)\n            if self.pool_method == 'max_pool':\n                new_features = F.max_pool2d(\n                    new_features, kernel_size=[1, new_features.size(3)]\n                )  # (B, mlp[-1], npoint, 1)\n            elif self.pool_method == 'avg_pool':\n                new_features = F.avg_pool2d(\n                    new_features, kernel_size=[1, new_features.size(3)]\n               
 )  # (B, mlp[-1], npoint, 1)\n            else:\n                raise NotImplementedError\n\n            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1)\n\n\nclass PointnetSAModuleMSG(_PointnetSAModuleBase):\n    \"\"\"Pointnet set abstraction layer with multiscale grouping\"\"\"\n\n    def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,\n                 use_xyz: bool = True, use_res = False, pool_method='max_pool', instance_norm=False):\n        \"\"\"\n        :param npoint: int\n        :param radii: list of float, list of radii to group with\n        :param nsamples: list of int, number of samples in each ball query\n        :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale\n        :param bn: whether to use batchnorm\n        :param use_xyz:\n        :param pool_method: max_pool / avg_pool\n        :param instance_norm: whether to use instance_norm\n        \"\"\"\n        super().__init__()\n\n        assert len(radii) == len(nsamples) == len(mlps)\n\n        self.npoint = npoint\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        for i in range(len(radii)):\n            radius = radii[i]\n            nsample = nsamples[i]\n            self.groupers.append(\n                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)\n                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)\n            )\n            mlp_spec = mlps[i]\n            if use_xyz:\n                mlp_spec[0] += 3\n\n            if use_res:\n                self.mlps.append(pt_utils.SharedResMLP(mlp_spec, bn=bn))\n            else:\n                self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm))\n        self.pool_method = pool_method\n\n\nclass 
PointnetSAModule(PointnetSAModuleMSG):\n    \"\"\"Pointnet set abstraction layer\"\"\"\n\n    def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None,\n                 bn: bool = True, use_xyz: bool = True, use_res = False, pool_method='max_pool', instance_norm=False):\n        \"\"\"\n        :param mlp: list of int, spec of the pointnet before the global max_pool\n        :param npoint: int, number of features\n        :param radius: float, radius of ball\n        :param nsample: int, number of samples in the ball query\n        :param bn: whether to use batchnorm\n        :param use_xyz:\n        :param pool_method: max_pool / avg_pool\n        :param instance_norm: whether to use instance_norm\n        \"\"\"\n        super().__init__(\n            mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, use_res=use_res,\n            pool_method=pool_method, instance_norm=instance_norm\n        )\n\n\nclass PointNetSSG_Base(PointnetSAModuleMSG):\n    def __init__(self, npoint, nsample, radius, in_channel, out_channel, bn=True, use_xyz=False):\n        super().__init__(\n            mlps=[[in_channel, out_channel, out_channel, out_channel]], \n            npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, use_res=False)\n\n\nclass PointnetFPModule(nn.Module):\n    r\"\"\"Propigates the features of one set to another\"\"\"\n\n    def __init__(self, *, mlp: List[int], bn: bool = True):\n        \"\"\"\n        :param mlp: list of int\n        :param bn: whether to use batchnorm\n        \"\"\"\n        super().__init__()\n        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)\n\n    def forward(\n            self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features\n        :param known: (B, m, 3) 
tensor of the xyz positions of the known features\n        :param unknow_feats: (B, C1, n) tensor of the features to be propigated to\n        :param known_feats: (B, C2, m) tensor of features to be propigated\n        :return:\n            new_features: (B, mlp[-1], n) tensor of the features of the unknown features\n        \"\"\"\n        if known is not None:\n            dist, idx = pointnet2_utils.three_nn(unknown, known)\n            dist_recip = 1.0 / (dist + 1e-8)\n            norm = torch.sum(dist_recip, dim=2, keepdim=True)\n            weight = dist_recip / norm\n\n            interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)\n        else:\n            interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))\n\n        if unknow_feats is not None:\n            new_features = torch.cat([interpolated_feats, unknow_feats], dim=1)  # (B, C2 + C1, n)\n        else:\n            new_features = interpolated_feats\n\n        new_features = new_features.unsqueeze(-1)\n        new_features = self.mlp(new_features)\n\n        return new_features.squeeze(-1)\n\n\nif __name__ == \"__main__\":\n    pass\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/pointnet2_utils.py",
    "content": "import torch\nfrom torch.autograd import Variable\nfrom torch.autograd import Function\nimport torch.nn as nn\nfrom typing import Tuple\n\nimport pointnet2_cuda as pointnet2\n\n\nclass FurthestPointSampling(Function):\n    @staticmethod\n    def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:\n        \"\"\"\n        Uses iterative furthest point sampling to select a set of npoint features that have the largest\n        minimum distance\n        :param ctx:\n        :param xyz: (B, N, 3) where N > npoint\n        :param npoint: int, number of features in the sampled set\n        :return:\n             output: (B, npoint) tensor containing the set\n        \"\"\"\n        assert xyz.is_contiguous()\n\n        B, N, _ = xyz.size()\n        output = torch.cuda.IntTensor(B, npoint)\n        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)\n\n        pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)\n        return output\n\n    @staticmethod\n    def backward(xyz, a=None):\n        return None, None\n\n\nfurthest_point_sample = FurthestPointSampling.apply\n\n\nclass GatherOperation(Function):\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        :param ctx:\n        :param features: (B, C, N)\n        :param idx: (B, npoint) index tensor of the features to gather\n        :return:\n            output: (B, C, npoint)\n        \"\"\"\n        assert features.is_contiguous()\n        assert idx.is_contiguous()\n\n        B, npoint = idx.size()\n        _, C, N = features.size()\n        output = torch.cuda.FloatTensor(B, C, npoint)\n\n        pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)\n\n        ctx.for_backwards = (idx, C, N)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        idx, C, N = ctx.for_backwards\n        B, npoint = idx.size()\n\n        grad_features = 
Variable(torch.cuda.FloatTensor(B, C, N).zero_())\n        grad_out_data = grad_out.data.contiguous()\n        pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data)\n        return grad_features, None\n\n\ngather_operation = GatherOperation.apply\n\n\nclass ThreeNN(Function):\n\n    @staticmethod\n    def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n        \"\"\"\n        Find the three nearest neighbors of unknown in known\n        :param ctx:\n        :param unknown: (B, N, 3)\n        :param known: (B, M, 3)\n        :return:\n            dist: (B, N, 3) l2 distance to the three nearest neighbors\n            idx: (B, N, 3) index of 3 nearest neighbors\n        \"\"\"\n        assert unknown.is_contiguous()\n        assert known.is_contiguous()\n\n        B, N, _ = unknown.size()\n        m = known.size(1)\n        dist2 = torch.cuda.FloatTensor(B, N, 3)\n        idx = torch.cuda.IntTensor(B, N, 3)\n\n        pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)\n        return torch.sqrt(dist2), idx\n\n    @staticmethod\n    def backward(ctx, a=None, b=None):\n        return None, None\n\n\nthree_nn = ThreeNN.apply\n\n\nclass ThreeInterpolate(Function):\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Performs weight linear interpolation on 3 features\n        :param ctx:\n        :param features: (B, C, M) Features descriptors to be interpolated from\n        :param idx: (B, n, 3) three nearest neighbors of the target features in features\n        :param weight: (B, n, 3) weights\n        :return:\n            output: (B, C, N) tensor of the interpolated features\n        \"\"\"\n        assert features.is_contiguous()\n        assert idx.is_contiguous()\n        assert weight.is_contiguous()\n\n        B, c, m = features.size()\n        n = idx.size(1)\n       
 ctx.save_for_backward(idx, weight, features)\n        output = torch.cuda.FloatTensor(B, c, n)\n\n        pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n        \"\"\"\n        :param ctx:\n        :param grad_out: (B, C, N) tensor with gradients of outputs\n        :return:\n            grad_features: (B, C, M) tensor with gradients of features\n            None:\n            None:\n        \"\"\"\n        idx, weight, features = ctx.saved_tensors\n        B, c, m = features.size()\n        n = idx.size(1)\n\n        grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())\n        grad_out_data = grad_out.data.contiguous()\n\n        pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data)\n        return grad_features, None, None\n\n\nthree_interpolate = ThreeInterpolate.apply\n\n\nclass GroupingOperation(Function):\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        :param ctx:\n        :param features: (B, C, N) tensor of features to group\n        :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with\n        :return:\n            output: (B, C, npoint, nsample) tensor\n        \"\"\"\n        assert features.is_contiguous()\n        assert idx.is_contiguous()\n\n        B, nfeatures, nsample = idx.size()\n        _, C, N = features.size()\n        output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)\n\n        pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)\n\n        ctx.for_backwards = (idx, N)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n        \"\"\"\n        :param ctx:\n        :param 
grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward\n        :return:\n            grad_features: (B, C, N) gradient of the features\n        \"\"\"\n        idx, N = ctx.for_backwards\n\n        B, C, npoint, nsample = grad_out.size()\n        grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())\n\n        grad_out_data = grad_out.data.contiguous()\n        pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data)\n        return grad_features, None\n\n\ngrouping_operation = GroupingOperation.apply\n\n\nclass BallQuery(Function):\n\n    @staticmethod\n    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        :param ctx:\n        :param radius: float, radius of the balls\n        :param nsample: int, maximum number of features in the balls\n        :param xyz: (B, N, 3) xyz coordinates of the features\n        :param new_xyz: (B, npoint, 3) centers of the ball query\n        :return:\n            idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls\n        \"\"\"\n        assert new_xyz.is_contiguous()\n        assert xyz.is_contiguous()\n\n        B, N, _ = xyz.size()\n        npoint = new_xyz.size(1)\n        idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()\n\n        pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)\n        return idx\n\n    @staticmethod\n    def backward(ctx, a=None):\n        return None, None, None, None\n\n\nball_query = BallQuery.apply\n\n\nclass QueryAndGroup(nn.Module):\n    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):\n        \"\"\"\n        :param radius: float, radius of ball\n        :param nsample: int, maximum number of features to gather in the ball\n        :param use_xyz:\n        \"\"\"\n        super().__init__()\n        self.radius, self.nsample, self.use_xyz 
= radius, nsample, use_xyz\n\n    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]:\n        \"\"\"\n        :param xyz: (B, N, 3) xyz coordinates of the features\n        :param new_xyz: (B, npoint, 3) centroids\n        :param features: (B, C, N) descriptors of the features\n        :return:\n            new_features: (B, 3 + C, npoint, nsample)\n        \"\"\"\n        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)\n        xyz_trans = xyz.transpose(1, 2).contiguous()\n        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)\n        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)\n\n        if features is not None:\n            grouped_features = grouping_operation(features, idx)\n            if self.use_xyz:\n                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, C + 3, npoint, nsample)\n            else:\n                new_features = grouped_features\n        else:\n            assert self.use_xyz, \"Cannot have not features and not use xyz as a feature!\"\n            new_features = grouped_xyz\n\n        return new_features\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/pytorch_utils.py",
    "content": "import torch.nn as nn\nfrom typing import List, Tuple\nimport torch.nn.functional as F\n\nclass EmptyModule(nn.Module):\n    def __init__(self):\n        super().__init__()\n\n    def forward(self, x):\n        return x\n\n\nclass SharedResMLP(nn.Module):\n    def __init__(\n            self,\n            args: List[int],\n            *,\n            bn: bool = False,\n            activation=nn.ReLU(inplace=True)):\n        super().__init__()\n\n        self.res_convs = nn.ModuleList()\n        self.short_conn = nn.ModuleList()\n        for i in range(len(args) - 1):\n            in_ch = args[i]\n            out_ch = args[i + 1]\n            mid_ch = args[i + 1] // 2\n            self.res_convs.append(\n                nn.Sequential(\n                    Conv2d(in_ch, mid_ch, bn=bn, activation=activation),\n                    Conv2d(mid_ch, mid_ch, bn=bn, activation=activation),\n                    Conv2d(mid_ch, out_ch, bn=bn, activation=None)))\n            self.short_conn.append(\n                EmptyModule() if in_ch == out_ch else \\\n                Conv2d(in_ch, out_ch, bn=bn, activation=None))\n\n    def forward(self, x):\n        for k in range(len(self.res_convs)):\n            out_res = self.res_convs[k](x)\n            out_short = self.short_conn[k](x)\n            x = F.relu(out_res + out_short)\n        return x\n            \n\nclass SharedMLP(nn.Sequential):\n\n    def __init__(\n            self,\n            args: List[int],\n            *,\n            bn: bool = False,\n            activation=nn.ReLU(inplace=True),\n            preact: bool = False,\n            first: bool = False,\n            name: str = \"\",\n            instance_norm: bool = False,):\n        super().__init__()\n\n        for i in range(len(args) - 1):\n            self.add_module(\n                name + 'layer{}'.format(i),\n                Conv2d(\n                    args[i],\n                    args[i + 1],\n                    bn=(not first or not 
preact or (i != 0)) and bn,\n                    activation=activation\n                    if (not first or not preact or (i != 0)) else None,\n                    preact=preact,\n                    instance_norm=instance_norm\n                )\n            )\n\n\nclass _ConvBase(nn.Sequential):\n\n    def __init__(\n            self,\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=None,\n            batch_norm=None,\n            bias=True,\n            preact=False,\n            name=\"\",\n            instance_norm=False,\n            instance_norm_func=None\n    ):\n        super().__init__()\n\n        bias = bias and (not bn)\n        conv_unit = conv(\n            in_size,\n            out_size,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=bias\n        )\n        init(conv_unit.weight)\n        if bias:\n            nn.init.constant_(conv_unit.bias, 0)\n\n        if bn:\n            if not preact:\n                bn_unit = batch_norm(out_size)\n            else:\n                bn_unit = batch_norm(in_size)\n        if instance_norm:\n            if not preact:\n                in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False)\n            else:\n                in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False)\n\n        if preact:\n            if bn:\n                self.add_module(name + 'bn', bn_unit)\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n            if not bn and instance_norm:\n                self.add_module(name + 'in', in_unit)\n\n        self.add_module(name + 'conv', conv_unit)\n\n        if not preact:\n            if bn:\n                self.add_module(name + 'bn', bn_unit)\n\n            if 
activation is not None:\n                self.add_module(name + 'activation', activation)\n\n            if not bn and instance_norm:\n                self.add_module(name + 'in', in_unit)\n\n\nclass _BNBase(nn.Sequential):\n\n    def __init__(self, in_size, batch_norm=None, name=\"\"):\n        super().__init__()\n        self.add_module(name + \"bn\", batch_norm(in_size))\n\n        nn.init.constant_(self[0].weight, 1.0)\n        nn.init.constant_(self[0].bias, 0)\n\n\nclass BatchNorm1d(_BNBase):\n\n    def __init__(self, in_size: int, *, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)\n\n\nclass BatchNorm2d(_BNBase):\n\n    def __init__(self, in_size: int, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)\n        \n\nclass BatchNorm3d(_BNBase):\n\n    def __init__(self, in_size: int, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name)\n\n\nclass Conv1d(_ConvBase):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            kernel_size: int = 1,\n            stride: int = 1,\n            padding: int = 0,\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\",\n            instance_norm=False\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv1d,\n            batch_norm=BatchNorm1d,\n            bias=bias,\n            preact=preact,\n            name=name,\n            instance_norm=instance_norm,\n            instance_norm_func=nn.InstanceNorm1d\n        )\n\n\nclass Conv2d(_ConvBase):\n\n    def __init__(\n            self,\n        
    in_size: int,\n            out_size: int,\n            *,\n            kernel_size: Tuple[int, int] = (1, 1),\n            stride: Tuple[int, int] = (1, 1),\n            padding: Tuple[int, int] = (0, 0),\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\",\n            instance_norm=False\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv2d,\n            batch_norm=BatchNorm2d,\n            bias=bias,\n            preact=preact,\n            name=name,\n            instance_norm=instance_norm,\n            instance_norm_func=nn.InstanceNorm2d\n        )\n\nclass Conv3d(_ConvBase):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            kernel_size: Tuple[int, int, int] = (1, 1, 1),\n            stride: Tuple[int, int, int] = (1, 1, 1),\n            padding: Tuple[int, int, int] = (0, 0, 0),\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\",\n            instance_norm=False\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv3d,\n            batch_norm=BatchNorm3d,\n            bias=bias,\n            preact=preact,\n            name=name,\n            instance_norm=instance_norm,\n            instance_norm_func=nn.InstanceNorm3d\n        )\n\n\nclass FC(nn.Sequential):\n\n    def __init__(\n            
self,\n            in_size: int,\n            out_size: int,\n            *,\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=None,\n            preact: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__()\n\n        fc = nn.Linear(in_size, out_size, bias=not bn)\n        if init is not None:\n            init(fc.weight)\n        if not bn:\n            nn.init.constant(fc.bias, 0)\n\n        if preact:\n            if bn:\n                self.add_module(name + 'bn', BatchNorm1d(in_size))\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n        self.add_module(name + 'fc', fc)\n\n        if not preact:\n            if bn:\n                self.add_module(name + 'bn', BatchNorm1d(out_size))\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='pointnet2',\n    ext_modules=[\n        CUDAExtension('pointnet2_cuda', [\n            'src/pointnet2_api.cpp',\n            \n            'src/ball_query.cpp', \n            'src/ball_query_gpu.cu',\n            'src/group_points.cpp', \n            'src/group_points_gpu.cu',\n            'src/interpolate.cpp', \n            'src/interpolate_gpu.cu',\n            'src/sampling.cpp', \n            'src/sampling_gpu.cu',\n        ],\n        extra_compile_args={'cxx': ['-g'],\n                            'nvcc': ['-O2']})\n    ],\n    cmdclass={'build_ext': BuildExtension}\n)\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/ball_query.cpp",
    "content": "#include <torch/serialize/tensor.h>\n#include <vector>\n#include <THC/THC.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include \"ball_query_gpu.h\"\n\nextern THCState *state;\n\n#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)\n\nint ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, \n    at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) {\n    CHECK_INPUT(new_xyz_tensor);\n    CHECK_INPUT(xyz_tensor);\n    const float *new_xyz = new_xyz_tensor.data<float>();\n    const float *xyz = xyz_tensor.data<float>();\n    int *idx = idx_tensor.data<int>();\n    \n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);\n    return 1;\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/ball_query_gpu.cu",
    "content": "#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"ball_query_gpu.h\"\n#include \"cuda_utils.h\"\n\n\n__global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, \n    const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) {\n    // new_xyz: (B, M, 3)\n    // xyz: (B, N, 3)\n    // output:\n    //      idx: (B, M, nsample)\n    int bs_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (bs_idx >= b || pt_idx >= m) return;\n\n    new_xyz += bs_idx * m * 3 + pt_idx * 3;\n    xyz += bs_idx * n * 3;\n    idx += bs_idx * m * nsample + pt_idx * nsample;\n\n    float radius2 = radius * radius;\n    float new_x = new_xyz[0];\n    float new_y = new_xyz[1];\n    float new_z = new_xyz[2];\n\n    int cnt = 0;\n    for (int k = 0; k < n; ++k) {\n        float x = xyz[k * 3 + 0];\n        float y = xyz[k * 3 + 1];\n        float z = xyz[k * 3 + 2];\n        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);\n        if (d2 < radius2){\n            if (cnt == 0){\n                for (int l = 0; l < nsample; ++l) {\n                    idx[l] = k;\n                }\n            }\n            idx[cnt] = k;\n            ++cnt;\n            if (cnt >= nsample) break;\n        }\n    }\n}\n\n\nvoid ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \\\n    const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) {\n    // new_xyz: (B, M, 3)\n    // xyz: (B, N, 3)\n    // output:\n    //      idx: (B, M, nsample)\n\n    cudaError_t err;\n\n    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    ball_query_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);\n    // cudaDeviceSynchronize();  // for using printf in kernel function\n    err = cudaGetLastError();\n 
   if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/ball_query_gpu.h",
    "content": "#ifndef _BALL_QUERY_GPU_H\n#define _BALL_QUERY_GPU_H\n\n#include <torch/serialize/tensor.h>\n#include <vector>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n\nint ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, \n\tat::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);\n\nvoid ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \n\tconst float *xyz, const float *new_xyz, int *idx, cudaStream_t stream);\n\n#endif\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/cuda_utils.h",
    "content": "#ifndef _CUDA_UTILS_H\n#define _CUDA_UTILS_H\n\n#include <cmath>\n\n#define TOTAL_THREADS 1024\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))\n\ninline int opt_n_threads(int work_size) {\n    const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);\n\n    return max(min(1 << pow_2, TOTAL_THREADS), 1);\n}\n#endif\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/group_points.cpp",
    "content": "#include <torch/serialize/tensor.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <vector>\n#include <THC/THC.h>\n#include \"group_points_gpu.h\"\n\nextern THCState *state;\n\n\nint group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, \n    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {\n\n    float *grad_points = grad_points_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n    const float *grad_out = grad_out_tensor.data<float>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n\n    group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream);\n    return 1;\n}\n\n\nint group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, \n    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) {\n\n    const float *points = points_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n    float *out = out_tensor.data<float>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n\n    group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream);\n    return 1;\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/group_points_gpu.cu",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n#include \"group_points_gpu.h\"\n\n\n__global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, \n    const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) {\n    // grad_out: (B, C, npoints, nsample)\n    // idx: (B, npoints, nsample)\n    // output:\n    //      grad_points: (B, C, N)\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int index = blockIdx.x * blockDim.x + threadIdx.x;\n    int pt_idx = index / nsample;\n    if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;\n\n    int sample_idx = index % nsample;\n    grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;\n    idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; \n    \n    atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]);\n}\n\nvoid group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, \n    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {\n    // grad_out: (B, C, npoints, nsample)\n    // idx: (B, npoints, nsample)\n    // output:\n    //      grad_points: (B, C, N)\n    cudaError_t err;\n    dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    group_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n\n\n__global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, \n    const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {\n    // points: (B, C, N)\n    // idx: (B, npoints, 
nsample)\n    // output:\n    //      out: (B, C, npoints, nsample)\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int index = blockIdx.x * blockDim.x + threadIdx.x;\n    int pt_idx = index / nsample;\n    if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;\n\n    int sample_idx = index % nsample;\n\n    idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; \n    int in_idx = bs_idx * c * n + c_idx * n + idx[0];\n    int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;\n\n    out[out_idx] = points[in_idx];\n}\n\n\nvoid group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, \n    const float *points, const int *idx, float *out, cudaStream_t stream) {\n    // points: (B, C, N)\n    // idx: (B, npoints, nsample)\n    // output:\n    //      out: (B, C, npoints, nsample)\n    cudaError_t err;\n    dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    group_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, points, idx, out);\n    // cudaDeviceSynchronize();  // for using printf in kernel function\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/group_points_gpu.h",
    "content": "#ifndef _GROUP_POINTS_GPU_H\n#define _GROUP_POINTS_GPU_H\n\n#include <torch/serialize/tensor.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <vector>\n\n\nint group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, \n    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);\n\nvoid group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, \n    const float *points, const int *idx, float *out, cudaStream_t stream);\n\nint group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, \n    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);\n\nvoid group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, \n    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);\n\n#endif\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/interpolate.cpp",
    "content": "#include <torch/serialize/tensor.h>\n#include <vector>\n#include <THC/THC.h>\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include \"interpolate_gpu.h\"\n\nextern THCState *state;\n\n\nvoid three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, \n    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {\n    const float *unknown = unknown_tensor.data<float>();\n    const float *known = known_tensor.data<float>();\n    float *dist2 = dist2_tensor.data<float>();\n    int *idx = idx_tensor.data<int>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream);\n}\n\n\nvoid three_interpolate_wrapper_fast(int b, int c, int m, int n,\n                         at::Tensor points_tensor,\n                         at::Tensor idx_tensor,\n                         at::Tensor weight_tensor,\n                         at::Tensor out_tensor) {\n\n    const float *points = points_tensor.data<float>();\n    const float *weight = weight_tensor.data<float>();\n    float *out = out_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream);\n}\n\nvoid three_interpolate_grad_wrapper_fast(int b, int c, int n, int m,\n                            at::Tensor grad_out_tensor,\n                            at::Tensor idx_tensor,\n                            at::Tensor weight_tensor,\n                            at::Tensor grad_points_tensor) {\n\n    const float *grad_out = grad_out_tensor.data<float>();\n    const float *weight = weight_tensor.data<float>();\n    float *grad_points = grad_points_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    
three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream);\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/interpolate_gpu.cu",
    "content": "#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n#include \"interpolate_gpu.h\"\n\n\n__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, \n    const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {\n    // unknown: (B, N, 3)\n    // known: (B, M, 3)\n    // output: \n    //      dist2: (B, N, 3)\n    //      idx: (B, N, 3)\n    \n    int bs_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (bs_idx >= b || pt_idx >= n) return;\n\n    unknown += bs_idx * n * 3 + pt_idx * 3;\n    known += bs_idx * m * 3;\n    dist2 += bs_idx * n * 3 + pt_idx * 3;\n    idx += bs_idx * n * 3 + pt_idx * 3;\n\n    float ux = unknown[0];\n    float uy = unknown[1];\n    float uz = unknown[2];\n\n    double best1 = 1e40, best2 = 1e40, best3 = 1e40;\n    int besti1 = 0, besti2 = 0, besti3 = 0;\n    for (int k = 0; k < m; ++k) {\n        float x = known[k * 3 + 0];\n        float y = known[k * 3 + 1];\n        float z = known[k * 3 + 2];\n        float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);\n        if (d < best1) {\n            best3 = best2; besti3 = besti2;\n            best2 = best1; besti2 = besti1;\n            best1 = d; besti1 = k;\n        } \n        else if (d < best2) {\n            best3 = best2; besti3 = besti2;\n            best2 = d; besti2 = k;\n        } \n        else if (d < best3) {\n            best3 = d; besti3 = k;\n        }\n    }\n    dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;\n    idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;\n}\n\n\nvoid three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, \n    const float *known, float *dist2, int *idx, cudaStream_t stream) {\n    // unknown: (B, N, 3)\n    // known: (B, M, 3)\n    // output: \n    //      dist2: (B, N, 3)\n    //      idx: (B, N, 3)\n\n    cudaError_t err;\n    dim3 blocks(DIVUP(n, 
THREADS_PER_BLOCK), b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known, dist2, idx);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n\n\n__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, \n    const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {\n    // points: (B, C, M)\n    // idx: (B, N, 3)\n    // weight: (B, N, 3)\n    // output:\n    //      out: (B, C, N)\n\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n\n    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;\n\n    weight += bs_idx * n * 3 + pt_idx * 3;\n    points += bs_idx * c * m + c_idx * m;\n    idx += bs_idx * n * 3 + pt_idx * 3;\n    out += bs_idx * c * n + c_idx * n;\n\n    out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];\n}\n\nvoid three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, \n    const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) {\n    // points: (B, C, M)\n    // idx: (B, N, 3)\n    // weight: (B, N, 3)\n    // output:\n    //      out: (B, C, N)\n\n    cudaError_t err;\n    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n    three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, m, n, points, idx, weight, out);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n\n\n__global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ 
grad_out, \n    const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {\n    // grad_out: (B, C, N)\n    // weight: (B, N, 3)\n    // output:\n    //      grad_points: (B, C, M)\n\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n\n    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;\n    \n    grad_out += bs_idx * c * n + c_idx * n + pt_idx;\n    weight += bs_idx * n * 3 + pt_idx * 3;\n    grad_points += bs_idx * c * m + c_idx * m;\n    idx += bs_idx * n * 3 + pt_idx * 3;\n\n\n    atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]);\n    atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]);\n    atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]);\n}\n\nvoid three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, \n    const int *idx, const float *weight, float *grad_points, cudaStream_t stream) {\n    // grad_out: (B, C, N)\n    // weight: (B, N, 3)\n    // output:\n    //      grad_points: (B, C, M)\n\n    cudaError_t err;\n    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n    three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, m, grad_out, idx, weight, grad_points);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/interpolate_gpu.h",
    "content": "#ifndef _INTERPOLATE_GPU_H\n#define _INTERPOLATE_GPU_H\n\n#include <torch/serialize/tensor.h>\n#include<vector>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n\n\nvoid three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, \n  at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);\n\nvoid three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,\n\tconst float *known, float *dist2, int *idx, cudaStream_t stream);\n\n\nvoid three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, \n    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);\n\nvoid three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, \n    const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);\n\n\nvoid three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, \n    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);\n\nvoid three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, \n    const int *idx, const float *weight, float *grad_points, cudaStream_t stream);\n\n#endif\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/pointnet2_api.cpp",
    "content": "#include <torch/serialize/tensor.h>\n#include <torch/extension.h>\n\n#include \"ball_query_gpu.h\"\n#include \"group_points_gpu.h\"\n#include \"sampling_gpu.h\"\n#include \"interpolate_gpu.h\"\n\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n    m.def(\"ball_query_wrapper\", &ball_query_wrapper_fast, \"ball_query_wrapper_fast\");\n\n    m.def(\"group_points_wrapper\", &group_points_wrapper_fast, \"group_points_wrapper_fast\");\n    m.def(\"group_points_grad_wrapper\", &group_points_grad_wrapper_fast, \"group_points_grad_wrapper_fast\");\n\n    m.def(\"gather_points_wrapper\", &gather_points_wrapper_fast, \"gather_points_wrapper_fast\");\n    m.def(\"gather_points_grad_wrapper\", &gather_points_grad_wrapper_fast, \"gather_points_grad_wrapper_fast\");\n\n    m.def(\"furthest_point_sampling_wrapper\", &furthest_point_sampling_wrapper, \"furthest_point_sampling_wrapper\");\n    \n    m.def(\"three_nn_wrapper\", &three_nn_wrapper_fast, \"three_nn_wrapper_fast\");\n    m.def(\"three_interpolate_wrapper\", &three_interpolate_wrapper_fast, \"three_interpolate_wrapper_fast\");\n    m.def(\"three_interpolate_grad_wrapper\", &three_interpolate_grad_wrapper_fast, \"three_interpolate_grad_wrapper_fast\");\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/sampling.cpp",
    "content": "#include <torch/serialize/tensor.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <vector>\n#include <THC/THC.h>\n\n#include \"sampling_gpu.h\"\n\nextern THCState *state;\n\n\nint gather_points_wrapper_fast(int b, int c, int n, int npoints, \n    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){\n    const float *points = points_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n    float *out = out_tensor.data<float>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream);\n    return 1;\n}\n\n\nint gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, \n    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {\n\n    const float *grad_out = grad_out_tensor.data<float>();\n    const int *idx = idx_tensor.data<int>();\n    float *grad_points = grad_points_tensor.data<float>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream);\n    return 1;\n}\n\n\nint furthest_point_sampling_wrapper(int b, int n, int m, \n    at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) {\n\n    const float *points = points_tensor.data<float>();\n    float *temp = temp_tensor.data<float>();\n    int *idx = idx_tensor.data<int>();\n\n    cudaStream_t stream = c10::cuda::getCurrentCUDAStream();\n    furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);\n    return 1;\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/sampling_gpu.cu",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n#include \"sampling_gpu.h\"\n\n\n__global__ void gather_points_kernel_fast(int b, int c, int n, int m, \n    const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {\n    // points: (B, C, N)\n    // idx: (B, M)\n    // output:\n    //      out: (B, C, M)\n\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;\n\n    out += bs_idx * c * m + c_idx * m + pt_idx;\n    idx += bs_idx * m + pt_idx;\n    points += bs_idx * c * n + c_idx * n;\n    out[0] = points[idx[0]];\n}\n\nvoid gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, \n    const float *points, const int *idx, float *out, cudaStream_t stream) {\n    // points: (B, C, N)\n    // idx: (B, npoints)\n    // output:\n    //      out: (B, C, npoints)\n\n    cudaError_t err;\n    dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n\n__global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, \n    const int *__restrict__ idx, float *__restrict__ grad_points) {\n    // grad_out: (B, C, M)\n    // idx: (B, M)\n    // output:\n    //      grad_points: (B, C, N)\n\n    int bs_idx = blockIdx.z;\n    int c_idx = blockIdx.y;\n    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;\n\n    grad_out += bs_idx * c * m + c_idx * m + pt_idx;\n    idx += bs_idx * m + pt_idx;\n    grad_points += bs_idx * c * n + c_idx * 
n;\n\n    atomicAdd(grad_points + idx[0], grad_out[0]);\n}\n\nvoid gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, \n    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {\n    // grad_out: (B, C, npoints)\n    // idx: (B, npoints)\n    // output:\n    //      grad_points: (B, C, N)\n\n    cudaError_t err;\n    dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)\n    dim3 threads(THREADS_PER_BLOCK);\n\n    gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points);\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n\n\n__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){\n    const float v1 = dists[idx1], v2 = dists[idx2];\n    const int i1 = dists_i[idx1], i2 = dists_i[idx2];\n    dists[idx1] = max(v1, v2);\n    dists_i[idx1] = v2 > v1 ? 
i2 : i1;\n}\n\ntemplate <unsigned int block_size>\n__global__ void furthest_point_sampling_kernel(int b, int n, int m, \n    const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {\n    // dataset: (B, N, 3)\n    // tmp: (B, N)\n    // output:\n    //      idx: (B, M)\n\n    if (m <= 0) return;\n    __shared__ float dists[block_size];\n    __shared__ int dists_i[block_size];\n\n    int batch_index = blockIdx.x;\n    dataset += batch_index * n * 3;\n    temp += batch_index * n;\n    idxs += batch_index * m;\n\n    int tid = threadIdx.x;\n    const int stride = block_size;\n\n    int old = 0;\n    if (threadIdx.x == 0)\n    idxs[0] = old;\n\n    __syncthreads();\n    for (int j = 1; j < m; j++) {\n    int besti = 0;\n    float best = -1;\n    float x1 = dataset[old * 3 + 0];\n    float y1 = dataset[old * 3 + 1];\n    float z1 = dataset[old * 3 + 2];\n    for (int k = tid; k < n; k += stride) {\n        float x2, y2, z2;\n        x2 = dataset[k * 3 + 0];\n        y2 = dataset[k * 3 + 1];\n        z2 = dataset[k * 3 + 2];\n        // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);\n        // if (mag <= 1e-3)\n        // continue;\n\n        float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);\n        float d2 = min(d, temp[k]);\n        temp[k] = d2;\n        besti = d2 > best ? k : besti;\n        best = d2 > best ? 
d2 : best;\n    }\n    dists[tid] = best;\n    dists_i[tid] = besti;\n    __syncthreads();\n\n    if (block_size >= 1024) {\n        if (tid < 512) {\n            __update(dists, dists_i, tid, tid + 512);\n        }\n        __syncthreads();\n    }\n\n    if (block_size >= 512) {\n        if (tid < 256) {\n            __update(dists, dists_i, tid, tid + 256);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 256) {\n        if (tid < 128) {\n            __update(dists, dists_i, tid, tid + 128);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 128) {\n        if (tid < 64) {\n            __update(dists, dists_i, tid, tid + 64);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 64) {\n        if (tid < 32) {\n            __update(dists, dists_i, tid, tid + 32);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 32) {\n        if (tid < 16) {\n            __update(dists, dists_i, tid, tid + 16);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 16) {\n        if (tid < 8) {\n            __update(dists, dists_i, tid, tid + 8);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 8) {\n        if (tid < 4) {\n            __update(dists, dists_i, tid, tid + 4);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 4) {\n        if (tid < 2) {\n            __update(dists, dists_i, tid, tid + 2);\n        }\n        __syncthreads();\n    }\n    if (block_size >= 2) {\n        if (tid < 1) {\n            __update(dists, dists_i, tid, tid + 1);\n        }\n        __syncthreads();\n    }\n\n    old = dists_i[0];\n    if (tid == 0)\n        idxs[j] = old;\n    }\n}\n\nvoid furthest_point_sampling_kernel_launcher(int b, int n, int m, \n    const float *dataset, float *temp, int *idxs, cudaStream_t stream) {\n    // dataset: (B, N, 3)\n    // tmp: (B, N)\n    // output:\n    //      idx: (B, M)\n\n    cudaError_t err;\n    unsigned int n_threads = 
opt_n_threads(n);\n\n    switch (n_threads) {\n        case 1024:\n        furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 512:\n        furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 256:\n        furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 128:\n        furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 64:\n        furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 32:\n        furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 16:\n        furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 8:\n        furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 4:\n        furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 2:\n        furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        case 1:\n        furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;\n        default:\n        furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n    }\n\n    err = cudaGetLastError();\n    if (cudaSuccess != err) {\n        fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n        exit(-1);\n    }\n}\n"
  },
  {
    "path": "model/conpu_v6/pointnet2/src/sampling_gpu.h",
    "content": "#ifndef _SAMPLING_GPU_H\n#define _SAMPLING_GPU_H\n\n#include <torch/serialize/tensor.h>\n#include <ATen/cuda/CUDAContext.h>\n#include<vector>\n\n\nint gather_points_wrapper_fast(int b, int c, int n, int npoints, \n    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);\n\nvoid gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, \n    const float *points, const int *idx, float *out, cudaStream_t stream);\n\n\nint gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, \n    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);\n\nvoid gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, \n    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);\n\n\nint furthest_point_sampling_wrapper(int b, int n, int m, \n    at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);\n\nvoid furthest_point_sampling_kernel_launcher(int b, int n, int m, \n    const float *dataset, float *temp, int *idxs, cudaStream_t stream);\n\n#endif\n"
  },
  {
    "path": "model/conpu_v6/train_script101.py",
    "content": "import os\n\n#coarse-net\n\nloss_weight=' '\nloss_weight+=' --weight_cd 1.0'\nloss_weight+=' --weight_uniform -10000000'\nloss_weight+=' --weight_reg -0.1'\nloss_weight+=' --weight_arap 0.03'\nloss_weight+=' --weight_overlap 0.3'\nloss_weight+=' --weight_proj -1'\nloss_weight+=' --weight_normal -1'\nloss_weight+=' --weight_cycle -1'\nloss_weight+=' --weight_ndirection 0.0001'\n\n\nfor control_i in range(0,1):\n    os.system('CUDA_VISIBLE_DEVICES=1 python train_view_toy.py \\\n        --training_up_ratio 16 \\\n        --testing_up_ratio 16 \\\n        --over_sampling_scale 4 \\\n        --visualization_while_testing 1 \\\n        --last_sample_id '+str(control_i*10000)+' \\\n        --test_blank 10000 \\\n        --train_max_samples '+str((control_i+1)*10000)+' \\\n        --learning_rate '+str(0.001* 0.9**control_i)+'  \\\n        --batchsize 8  \\\n        --out_baseline \\'out_baseline_101\\' \\\n        --num_point 256 \\\n        --gt_num_point 4096 \\\n        --pack_path \\'../../data/Sketchfab2/packed_data/version_2\\'  \\\n        --over_fitting_id 0 \\\n        --if_over_fitting_this_time 0 \\\n        --if_only_test 0 \\\n        --if_only_test_max_num 14 \\\n        --network_name \\'Net_conpu_v7\\'  \\\n        --emb_dims 512 \\\n        --neighbor_k 10 \\\n        --mlp_fitting_str \\'256 128 64\\' \\\n        --pretrained \\'./pre_trained/v3.pt\\' \\\n        --if_fix_sample 0 \\\n        --if_use_siren 0 \\\n        --feature_unfolding_nei_num 4 \\\n        '+loss_weight)\n    \n    \n    \n"
  },
  {
    "path": "model/conpu_v6/train_script101_test.py",
    "content": "import os\n\n#coarse-net\n\nloss_weight=' '\nloss_weight+=' --weight_cd 1.0'\nloss_weight+=' --weight_uniform -10000000'\nloss_weight+=' --weight_reg -0.1'\nloss_weight+=' --weight_arap 0.03'\nloss_weight+=' --weight_overlap 0.3'\nloss_weight+=' --weight_proj -1'\nloss_weight+=' --weight_normal -1'\nloss_weight+=' --weight_cycle -1'\nloss_weight+=' --weight_ndirection 0.0001'\n\n\nfor control_i in range(0,1):\n    os.system('CUDA_VISIBLE_DEVICES=1 python train_view_toy.py \\\n        --training_up_ratio 16 \\\n        --testing_up_ratio 16 \\\n        --over_sampling_scale 4 \\\n        --visualization_while_testing 1 \\\n        --last_sample_id '+str(control_i*10000)+' \\\n        --test_blank 10000 \\\n        --train_max_samples '+str((control_i+1)*10000)+' \\\n        --learning_rate '+str(0.001* 0.9**control_i)+'  \\\n        --batchsize 8  \\\n        --out_baseline \\'out_baseline_101_test\\' \\\n        --num_point 256 \\\n        --gt_num_point 4096 \\\n        --pack_path \\'../../data/Sketchfab2/packed_data/version_2\\'  \\\n        --over_fitting_id 0 \\\n        --if_over_fitting_this_time 0 \\\n        --if_only_test 1 \\\n        --if_only_test_max_num 14 \\\n        --network_name \\'Net_conpu_v7\\'  \\\n        --emb_dims 512 \\\n        --neighbor_k 10 \\\n        --mlp_fitting_str \\'256 128 64\\' \\\n        --pretrained \\'./pre_trained/v3.pt\\' \\\n        --if_fix_sample 0 \\\n        --if_use_siren 0 \\\n        --feature_unfolding_nei_num 4 \\\n        '+loss_weight)\n    \n    \n    \n"
  },
  {
    "path": "model/conpu_v6/train_view_toy.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\nfrom  torch.utils.data import DataLoader\nimport torch.optim.lr_scheduler as lr_scheduler\nimport math\nimport numpy as np\nimport torch.nn.init as init\nimport struct\nimport os\nimport sys\nsys.path.append('../')\nsys.path.append('../../')\nsys.path.append('../../code/')\n#import drawer\nimport time\nimport mesh_operations\nimport torch_tensor_functions\nimport colormap\nimport random\nfrom pointnet2 import pointnet2_utils as pn2_utils\n\n# from torch_geometric.data import Data\n# from torch_geometric.transforms.generate_mesh_normals  import *\n# from torch_scatter import scatter_add\n\n\n######  The network and loss are figured out here  ###### \nfrom loss import Loss, chamfer_dist\n###### ------ ######\n\n\nfrom utils.config import parse_args\nimport time\nimport igl\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\n\n# The parameter that controls the overfitting.\n# over_fitting_id = 0\n# if_over_fitting_this_time = False\n# if_only_test = False\n# if_only_test_max_num = 3\n\n\n\n\n# Set the GradScaler\ntry:\n    from torch.cuda.amp import GradScaler\nexcept:\n    # dummy GradScaler for PyTorch < 1.6\n    class GradScaler:\n        def __init__(self):\n            pass\n        def scale(self, loss):\n            return loss\n        def unscale_(self, optimizer):\n            pass\n        def step(self, optimizer):\n            optimizer.step()\n        def update(self):\n            pass\n\n# all the args. 
They can be set in another .py file.\nargs = parse_args()\nprint ('args:')\nprint (args)\n\n\nexec('from network import '+args.network_name)\n\nover_fitting_id = args.over_fitting_id\nif_over_fitting_this_time = args.if_over_fitting_this_time\nif_only_test = args.if_only_test\nif_only_test_max_num = args.if_only_test_max_num\n\n# The color map for visualization.\n# The points generated from a same source point should share the same color. \nrb_colormap = np.array(colormap.rb_colormap_list_little).reshape(8,3) \n\nbatch_size=args.batchsize\n# It is used to control the training process.\ntrain_max_samples = args.train_max_samples  \n\n# The path of the packed dataset.\npack_path=args.pack_path\nprint('The packed data path is : ',pack_path)\n\n# The point number of the sparse and dense patch, respectively.\nnum_point = args.num_point\ngt_num_point = args.gt_num_point\n\n# The path of the training data\ntrain_points_normals_sparse_path = pack_path+'/training_points_normals_'+str(num_point)+'.bin'\ntrain_points_normals_dense_path = pack_path+'/training_points_normals_'+str(gt_num_point)+'.bin'\n\n# The path of the testing data\ntest_points_normals_sparse_path = pack_path+'/testing_points_normals_'+str(num_point)+'.bin'\ntest_center_scale_sparse_path = pack_path+'/testing_center_scale_'+str(num_point)+'.bin'\ntest_points_normals_dense_path = pack_path+'/testing_points_normals_'+str(gt_num_point)+'.bin'\n\n# READ in train_points_normals : sparse\ntrain_points_normals_sparse = np.fromfile(train_points_normals_sparse_path, dtype = np.float32).reshape(-1,num_point,6)\n# READ in train_points_normals : dense\ntrain_points_normals_dense = np.fromfile(train_points_normals_dense_path, dtype = np.float32).reshape(-1,gt_num_point,6)\n# READ in test_points_normals : sparse\ntest_points_normals_sparse = np.fromfile(test_points_normals_sparse_path, dtype = np.float32).reshape(-1,num_point,6)\ntest_center_scale_sparse = np.fromfile(test_center_scale_sparse_path, dtype = 
np.float32).reshape(-1,4)\n# READ in test_points_normals : dense\ntest_points_normals_dense = np.fromfile(test_points_normals_dense_path, dtype = np.float32).reshape(-1,gt_num_point,6)\n\n\n\n# The pair number of the training and testing pairs, respectively.\ntrain_pair_num = train_points_normals_sparse.shape[0]\ntest_pair_num = test_points_normals_sparse.shape[0]\n\ntrain_points_normals_sparse_tensor = torch.from_numpy(train_points_normals_sparse).float()\ntest_points_normals_sparse_tensor = torch.from_numpy(test_points_normals_sparse).float()\ntrain_points_normals_dense_tensor = torch.from_numpy(train_points_normals_dense).float()\ntest_points_normals_dense_tensor = torch.from_numpy(test_points_normals_dense).float()\n\n# All the torch-tensors used for the input and ground truth. \ntrain_points_sparse_tensor = train_points_normals_sparse_tensor[:,:,:3]\ntrain_normals_sparse_tensor = train_points_normals_sparse_tensor[:,:,3:]\ntest_points_sparse_tensor = test_points_normals_sparse_tensor[:,:,:3]\ntest_normals_sparse_tensor = test_points_normals_sparse_tensor[:,:,3:]\ntrain_points_dense_tensor = train_points_normals_dense_tensor[:,:,:3]\ntrain_normals_dense_tensor = train_points_normals_dense_tensor[:,:,3:]\ntest_points_dense_tensor = test_points_normals_dense_tensor[:,:,:3]\ntest_normals_dense_tensor = test_points_normals_dense_tensor[:,:,3:]\n\n# All the batch-data used for the input and ground truth. 
\ntrain_points_sparse_batch = torch.zeros([batch_size,num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntrain_normals_sparse_batch = torch.zeros([batch_size,num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntest_points_sparse_batch = torch.zeros([batch_size,num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntest_normals_sparse_batch = torch.zeros([batch_size,num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntrain_points_dense_batch = torch.zeros([batch_size,gt_num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntrain_normals_dense_batch = torch.zeros([batch_size,gt_num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntest_points_dense_batch = torch.zeros([batch_size,gt_num_point,3],dtype=torch.float,requires_grad=False).cuda()\ntest_normals_dense_batch = torch.zeros([batch_size,gt_num_point,3],dtype=torch.float,requires_grad=False).cuda()\n\n\ndef update_test_cache(used_samples_num, model, loss_obj, args):\n    print('updating cache for used_samples_num = ' + str(used_samples_num))\n    test_cache_file='./'+args.out_baseline+'/result_cache.txt'\n    loss_sum_, loss_stages_=compute_test_loss_values(model, loss_obj, args)\n    print('the test loss: ',loss_sum_, loss_stages_)\n    cf=open(test_cache_file,'a+')\n    # The first number is the iteration times.\n    cf.write(str(used_samples_num//batch_size)+' ')\n    cf.write(str(loss_sum_)+' ')\n    for i in range(len(loss_stages_)):\n        cf.write(str(loss_stages_[i])+' ')\n    cf.write('\\n')\n    cf.close()\n    update_pics()\n    if args.visualization_while_testing:\n        update_visualization(model,  args)\n        if if_only_test==True:exit()\n    \ndef update_pics():\n    test_cache_file='./'+args.out_baseline+'/result_cache.txt'\n    cf=open(test_cache_file,'r')\n    lines=cf.readlines()\n    x=[]\n    y_sum=[]\n    y_cd=[]\n    y_reg = []\n    y_arap = []\n    y_overlap = []\n    y_proj = []\n    y_normal = []\n    y_ndirection = []\n    for i in 
range(len(lines)):\n        if i%1==0:\n            index = int(lines[i].split(' ')[0])\n            sum_loss = float(lines[i].split(' ')[1])\n            cd_loss = float(lines[i].split(' ')[2])\n            reg_loss = float(lines[i].split(' ')[3])\n            arap_loss = float(lines[i].split(' ')[4])\n            overlap_loss = float(lines[i].split(' ')[5])\n            proj_loss = float(lines[i].split(' ')[6])\n            normal_loss = float(lines[i].split(' ')[7])\n            ndirection_loss = float(lines[i].split(' ')[8])\n            iter_index=index\n            x.append(iter_index)\n            y_sum.append(sum_loss)\n            y_cd.append(cd_loss)\n            y_reg.append(reg_loss)\n            y_arap.append(arap_loss)\n            y_overlap.append(overlap_loss)\n            y_proj.append(proj_loss)\n            y_normal.append(normal_loss)\n            y_ndirection.append(ndirection_loss)\n    \n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The sum loss')\n    plt.xlabel('iteration')\n    plt.ylabel('sum loss')\n    plt.plot(x, y_sum, c='r', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_sum.png')\n    \n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on cd')\n    plt.xlabel('iteration')\n    plt.ylabel('cd loss')\n    plt.plot(x, y_cd, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_cd.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on reg')\n    plt.xlabel('iteration')\n    plt.ylabel('reg loss')\n    plt.plot(x, y_reg, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_reg.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on arap')\n    plt.xlabel('iteration')\n    plt.ylabel('arap loss')\n    plt.plot(x, y_arap, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_arap.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on overlap')\n    plt.xlabel('iteration')\n    plt.ylabel('overlap 
loss')\n    plt.plot(x, y_overlap, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_overlap.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on proj')\n    plt.xlabel('iteration')\n    plt.ylabel('proj loss')\n    plt.plot(x, y_proj, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_proj.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on normal')\n    plt.xlabel('iteration')\n    plt.ylabel('normal loss')\n    plt.plot(x, y_normal, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_normal.png')\n\n    fig = plt.figure(0)\n    fig.clear()\n    plt.title('The loss on ndirection')\n    plt.xlabel('iteration')\n    plt.ylabel('ndirection loss')\n    plt.plot(x, y_ndirection, c='#526922', ls='-')\n    plt.savefig('./'+args.out_baseline+'/loss_ndirection.png')\n    \ndef update_visualization(model,  args):\n    global test_center_scale_sparse\n\n    global train_points_sparse_tensor\n    global train_normals_sparse_tensor\n    global test_points_sparse_tensor\n    global test_normals_sparse_tensor\n    global train_points_dense_tensor\n    global train_normals_dense_tensor\n    global test_points_dense_tensor\n    global test_normals_dense_tensor\n\n    global train_points_sparse_batch\n    global train_normals_sparse_batch\n    global test_points_sparse_batch\n    global test_normals_sparse_batch\n    global train_points_dense_batch\n    global train_normals_dense_batch\n    global test_points_dense_batch\n    global test_normals_dense_batch\n    \n    test_cache_file='./'+args.out_baseline+'/result_cache.txt'\n    cf=open(test_cache_file,'r')\n    lines=cf.readlines()\n    last_line = lines[len(lines)-1]\n    iter_num = int(last_line.split(' ')[0])\n    visual_folder = './'+args.out_baseline+'/visual_'+str(iter_num*batch_size)\n    if not os.path.exists(visual_folder):\n        os.mkdir(visual_folder)\n    print('Satrt to visualize the results now.')\n    # To be 
finished. Draw whatever you wanna observe here.\n    visual_sample_num = min(batch_size,5)\n    if if_only_test==True:\n        visual_sample_num = test_pair_num\n        testing_anchor_num = args.testing_anchor_num\n        testing_model_num = test_pair_num // testing_anchor_num\n        if if_only_test_max_num>=0 and if_only_test_max_num<testing_model_num: testing_model_num=if_only_test_max_num\n        visual_sample_num = testing_model_num*testing_anchor_num\n    if not if_over_fitting_this_time:over_fitting_id_here=0\n    else:over_fitting_id_here=args.over_fitting_id\n    for si in range(over_fitting_id_here, over_fitting_id_here+visual_sample_num):\n        this_sample_path = visual_folder+'/sample_'+str(si)\n        if not os.path.exists(this_sample_path):os.mkdir(this_sample_path)\n        a_points_sparse_tensor = test_points_sparse_tensor[si:si+1].cuda()\n        a_normals_sparse_tensor = test_normals_sparse_tensor[si:si+1].cuda()\n        a_points_dense_tensor = test_points_dense_tensor[si:si+1].cuda()\n        a_normals_dense_tensor = test_normals_dense_tensor[si:si+1].cuda()\n        # get the generated results.\n        model.eval()\n        # with torch.no_grad():\n        if True:\n            a_points_gen_tensor, a_normals_gen_tensor, _, a_querying_points_3d, a_querying_points_n_3d, a_glued_points, a_glued_normals = model(a_points_sparse_tensor)\n        \n        # save the points : format-xyz, with normal.\n        torch_tensor_functions.draw_tensor_point_xyz_with_normal(this_sample_path+'/query.xyz', a_points_gen_tensor[0].detach(),torch_tensor_normals=a_normals_gen_tensor[0].detach())\n        torch_tensor_functions.draw_tensor_point_xyz_with_normal(this_sample_path+'/query_3d.xyz', a_querying_points_3d[0].detach(), torch_tensor_normals=a_querying_points_n_3d[0].detach())\n        torch_tensor_functions.draw_tensor_point_xyz_with_normal(this_sample_path+'/glued.xyz', a_glued_points[0].detach())\n        
torch_tensor_functions.draw_tensor_point_xyz_with_normal(this_sample_path+'/sparse.xyz', a_points_sparse_tensor[0])\n        torch_tensor_functions.draw_tensor_point_xyz_with_normal(this_sample_path+'/dense.xyz', a_points_dense_tensor[0])\n\n        # the color tensor of the sparse points\n        num_point_here = a_points_sparse_tensor.size()[1]\n        a_points_sparse_color_tensor = torch.from_numpy(rb_colormap).float().cuda()\n        while a_points_sparse_color_tensor.size()[0]<num_point_here:a_points_sparse_color_tensor = torch.cat((a_points_sparse_color_tensor,a_points_sparse_color_tensor),dim=0)\n        a_points_sparse_color_tensor = a_points_sparse_color_tensor[:num_point_here]\n        \n        # the color tensor of the generated points\n        up_ratio_here = a_points_gen_tensor.size()[1]//a_points_sparse_tensor.size()[1]\n        a_points_gen_color_tensor = a_points_sparse_color_tensor.clone().view(1,-1,3)\n        while a_points_gen_color_tensor.size()[0]<up_ratio_here:a_points_gen_color_tensor = torch.cat((a_points_gen_color_tensor,a_points_gen_color_tensor),dim=0)\n        a_points_gen_color_tensor = a_points_gen_color_tensor[:up_ratio_here].transpose(1,0)\n        a_points_gen_color_tensor = a_points_gen_color_tensor.reshape(-1,3)\n        \n        # save the points : format-obj, with color.\n        torch_tensor_functions.draw_tensor_point_obj_with_color(this_sample_path+'/query.obj', a_points_gen_tensor[0].detach(),torch_tensor_color=a_points_gen_color_tensor)\n        torch_tensor_functions.draw_tensor_point_obj_with_color(this_sample_path+'/sparse.obj', a_points_sparse_tensor[0],torch_tensor_color=a_points_sparse_color_tensor)\n    \n    # if if_only_test==True : Test all the testing models. 
\n    if if_only_test==True:\n        tested_mesh_path = visual_folder + '/0tested_models'\n        if not os.path.exists(tested_mesh_path):os.mkdir(tested_mesh_path)\n        for model_i in range(testing_model_num):\n            all_patches_points = []\n            one_tested_mesh_obj_path = tested_mesh_path+'/test_model_'+str(model_i)+'.obj'\n            for anchor_i in range(testing_anchor_num):\n                this_sample_id = model_i*testing_anchor_num + anchor_i\n                this_sample_path = visual_folder+'/sample_'+str(this_sample_id)\n                v_, n_ = mesh_operations.read_xyz_(this_sample_path+'/glued.xyz')\n                v_ = v_[:,:3]\n                this_center_scale = test_center_scale_sparse[this_sample_id]\n                this_center = this_center_scale[:3].reshape(1,3)\n                this_scale = this_center_scale[3]\n                v_ = v_ * this_scale\n                v_ = v_ + this_center\n                all_patches_points.append(v_)\n            all_patches_points = np.concatenate(all_patches_points,axis=0)\n            all_patches_points_torch = torch.from_numpy(all_patches_points).float().cuda().view(1,-1,3)\n            fps_id = pn2_utils.furthest_point_sample(all_patches_points_torch.contiguous(), 2000*args.testing_up_ratio)\n            new_xyz = pn2_utils.gather_operation(all_patches_points_torch.permute(0, 2, 1).contiguous(), fps_id)\n            all_patches_points = new_xyz.permute(0,2,1).view(-1,3).cpu().numpy().astype(np.float32)\n            mesh_operations.write_obj_(one_tested_mesh_obj_path, all_patches_points)\n                \n\n\n\n\n\n\n\n    \n    \ndef stophere():\n    while True:\n        continue\n\ndef run_train_val(model, optimizer, loss_obj,  args):\n    global train_points_sparse_tensor\n    global train_normals_sparse_tensor\n    global test_points_sparse_tensor\n    global test_normals_sparse_tensor\n    global train_points_dense_tensor\n    global train_normals_dense_tensor\n    global 
test_points_dense_tensor\n    global test_normals_dense_tensor\n\n    global train_points_sparse_batch\n    global train_normals_sparse_batch\n    global test_points_sparse_batch\n    global test_normals_sparse_batch\n    global train_points_dense_batch\n    global train_normals_dense_batch\n    global test_points_dense_batch\n    global test_normals_dense_batch\n    \n    used_samples_num=args.last_sample_id\n    start_pos=used_samples_num % train_pair_num\n\n    if used_samples_num==0 or if_only_test==True:\n        update_test_cache(used_samples_num, model, loss_obj,  args)\n    \n    while used_samples_num<train_max_samples:\n        while True:\n            end_pos=start_pos+batch_size\n            print('Training with pair samples: '+str(start_pos)+'~'+str(end_pos))\n            train_one_batch(model, optimizer, loss_obj, start_pos, end_pos, args) ############## train one batch\n            used_samples_num+=end_pos-start_pos\n            if used_samples_num%(args.test_blank)==0:\n                update_test_cache(used_samples_num, model, loss_obj, args) ############## test once\n                print('Test here, at '+str(used_samples_num))\n                torch.save(model.state_dict(), './'+args.out_baseline+'/sample_'+str(used_samples_num)+'.pt')\n            if end_pos>=train_pair_num:\n                start_pos=end_pos - train_pair_num\n            else:\n                start_pos=end_pos\n            print(used_samples_num,train_max_samples)\n            if used_samples_num >= train_max_samples:\n                break\n    \n    \n    \ndef train_one_batch(model, optimizer, loss_obj, start_pos, end_pos, args):\n    global train_points_sparse_tensor\n    global train_normals_sparse_tensor\n    global test_points_sparse_tensor\n    global test_normals_sparse_tensor\n    global train_points_dense_tensor\n    global train_normals_dense_tensor\n    global test_points_dense_tensor\n    global test_normals_dense_tensor\n\n    global train_points_sparse_batch\n 
   global train_normals_sparse_batch\n    global test_points_sparse_batch\n    global test_normals_sparse_batch\n    global train_points_dense_batch\n    global train_normals_dense_batch\n    global test_points_dense_batch\n    global test_normals_dense_batch\n    \n    print(start_pos, end_pos)\n    if end_pos<=train_pair_num:\n        train_points_sparse_batch = train_points_sparse_tensor[start_pos:end_pos].cuda()\n        train_normals_sparse_batch = train_normals_sparse_tensor[start_pos:end_pos].cuda()\n        train_points_dense_batch = train_points_dense_tensor[start_pos:end_pos].cuda()\n        train_normals_dense_batch = train_normals_dense_tensor[start_pos:end_pos].cuda()\n    else:\n        bottom = train_pair_num - start_pos\n        top = end_pos - train_pair_num\n        \n        train_points_sparse_batch[:bottom] = train_points_sparse_tensor[start_pos:].cuda()\n        train_normals_sparse_batch[:bottom] = train_normals_sparse_tensor[start_pos:].cuda()\n        train_points_dense_batch[:bottom] = train_points_dense_tensor[start_pos:].cuda()\n        train_normals_dense_batch[:bottom] = train_normals_dense_tensor[start_pos:].cuda()\n        \n        \n        train_points_sparse_batch[bottom:] = train_points_sparse_tensor[:top].cuda()\n        train_normals_sparse_batch[bottom:] = train_normals_sparse_tensor[:top].cuda()\n        train_points_dense_batch[bottom:] = train_points_dense_tensor[:top].cuda()\n        train_normals_dense_batch[bottom:] = train_normals_dense_tensor[:top].cuda()\n    \n    # For over-fitting!!\n    if if_over_fitting_this_time==True:\n        train_points_sparse_batch = test_points_sparse_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        train_normals_sparse_batch = test_normals_sparse_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        train_points_dense_batch = test_points_dense_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        
train_normals_dense_batch = test_normals_dense_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n\n        # torch_tensor_functions.draw_tensor_point_batch_xyz_with_normal('./train_sparsepoint_shows', train_points_sparse_batch, train_normals_sparse_batch)\n        # torch_tensor_functions.draw_tensor_point_batch_xyz_with_normal('./train_densepoint_shows', train_points_dense_batch, train_normals_dense_batch)\n    if if_over_fitting_this_time==False:\n        pi_ = 3.14159265\n        all_rot_matrix_ = None\n        for b in range(train_points_sparse_batch.size()[0]):\n            euler_x = random.randint(0,10000)/10000\n            euler_y = random.randint(0,10000)/10000\n            euler_z = random.randint(0,10000)/10000\n            euler_angle = torch.tensor([[-pi_+2*pi_*euler_x, -pi_+2*pi_*euler_y, -pi_+2*pi_*euler_z]], dtype=torch.float32).cuda()\n            a_rot_matrix_ = torch_tensor_functions.euler2rot(euler_angle)\n            if b==0:all_rot_matrix_ = a_rot_matrix_\n            else:all_rot_matrix_ = torch.cat((all_rot_matrix_,a_rot_matrix_),dim=0)\n        train_points_sparse_batch = torch.bmm(train_points_sparse_batch, all_rot_matrix_)\n        train_normals_sparse_batch = torch.bmm(train_normals_sparse_batch, all_rot_matrix_)\n        train_points_dense_batch = torch.bmm(train_points_dense_batch, all_rot_matrix_)\n        train_normals_dense_batch = torch.bmm(train_normals_dense_batch, all_rot_matrix_)\n\n\n    for train_times in range(1):\n        optimizer.zero_grad()    \n        model.train()\n        gen_points_batch, gen_normals_batch, uv_sampling_coors, _, _, glued_points, glued_normals = model(train_points_sparse_batch)\n        \n        conpu_loss, conpu_loss_stages  = loss_obj(gen_points_batch, gen_normals_batch, uv_sampling_coors, train_points_sparse_batch, train_normals_sparse_batch, train_points_dense_batch, train_normals_dense_batch)\n        print('cd:',conpu_loss_stages[0])\n        
print('reg:',conpu_loss_stages[1])\n        print('arap:',conpu_loss_stages[2])\n        print('overlap:',conpu_loss_stages[3])\n        print('proj:',conpu_loss_stages[4])\n        print('normal:',conpu_loss_stages[5])\n        print('ndirection:',conpu_loss_stages[6])\n        \n        model.zero_grad()\n        if True:\n            with torch.autograd.set_detect_anomaly(True): scaler.scale(conpu_loss).backward()\n            if_have_nan = False\n            if if_have_nan==False:\n                scaler.unscale_(optimizer)                \n                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)\n                scaler.step(optimizer)\n                print('optimizer.lr : ',optimizer.state_dict()['param_groups'][0]['lr'])\n                scheduler.step()\n                scaler.update()\n            else:\n                print('The grad is dirty!!!')\n        else:\n            conpu_loss.backward()\n            optimizer.step()\n\n\n    \ndef test_one_batch(model, loss_obj, start_pos, end_pos, args):\n    global train_points_sparse_tensor\n    global train_normals_sparse_tensor\n    global test_points_sparse_tensor\n    global test_normals_sparse_tensor\n    global train_points_dense_tensor\n    global train_normals_dense_tensor\n    global test_points_dense_tensor\n    global test_normals_dense_tensor\n\n    global train_points_sparse_batch\n    global train_normals_sparse_batch\n    global test_points_sparse_batch\n    global test_normals_sparse_batch\n    global train_points_dense_batch\n    global train_normals_dense_batch\n    global test_points_dense_batch\n    global test_normals_dense_batch\n\n    \n#    model.eval()\n    \n#    print(start_pos, end_pos)\n    if end_pos<=test_pair_num:\n        test_points_sparse_batch = test_points_sparse_tensor[start_pos:end_pos].cuda()\n        test_normals_sparse_batch = test_normals_sparse_tensor[start_pos:end_pos].cuda()\n        test_points_dense_batch = 
test_points_dense_tensor[start_pos:end_pos].cuda()\n        test_normals_dense_batch = test_normals_dense_tensor[start_pos:end_pos].cuda()\n    else:\n        bottom = test_pair_num - start_pos\n        top = end_pos - test_pair_num\n        \n        test_points_sparse_batch[:bottom] = test_points_sparse_tensor[start_pos:].cuda()\n        test_normals_sparse_batch[:bottom] = test_normals_sparse_tensor[start_pos:].cuda()\n        test_points_dense_batch[:bottom] = test_points_dense_tensor[start_pos:].cuda()\n        test_normals_dense_batch[:bottom] = test_normals_dense_tensor[start_pos:].cuda()\n        \n        \n        test_points_sparse_batch[bottom:] = test_points_sparse_tensor[:top].cuda()\n        test_normals_sparse_batch[bottom:] = test_normals_sparse_tensor[:top].cuda()\n        test_points_dense_batch[bottom:] = test_points_dense_tensor[:top].cuda()\n        test_normals_dense_batch[bottom:] = test_normals_dense_tensor[:top].cuda()\n        \n    # For over-fitting!!\n    if if_over_fitting_this_time==True:\n        test_points_sparse_batch = test_points_sparse_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        test_normals_sparse_batch = test_normals_sparse_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        test_points_dense_batch = test_points_dense_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n        test_normals_dense_batch = test_normals_dense_tensor[0+over_fitting_id:end_pos-start_pos+over_fitting_id].cuda()\n    \n    \n    model.eval()\n    # with torch.no_grad():\n    if True:\n        gen_points_batch, gen_normals_batch, uv_sampling_coors, _, _, glued_points, glued_normals = model(test_points_sparse_batch)\n\n        conpu_loss, conpu_loss_stages = loss_obj(gen_points_batch, gen_normals_batch, uv_sampling_coors, test_points_sparse_batch, test_normals_sparse_batch, test_points_dense_batch, test_normals_dense_batch)\n\n    return conpu_loss, conpu_loss_stages\n    \n    \n 
   \n\n    \ndef compute_test_loss_values(model, loss_obj, args):\n    start_pos=0\n    loss_sum=0.0\n    loss_stages=[]\n    batch_cnt=0.0\n    print('Computing the testing loss on the testing set:')\n    for s in range(0, test_pair_num, batch_size):\n        start_pos = s\n        end_pos = s + batch_size\n        if end_pos > test_pair_num:\n            end_pos = test_pair_num\n        this_batch_size = end_pos - start_pos\n        lsum,lstages = test_one_batch(model, loss_obj, start_pos, end_pos, args)\n        if start_pos==0:\n            loss_sum=lsum.item()*this_batch_size\n            for i in range(len(lstages)):\n                loss_stages.append(lstages[i].item()*this_batch_size)\n        else:\n            loss_sum+=lsum.item()*this_batch_size\n            for i in range(len(lstages)):\n                loss_stages[i]+=lstages[i].item()*this_batch_size\n        batch_cnt += this_batch_size\n    loss_sum/=batch_cnt\n    for i in range(len(loss_stages)):\n        loss_stages[i]/=batch_cnt\n    return loss_sum, loss_stages\n\ndef show_parameter_by_name(net_name, layer_name):\n    for name, parameters in net_name.named_parameters():\n        if name==layer_name:\n            return parameters\n    return None\n    \ndef get_para_of_one_layer_from_another_net(net_source, net_to_be_changed, layer_name):\n    a = show_parameter_by_name(net_source, layer_name)\n#    print(show_parameter_by_name(net_to_be_changed, layer_name))\n    for name, parameters in net_to_be_changed.named_parameters():\n        if name==layer_name:\n            parameters.data = a.data\n            return None\n    print('No matching for layer: ',layer_name)\n#    print(show_parameter_by_name(net_to_be_changed, layer_name))\n    \n    \n\nif __name__=='__main__':\n    exec('conpu_net = '+str(args.network_name)+'(args).cuda()')\n    if False:\n        print('#parameters:', sum(param.numel() for param in conpu_net.parameters())*4/(1024*1024),' Mb')\n        exit()\n    if args.last_sample_id==0:\n     
   if os.path.exists('./'+args.out_baseline):\n            os.system('rm -rf ./'+args.out_baseline)\n        os.makedirs('./'+args.out_baseline)\n        if len(args.pretrained)>=1: conpu_net.load_state_dict(torch.load(args.pretrained),True)\n        torch.save(conpu_net.state_dict(), './'+args.out_baseline+'/sample_0.pt')\n        # os.system('cp ./out_baseline_5/sample_600000.pt ./'+args.out_baseline+'/sample_0.pt')\n        \n    if args.if_only_test==True: conpu_net.load_state_dict(torch.load(args.pretrained),True)\n    else: conpu_net.load_state_dict(torch.load('./'+args.out_baseline+'/sample_'+str(args.last_sample_id)+'.pt'),True)\n    \n    # setup optimizer\n    optimizer = optim.AdamW(conpu_net.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay, eps=args.epsilon)\n    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr = args.learning_rate, total_steps = (args.train_max_samples - args.last_sample_id)//args.batchsize, pct_start=0.03, cycle_momentum=False, anneal_strategy='linear')\n    scaler = GradScaler(enabled=args.mixed_precision)\n    \n    # setup loss object\n    loss_obj = Loss(args)\n    \n    # run train and test\n    run_train_val(conpu_net, optimizer, loss_obj,  args)\n    print('Done.')\n"
  },
  {
    "path": "utils/config.py",
    "content": "import argparse\nimport os\nfrom configparser import SafeConfigParser\n\ndef parse_args():\n    # argparse argument\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--phase', default='train', help='train or test')\n    parser.add_argument('--wq_test', type=int, default=0,help='if test by wq method')\n    \n    parser.add_argument('--device_id',help='Specify the index of the cuda device, e.g. 0, 1 ,2',default=0, type=int)\n    parser.add_argument('--num_point', type=int, default=256,help='Point Number')\n    parser.add_argument('--gt_num_point', type=int, default=4096,help='Point Number of GT points')\n    parser.add_argument('--training_up_ratio', type=int, default=4,help='The Upsampling Ratio during training') \n    parser.add_argument('--testing_up_ratio', type=int, default=4, help='The Upsampling Ratio during testing')  \n    parser.add_argument('--over_sampling_scale', type=float, default=1.5, help='The scale for over-sampling')\n    parser.add_argument('--limited_testing_model_num', type=int, default=-1, help='The max allowed num of tested model')\n    parser.add_argument('--emb_dims', type=int, default=8192, metavar='N',help='Dimension of embeddings')\n    parser.add_argument('--testing_anchor_num', type=int, default=114, metavar='N',help='The number of patches on the testing models')\n    parser.add_argument('--pe_out_L', type=int, default=5, metavar='N',help='The parameter L in the position code')\n    parser.add_argument('--feature_unfolding_nei_num', type=int, default=4, metavar='N',help='The number of neighbour points used while feature unfolding')\n    parser.add_argument('--repulsion_nei_num', type=int, default=5, metavar='N',help='The number of neighbour points used in repulsion loss')\n\n    # for phase train\n    parser.add_argument('--batchsize', type=int, default=8, help='Batch Size during training')\n    parser.add_argument('--max_epoch', type=int, default=400, help='Epoch to run')\n    
parser.add_argument('--learning_rate', type=float, default=0.005)\n    parser.add_argument('--reg_normal1', type=float, default=0.1)\n    parser.add_argument('--reg_normal2', type=float, default=0.1)\n    parser.add_argument('--jitter_sigma', type=float, default=0.01)\n    parser.add_argument('--jitter_max', type=float, default=0.03)\n    parser.add_argument('--if_bn', type=int, default=0, help='If using batch normalization')\n    parser.add_argument('--neighbor_k', type=int, default=5, help='The number of neighbour points used in DGCNN')\n    # parser.add_argument('--mlpchanels_uv_encoder_str', type=str, default='None', metavar='None',help='mlp layers of the uv position encoding (default: None)')\n    parser.add_argument('--mlp_fitting_str', type=str, default='None', metavar='None',help='mlp layers of the part surface fitting (default: None)')\n    parser.add_argument('--mlp_projecting_str', type=str, default='None', metavar='None',help='mlp layers of the part surface projecting (default: None)')\n    # parser.add_argument('--mlp_refining_str', type=str, default='None', metavar='None',help='mlp layers of the point-wise refining (default: None)')\n    # parser.add_argument('--if_refine_by_net', type=int, default=0, help='if to use the refining module in the network')\n    parser.add_argument('--glue_neighbor', type=int, default=4, help='The number of neighbour points used in glue process')\n    parser.add_argument('--proj_neighbor', type=int, default=4, help='The number of neighbour points used in projection process')\n\n    # control the training\n    parser.add_argument('--last_sample_id',help='the id in the last saved trained model',default=0, type=int)    \n    parser.add_argument('--train_max_samples',help='the max number of samples used in the training',default=500000, type=int)\n    parser.add_argument('--test_blank',help='how often the testing process is performed',default=100, type=int)\n    parser.add_argument('--visualization_while_testing', default=1, 
type=int, metavar='visual', help='1 if visualize; 0 if not')\n\n    # the trained results\n    parser.add_argument('--pack_path', type=str, default='None', metavar='None',help='the path of packed_data (default: None)')\n    parser.add_argument('--out_baseline',help='the file of the baseline training results',default='./output_baseline', type=str)  \n\n    #for phase test\n    parser.add_argument('--pretrained', default='', help='Model stored')\n    parser.add_argument('--eval_xyz', default='test_5000', help='Folder to evaluate')\n    parser.add_argument('--num_shape_point', type=int, default=5000,help='Point Number per shape')\n    parser.add_argument('--patch_num_ratio', type=int, default=3,help='Number of points covered by patch')\n\n    #loss terms weights\n    parser.add_argument('--weight_cd', type=float, default=-1)\n    parser.add_argument('--weight_refined_cd', type=float, default=-1)\n    parser.add_argument('--weight_repulsion', type=float, default=-1)\n    parser.add_argument('--weight_pre', type=float, default=-1)\n    parser.add_argument('--weight_center', type=float, default=-1)\n    parser.add_argument('--weight_exclude', type=float, default=-1)\n    parser.add_argument('--weight_uniform', type=float, default=-1)\n    parser.add_argument('--weight_reg', type=float, default=-1)\n    parser.add_argument('--weight_arap', type=float, default=-1)\n    parser.add_argument('--weight_overlap', type=float, default=-1)\n    parser.add_argument('--weight_proj', type=float, default=-1)\n    parser.add_argument('--weight_normal', type=float, default=-1)\n    parser.add_argument('--weight_cycle', type=float, default=-1)\n    parser.add_argument('--weight_ndirection', type=float, default=-1)\n\n\n    parser.add_argument('--weight_decay',default=0.00005, type=float)\n    parser.add_argument('--epsilon', type=float, default=1e-8)\n    parser.add_argument('--num_steps', type=int, default=100000)\n    parser.add_argument('--mixed_precision', action='store_true', 
help='use mixed precision')\n    parser.add_argument('--clip', type=float, default=1.0)\n\n    # control the using mode\n    parser.add_argument('--over_fitting_id', type=int, default=0, help='The id that you want to overfit')\n    parser.add_argument('--if_over_fitting_this_time', type=int, default=0, help='whether you want to overfit, default is False')\n    parser.add_argument('--if_only_test', type=int, default=0, help='whether you only want to test, default is False')\n    parser.add_argument('--if_only_test_max_num', type=int, default=3, help='the max number of models that you want to test on')\n    parser.add_argument('--network_name', type=str, default='Net_conpu_v1', help='the name of the network that you would like to use')\n    parser.add_argument('--if_fix_sample', type=int, default=0, help='whether to use fix sampling')\n    parser.add_argument('--if_use_siren', type=int, default=0, help='whether to use siren activation function')\n\n\n    \n    \n    '''\n    #basic settings\n    \n                                     \n    # arguments for training process\n    \n\n    parser.add_argument('--patch_num', default=10, type=int,\n                        metavar='pn', help='number of patches')\n    parser.add_argument('--point_num', default=8192, type=int,\n                        metavar='pn', help='number of patches')\n    parser.add_argument('--dim_k', default=1024, type=int,\n                        metavar='K', help='dim. of the feature vector (default: 1024)')\n    parser.add_argument('--symfn', default='max', choices=['max', 'avg'],\n                        help='symmetric function (default: max)')\n    parser.add_argument('--delta', default=1.0e-2, type=float,\n                        metavar='D', help='step size for approx. 
Jacobian (default: 1.0e-2)')\n    parser.add_argument('--learn_delta', dest='learn_delta', action='store_true',\n                        help='flag for training step size delta')\n    parser.add_argument('--neighbour_num', default=4, type=int,\n                        metavar='nn', help='neighbour_num of weight smoothing term')\n    \n    \n    parser.add_argument('--cycle', type=bool, default=False, metavar='N',\n                        help='Whether to use cycle consistency')\n    parser.add_argument('--n_blocks', type=int, default=1, metavar='N',\n                        help='Num of blocks of encoder&decoder')\n    parser.add_argument('--n_heads', type=int, default=1, metavar='N',\n                        help='Num of heads in multiheadedattention')\n    parser.add_argument('--ff_dims', type=int, default=1024, metavar='N',\n                        help='Num of dimensions of fc in transformer')\n    parser.add_argument('--dropout', type=float, default=0.0, metavar='N',\n                        help='Dropout ratio in transformer')\n                        \n    # PointNet settings\n    parser.add_argument('--radius', type=float, default=0.3, help='Neighborhood radius for computing pointnet features')\n    parser.add_argument('--num_neighbors', type=int, default=64, metavar='N', help='Max num of neighbors to use')\n    # RPMNet settings\n    parser.add_argument('--features', type=str, choices=['ppf', 'dxyz', 'xyz'], default=['ppf', 'dxyz', 'xyz'],\n                        nargs='+', help='Which features to use. Default: all')\n    parser.add_argument('--feat_dim', type=int, default=96,\n                        help='Feature dimension (to compute distances on). 
Other numbers will be scaled accordingly')\n    parser.add_argument('--no_slack', action='store_true', help='If set, will not have a slack column.')\n    parser.add_argument('--num_sk_iter', type=int, default=5,\n                        help='Number of inner iterations used in sinkhorn normalization')\n    parser.add_argument('--num_reg_iter', type=int, default=5,\n                        help='Number of outer iterations used for registration (only during inference)')\n    parser.add_argument('--loss_type', type=str, choices=['mse', 'mae'], default='mae',\n                        help='Loss to be optimized')\n    parser.add_argument('--wt_inliers', type=float, default=1e-2, help='Weight to encourage inliers')\n                        \n    parser.add_argument('--lambda_data', type=float, default=1.0, help='weight of depth loss')\n    parser.add_argument('--lambda_reg', type=float, default=0.1, help='weight of regularization loss')\n        \n    parser.add_argument('--num_adja', type=int, default=8, help='number of nodes who affect a point')\n    parser.add_argument('--max_num_edges', type=int, default=3000, help='number of edges')\n    parser.add_argument('--max_num_nodes', type=int, default=400, help='number of nodes')\n    parser.add_argument('--max_num_points', type=int, default=4096, help='number of points')\n    '''             \n    args = parser.parse_args()\n\n    \n\n    return args\n"
  }
]