[
  {
    "path": ".gitignore",
    "content": "**/__pycache__/**\n*.ipynb\n**/.ipynb_checkpoints/**\n*.npy\n*.npz\n**/.vscode/**\n**/grasp_label*/**\n**/log*/**\n**/dump*/**\n**/build/**\n*.o\n*.so\n*.egg\n**/*.egg-info/**\nlogs\ndataset/tolerance\n**/.idea/"
  },
  {
    "path": "LICENSE",
    "content": "GRASPNET-BASELINE\nSOFTWARE LICENSE AGREEMENT\nACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY\n\nBY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.\n\nThis is a license agreement (\"Agreement\") between your academic institution or non-profit organization or self (called \"Licensee\" or \"You\" in this Agreement) and Shanghai Jiao Tong University (called \"Licensor\" in this Agreement).  All rights not specifically granted to you in this Agreement are reserved for Licensor. \n\nRESERVATION OF OWNERSHIP AND GRANT OF LICENSE: \nLicensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive, \nnon-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement.  As used in this Agreement, the term \"Software\" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor,  including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).\n\nCONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement.  Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.\n\nPERMITTED USES:  The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.\n\nDERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement.  You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.\n\nBACKUPS:  If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.\n\nUSES NOT PERMITTED:  You may not distribute, copy or use the Software except as explicitly permitted herein. 
Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark \"GraspNet-Baseline\", \"Shanghai Jiao Tong\" or any renditions thereof without the prior written permission of Licensor.\n\nYou may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.\n\nASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.\n\nTERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.\n\nThe Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement.  Licensee may terminate this Agreement by ceasing to use the Software.  Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.\n\nFEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.\n\nDISCLAIMER OF WARRANTIES:  THE SOFTWARE IS PROVIDED \"AS-IS\" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT.  LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.\n\nSUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.\n\nEXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.\n\nEXPORT REGULATION: Licensee agrees to comply with any and all applicable U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
\n\nSEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.\n\nNO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.\n\nENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.\n\n\n\n************************************************************************\n\nTHIRD-PARTY SOFTWARE NOTICES AND INFORMATION\n\nThis project incorporates material from the project(s) listed below (collectively, \"Third Party Code\").  This Third Party Code is licensed to you under their original license terms set forth below.  We reserve all other rights not expressly granted, whether by implication, estoppel or otherwise.\n\n1. PyTorch (https://github.com/pytorch/pytorch)\n\nFrom PyTorch:\n\nCopyright (c) 2016-     Facebook, Inc            (Adam Paszke)\nCopyright (c) 2014-     Facebook, Inc            (Soumith Chintala)\nCopyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)\nCopyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)\nCopyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)\nCopyright (c) 2011-2013 NYU                      (Clement Farabet)\nCopyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)\nCopyright (c) 2006      Idiap Research Institute (Samy Bengio)\nCopyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)\n\nFrom Caffe2:\n\nCopyright (c) 2016-present, Facebook Inc. All rights reserved.\n\nAll contributions by Facebook:\nCopyright (c) 2016 Facebook Inc.\n\nAll contributions by Google:\nCopyright (c) 2015 Google Inc.\nAll rights reserved.\n\nAll contributions by Yangqing Jia:\nCopyright (c) 2015 Yangqing Jia\nAll rights reserved.\n\nAll contributions by Kakao Brain:\nCopyright 2019-2020 Kakao Brain\n\nAll contributions from Caffe:\nCopyright(c) 2013, 2014, 2015, the respective contributors\nAll rights reserved.\n\nAll other contributions:\nCopyright(c) 2015, 2016 the respective contributors\nAll rights reserved.\n\nCaffe2 uses a copyright model similar to Caffe: each contributor holds\ncopyright over their contributions to Caffe2. The project versioning records\nall such contribution and copyright details.\n
If a contributor wants to further\nmark their specific copyright on a particular contribution, they should\nindicate their copyright solely in the commit message of the change when it is\ncommitted.\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright\n   notice, this list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright\n   notice, this list of conditions and the following disclaimer in the\n   documentation and/or other materials provided with the distribution.\n\n3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America\n   and IDIAP Research Institute nor the names of its contributors may be\n   used to endorse or promote products derived from this software without\n   specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\nARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\nLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\nSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\nINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\nCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\nARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\nPOSSIBILITY OF SUCH DAMAGE.\n\n2. VoteNet (https://github.com/facebookresearch/votenet)\n\nMIT License\n\nCopyright (c) Facebook, Inc. and its affiliates.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********\n"
  },
  {
    "path": "README.md",
    "content": "# GraspNet graspness\nMy implementation of paper \"Graspness Discovery in Clutters for Fast and Accurate Grasp Detection\" (ICCV 2021).\n\n[[paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Wang_Graspness_Discovery_in_Clutters_for_Fast_and_Accurate_Grasp_Detection_ICCV_2021_paper.pdf)]\n[[dataset](https://graspnet.net/)]\n[[API](https://github.com/graspnet/graspnetAPI)]\n\n\n## Requirements\n- Python 3\n- PyTorch 1.8\n- Open3d 0.8\n- TensorBoard 2.3\n- NumPy\n- SciPy\n- Pillow\n- tqdm\n- MinkowskiEngine\n\n## Installation\nGet the code.\n```bash\ngit clone https://github.com/rhett-chen/graspness_implementation.git\ncd graspnet-graspness\n```\nInstall packages via Pip.\n```bash\npip install -r requirements.txt\n```\nCompile and install pointnet2 operators (code adapted from [votenet](https://github.com/facebookresearch/votenet)).\n```bash\ncd pointnet2\npython setup.py install\n```\nCompile and install knn operator (code adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda)).\n```bash\ncd knn\npython setup.py install\n```\nInstall graspnetAPI for evaluation.\n```bash\ngit clone https://github.com/graspnet/graspnetAPI.git\ncd graspnetAPI\npip install .\n```\nFor MinkowskiEngine, please refer https://github.com/NVIDIA/MinkowskiEngine\n## Point level Graspness Generation\nPoint level graspness label are not included in the original dataset, and need additional generation. Make sure you have downloaded the orginal dataset from [GraspNet](https://graspnet.net/). The generation code is in [dataset/generate_graspness.py](dataset/generate_graspness.py).\n```bash\ncd dataset\npython generate_graspness.py --dataset_root /data3/graspnet --camera_type kinect\n```\n\n## Simplify dataset\noriginal dataset grasp_label files have redundant data,  We can significantly save the memory cost. The code is in [dataset/simplify_dataset.py](dataset/simplify_dataset.py)\n```bash\ncd dataset\npython simplify_dataset.py --dataset_root /data3/graspnet\n```\n\n## Training and Testing\nTraining examples are shown in [command_train.sh](command_train.sh). `--dataset_root`, `--camera` and `--log_dir` should be specified according to your settings. You can use TensorBoard to visualize training process.\n\nTesting examples are shown in [command_test.sh](command_test.sh), which contains inference and result evaluation. `--dataset_root`, `--camera`, `--checkpoint_path` and `--dump_dir` should be specified according to your settings. 
\nTesting examples are shown in [command_test.sh](command_test.sh), which contains inference and result evaluation. `--dataset_root`, `--camera`, `--checkpoint_path` and `--dump_dir` should be specified according to your settings. Set `--collision_thresh` to -1 for fast inference.\n\n## Results\nThe \"In repo\" row reports the performance of this implementation, evaluated without collision detection.\n\nEvaluation results on Kinect camera:\n|          |        | Seen             |                  |        | Similar          |                  |        | Novel            |                  | \n|:--------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|\n|          | __AP__ | AP<sub>0.8</sub> | AP<sub>0.4</sub> | __AP__ | AP<sub>0.8</sub> | AP<sub>0.4</sub> | __AP__ | AP<sub>0.8</sub> | AP<sub>0.4</sub> |\n| In paper | 61.19  | 71.46            | 56.04            | 47.39  | 56.78            | 40.43            | 19.01  | 23.73            | 10.60            |\n| In repo  | 61.83  | 73.28            | 54.14            | 51.13  | 62.53            | 41.57            | 19.94  | 24.90            | 11.02            |\n\n## Troubleshooting\nIf you encounter a `torch.floor` error in MinkowskiEngine, you can work around it by editing the MinkowskiEngine source: in `MinkowskiEngine/utils/quantization.py` (line 262), change `discrete_coordinates = _auto_floor(coordinates)` to `discrete_coordinates = coordinates`.\n\n## Acknowledgement\nMy code is mainly based on [graspnet-baseline](https://github.com/graspnet/graspnet-baseline).\n"
  },
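  {
    "path": "requirements.txt",
    "content": "# Example requirements file matching the README's Requirements section.\n# Version pins are assumptions; MinkowskiEngine, pointnet2, knn and graspnetAPI\n# are installed separately as described in the README.\ntorch>=1.8\ntensorboard>=2.3\nnumpy\nscipy\npillow\ntqdm\nopen3d>=0.8\n"
  },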
  {
    "path": "command_test.sh",
    "content": "CUDA_VISIBLE_DEVICES=4 python test.py --camera kinect --dump_dir logs/log_kn/dump_epoch10 --checkpoint_path logs/log_kn/minkresunet_epoch10.tar --batch_size 1 --dataset_root /data3/graspnet --infer --eval --collision_thresh -1"
  },
  {
    "path": "command_train.sh",
    "content": "CUDA_VISIBLE_DEVICES=4 python train.py --camera kinect --log_dir logs/log_kn --batch_size 4 --learning_rate 0.001 --model_name minkuresunet --dataset_root /data3/graspnet"
  },
  {
    "path": "dataset/generate_graspness.py",
    "content": "import numpy as np\nimport os\nfrom PIL import Image\nimport scipy.io as scio\nimport sys\nROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(ROOT_DIR)\nfrom utils.data_utils import get_workspace_mask, CameraInfo, create_point_cloud_from_depth_image\nfrom knn.knn_modules import knn\nimport torch\nfrom graspnetAPI.utils.xmlhandler import xmlReader\nfrom graspnetAPI.utils.utils import get_obj_pose_list, transform_points\nimport argparse\n\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset_root', default=None, required=True)\nparser.add_argument('--camera_type', default='kinect', help='Camera split [realsense/kinect]')\n\n\nif __name__ == '__main__':\n    cfgs = parser.parse_args()\n    dataset_root = cfgs.dataset_root   # set dataset root\n    camera_type = cfgs.camera_type   # kinect / realsense\n    save_path_root = os.path.join(dataset_root, 'graspness')\n\n    num_views, num_angles, num_depths = 300, 12, 4\n    fric_coef_thresh = 0.8\n    point_grasp_num = num_views * num_angles * num_depths\n    for scene_id in range(100):\n        save_path = os.path.join(save_path_root, 'scene_' + str(scene_id).zfill(4), camera_type)\n        if not os.path.exists(save_path):\n            os.makedirs(save_path)\n        labels = np.load(\n            os.path.join(dataset_root, 'collision_label', 'scene_' + str(scene_id).zfill(4), 'collision_labels.npz'))\n        collision_dump = []\n        for j in range(len(labels)):\n            collision_dump.append(labels['arr_{}'.format(j)])\n\n        for ann_id in range(256):\n            # get scene point cloud\n            print('generating scene: {} ann: {}'.format(scene_id, ann_id))\n            depth = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                                     camera_type, 'depth', str(ann_id).zfill(4) + '.png')))\n            seg = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                                   camera_type, 'label', str(ann_id).zfill(4) + '.png')))\n            meta = scio.loadmat(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                             camera_type, 'meta', str(ann_id).zfill(4) + '.mat'))\n            intrinsic = meta['intrinsic_matrix']\n            factor_depth = meta['factor_depth']\n            camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],\n                                factor_depth)\n            cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\n\n            # remove outlier and get objectness label\n            depth_mask = (depth > 0)\n            camera_poses = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                                camera_type, 'camera_poses.npy'))\n            camera_pose = camera_poses[ann_id]\n            align_mat = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                             camera_type, 'cam0_wrt_table.npy'))\n            trans = np.dot(align_mat, camera_pose)\n            workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)\n            mask = (depth_mask & workspace_mask)\n            cloud_masked = cloud[mask]\n            objectness_label = seg[mask]\n\n            # get scene object 
            scene_reader = xmlReader(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),\n                                                  camera_type, 'annotations', '%04d.xml' % ann_id))\n            pose_vectors = scene_reader.getposevectorlist()\n            obj_list, pose_list = get_obj_pose_list(camera_pose, pose_vectors)\n            grasp_labels = {}\n            for i in obj_list:\n                file = np.load(os.path.join(dataset_root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))))\n                grasp_labels[i] = (file['points'].astype(np.float32), file['offsets'].astype(np.float32),\n                                   file['scores'].astype(np.float32))\n\n            grasp_points = []\n            grasp_points_graspness = []\n            for i, (obj_idx, trans_) in enumerate(zip(obj_list, pose_list)):\n                sampled_points, offsets, fric_coefs = grasp_labels[obj_idx]\n                collision = collision_dump[i]  # Npoints * num_views * num_angles * num_depths\n                num_points = sampled_points.shape[0]\n\n                valid_grasp_mask = ((fric_coefs <= fric_coef_thresh) & (fric_coefs > 0) & ~collision)\n                valid_grasp_mask = valid_grasp_mask.reshape(num_points, -1)\n                graspness = np.sum(valid_grasp_mask, axis=1) / point_grasp_num\n                target_points = transform_points(sampled_points, trans_)\n                target_points = transform_points(target_points, np.linalg.inv(camera_pose))  # map back to the current camera frame (pose_list is composed with camera_pose)\n                grasp_points.append(target_points)\n                grasp_points_graspness.append(graspness.reshape(num_points, 1))\n            grasp_points = np.vstack(grasp_points)\n            grasp_points_graspness = np.vstack(grasp_points_graspness)\n\n            grasp_points = torch.from_numpy(grasp_points).cuda()\n            grasp_points_graspness = torch.from_numpy(grasp_points_graspness).cuda()\n            grasp_points = grasp_points.transpose(0, 1).contiguous().unsqueeze(0)\n\n            masked_points_num = cloud_masked.shape[0]\n            cloud_masked_graspness = np.zeros((masked_points_num, 1))\n            part_num = int(masked_points_num / 10000)\n            for i in range(1, part_num + 2):   # process in 10000-point chunks to limit CUDA memory usage\n                if i == part_num + 1:\n                    cloud_masked_partial = cloud_masked[10000 * part_num:]\n                    if len(cloud_masked_partial) == 0:\n                        break\n                else:\n                    cloud_masked_partial = cloud_masked[10000 * (i - 1):(i * 10000)]\n                cloud_masked_partial = torch.from_numpy(cloud_masked_partial).cuda()\n                cloud_masked_partial = cloud_masked_partial.transpose(0, 1).contiguous().unsqueeze(0)\n                nn_inds = knn(grasp_points, cloud_masked_partial, k=1).squeeze() - 1  # knn returns 1-based indices\n                cloud_masked_graspness[10000 * (i - 1):(i * 10000)] = torch.index_select(\n                    grasp_points_graspness, 0, nn_inds).cpu().numpy()\n\n            # min-max normalize graspness over the scene\n            max_graspness = np.max(cloud_masked_graspness)\n            min_graspness = np.min(cloud_masked_graspness)\n            cloud_masked_graspness = (cloud_masked_graspness - min_graspness) / (max_graspness - min_graspness)\n\n            np.save(os.path.join(save_path, str(ann_id).zfill(4) + '.npy'), cloud_masked_graspness)\n"
  },
  {
    "path": "dataset/graspnet_dataset.py",
    "content": "\"\"\" GraspNet dataset processing.\n    Author: chenxi-wang\n\"\"\"\n\nimport os\nimport numpy as np\nimport scipy.io as scio\nfrom PIL import Image\n\nimport torch\nimport collections.abc as container_abcs\nfrom torch.utils.data import Dataset\nfrom tqdm import tqdm\nimport MinkowskiEngine as ME\nfrom data_utils import CameraInfo, transform_point_cloud, create_point_cloud_from_depth_image, get_workspace_mask\n\n\nclass GraspNetDataset(Dataset):\n    def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_points=20000,\n                 voxel_size=0.005, remove_outlier=True, augment=False, load_label=True):\n        assert (num_points <= 50000)\n        self.root = root\n        self.split = split\n        self.voxel_size = voxel_size\n        self.num_points = num_points\n        self.remove_outlier = remove_outlier\n        self.grasp_labels = grasp_labels\n        self.camera = camera\n        self.augment = augment\n        self.load_label = load_label\n        self.collision_labels = {}\n\n        if split == 'train':\n            self.sceneIds = list(range(100))\n        elif split == 'test':\n            self.sceneIds = list(range(100, 190))\n        elif split == 'test_seen':\n            self.sceneIds = list(range(100, 130))\n        elif split == 'test_similar':\n            self.sceneIds = list(range(130, 160))\n        elif split == 'test_novel':\n            self.sceneIds = list(range(160, 190))\n        self.sceneIds = ['scene_{}'.format(str(x).zfill(4)) for x in self.sceneIds]\n\n        self.depthpath = []\n        self.labelpath = []\n        self.metapath = []\n        self.scenename = []\n        self.frameid = []\n        self.graspnesspath = []\n        for x in tqdm(self.sceneIds, desc='Loading data path and collision labels...'):\n            for img_num in range(256):\n                self.depthpath.append(os.path.join(root, 'scenes', x, camera, 'depth', str(img_num).zfill(4) + '.png'))\n                self.labelpath.append(os.path.join(root, 'scenes', x, camera, 'label', str(img_num).zfill(4) + '.png'))\n                self.metapath.append(os.path.join(root, 'scenes', x, camera, 'meta', str(img_num).zfill(4) + '.mat'))\n                self.graspnesspath.append(os.path.join(root, 'graspness', x, camera, str(img_num).zfill(4) + '.npy'))\n                self.scenename.append(x.strip())\n                self.frameid.append(img_num)\n            if self.load_label:\n                collision_labels = np.load(os.path.join(root, 'collision_label', x.strip(), 'collision_labels.npz'))\n                self.collision_labels[x.strip()] = {}\n                for i in range(len(collision_labels)):\n                    self.collision_labels[x.strip()][i] = collision_labels['arr_{}'.format(i)]\n\n    def scene_list(self):\n        return self.scenename\n\n    def __len__(self):\n        return len(self.depthpath)\n\n    def augment_data(self, point_clouds, object_poses_list):\n        # Flipping along the YZ plane\n        if np.random.random() > 0.5:\n            flip_mat = np.array([[-1, 0, 0],\n                                 [0, 1, 0],\n                                 [0, 0, 1]])\n            point_clouds = transform_point_cloud(point_clouds, flip_mat, '3x3')\n            for i in range(len(object_poses_list)):\n                object_poses_list[i] = np.dot(flip_mat, object_poses_list[i]).astype(np.float32)\n\n        # Rotation along up-axis/Z-axis\n        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 
 degrees\n        c, s = np.cos(rot_angle), np.sin(rot_angle)\n        rot_mat = np.array([[1, 0, 0],\n                            [0, c, -s],\n                            [0, s, c]])\n        point_clouds = transform_point_cloud(point_clouds, rot_mat, '3x3')\n        for i in range(len(object_poses_list)):\n            object_poses_list[i] = np.dot(rot_mat, object_poses_list[i]).astype(np.float32)\n\n        return point_clouds, object_poses_list\n\n    def __getitem__(self, index):\n        if self.load_label:\n            return self.get_data_label(index)\n        else:\n            return self.get_data(index)\n\n    def get_data(self, index, return_raw_cloud=False):\n        depth = np.array(Image.open(self.depthpath[index]))\n        seg = np.array(Image.open(self.labelpath[index]))\n        meta = scio.loadmat(self.metapath[index])\n        scene = self.scenename[index]\n        try:\n            intrinsic = meta['intrinsic_matrix']\n            factor_depth = meta['factor_depth']\n        except Exception as e:\n            print(repr(e))\n            print(scene)\n        camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],\n                            factor_depth)\n\n        # generate cloud\n        cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\n\n        # get valid points\n        depth_mask = (depth > 0)\n        if self.remove_outlier:\n            camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy'))\n            align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy'))\n            trans = np.dot(align_mat, camera_poses[self.frameid[index]])\n            workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)\n            mask = (depth_mask & workspace_mask)\n        else:\n            mask = depth_mask\n        cloud_masked = cloud[mask]\n\n        if return_raw_cloud:\n            return cloud_masked\n        # randomly sample points\n        if len(cloud_masked) >= self.num_points:\n            idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False)\n        else:\n            idxs1 = np.arange(len(cloud_masked))\n            idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True)\n            idxs = np.concatenate([idxs1, idxs2], axis=0)\n        cloud_sampled = cloud_masked[idxs]\n\n        ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),\n                    'coors': cloud_sampled.astype(np.float32) / self.voxel_size,\n                    'feats': np.ones_like(cloud_sampled).astype(np.float32),\n                    }\n        return ret_dict\n\n    def get_data_label(self, index):\n        depth = np.array(Image.open(self.depthpath[index]))\n        seg = np.array(Image.open(self.labelpath[index]))\n        meta = scio.loadmat(self.metapath[index])\n        graspness = np.load(self.graspnesspath[index])  # for each point in workspace masked point cloud\n        scene = self.scenename[index]\n        try:\n            obj_idxs = meta['cls_indexes'].flatten().astype(np.int32)\n            poses = meta['poses']\n            intrinsic = meta['intrinsic_matrix']\n            factor_depth = meta['factor_depth']\n        except Exception as e:\n            print(repr(e))\n            print(scene)\n        camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],\n
                    factor_depth)\n\n        # generate cloud\n        cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\n\n        # get valid points\n        depth_mask = (depth > 0)\n        if self.remove_outlier:\n            camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy'))\n            align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy'))\n            trans = np.dot(align_mat, camera_poses[self.frameid[index]])\n            workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)\n            mask = (depth_mask & workspace_mask)\n        else:\n            mask = depth_mask\n        cloud_masked = cloud[mask]\n        seg_masked = seg[mask]\n\n        # sample points\n        if len(cloud_masked) >= self.num_points:\n            idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False)\n        else:\n            idxs1 = np.arange(len(cloud_masked))\n            idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True)\n            idxs = np.concatenate([idxs1, idxs2], axis=0)\n        cloud_sampled = cloud_masked[idxs]\n        seg_sampled = seg_masked[idxs]\n        graspness_sampled = graspness[idxs]\n        objectness_label = seg_sampled.copy()\n\n        objectness_label[objectness_label > 1] = 1\n\n        object_poses_list = []\n        grasp_points_list = []\n        grasp_widths_list = []\n        grasp_scores_list = []\n        for i, obj_idx in enumerate(obj_idxs):\n            if (seg_sampled == obj_idx).sum() < 50:\n                continue\n            object_poses_list.append(poses[:, :, i])\n            points, widths, scores = self.grasp_labels[obj_idx]\n            collision = self.collision_labels[scene][i]  # (Np, V, A, D)\n\n            idxs = np.random.choice(len(points), min(max(int(len(points) / 4), 300), len(points)), replace=False)\n            grasp_points_list.append(points[idxs])\n            grasp_widths_list.append(widths[idxs])\n            collision = collision[idxs].copy()\n            scores = scores[idxs].copy()\n            scores[collision] = 0\n            grasp_scores_list.append(scores)\n\n        if self.augment:\n            cloud_sampled, object_poses_list = self.augment_data(cloud_sampled, object_poses_list)\n\n        ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),\n                    'coors': cloud_sampled.astype(np.float32) / self.voxel_size,\n                    'feats': np.ones_like(cloud_sampled).astype(np.float32),\n                    'graspness_label': graspness_sampled.astype(np.float32),\n                    'objectness_label': objectness_label.astype(np.int64),\n                    'object_poses_list': object_poses_list,\n                    'grasp_points_list': grasp_points_list,\n                    'grasp_widths_list': grasp_widths_list,\n                    'grasp_scores_list': grasp_scores_list}\n        return ret_dict\n\n\ndef load_grasp_labels(root):\n    obj_names = list(range(1, 89))\n    grasp_labels = {}\n    for obj_name in tqdm(obj_names, desc='Loading grasping labels...'):\n        label = np.load(os.path.join(root, 'grasp_label_simplified', '{}_labels.npz'.format(str(obj_name - 1).zfill(3))))\n        grasp_labels[obj_name] = (label['points'].astype(np.float32), label['width'].astype(np.float32),\n                                  label['scores'].astype(np.float32))\n\n    return 
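grasp_labels\n\n\n# Usage sketch (an illustration, not part of the original code): wiring the\n# dataset and collate function into a PyTorch DataLoader, with values following\n# command_train.sh; the dataset path is an assumption.\n#\n#   from torch.utils.data import DataLoader\n#   labels = load_grasp_labels('/data3/graspnet')\n#   train_set = GraspNetDataset('/data3/graspnet', grasp_labels=labels, camera='kinect',\n#                               split='train', num_points=20000, voxel_size=0.005, augment=True)\n#   loader = DataLoader(train_set, batch_size=4, shuffle=True, collate_fn=minkowski_collate_fn)\n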
\n\ndef minkowski_collate_fn(list_data):\n    coordinates_batch, features_batch = ME.utils.sparse_collate([d[\"coors\"] for d in list_data],\n                                                                [d[\"feats\"] for d in list_data])\n    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(\n        coordinates_batch, features_batch, return_index=True, return_inverse=True)\n    res = {\n        \"coors\": coordinates_batch,\n        \"feats\": features_batch,\n        \"quantize2original\": quantize2original\n    }\n\n    def collate_fn_(batch):\n        if type(batch[0]).__module__ == 'numpy':\n            return torch.stack([torch.from_numpy(b) for b in batch], 0)\n        elif isinstance(batch[0], container_abcs.Sequence):\n            return [[torch.from_numpy(sample) for sample in b] for b in batch]\n        elif isinstance(batch[0], container_abcs.Mapping):\n            for key in batch[0]:\n                if key == 'coors' or key == 'feats':\n                    continue\n                res[key] = collate_fn_([d[key] for d in batch])\n            return res\n    res = collate_fn_(list_data)\n\n    return res\n"
  },
  {
    "path": "dataset/simplify_dataset.py",
    "content": "import numpy as np\nimport os\nimport argparse\n\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset_root', default=None, required=True)\n\n\ndef simplify_grasp_labels(root, save_path):\n    \"\"\"\n    original dataset grasp_label files have redundant data,  We can significantly save the memory cost\n    \"\"\"\n    obj_names = list(range(88))\n    if not os.path.exists(save_path):\n        os.makedirs(save_path)\n    for i in obj_names:\n        print('\\nsimplifying object {}:'.format(i))\n        label = np.load(os.path.join(root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))))\n        # point_num = len(label['points'])\n        print('original shape:               ', label['points'].shape, label['offsets'].shape, label['scores'].shape)\n        # if point_num > 4820:\n        #     idxs = np.random.choice(point_num, 4820, False)\n        #     points = label['points'][idxs]\n        #     offsets = label['offsets'][idxs]\n        #     scores = label['scores'][idxs]\n        #     print('Warning!!!  down sample object {}'.format(i))\n        # else:\n        points = label['points']\n        scores = label['scores']\n        offsets = label['offsets']\n        width = offsets[:, :, :, :, 2]\n        print('after simplify, offset shape: ', points.shape, scores.shape, width.shape)\n        np.savez(os.path.join(save_path, '{}_labels.npz'.format(str(i).zfill(3))),\n                 points=points, scores=scores, width=width)\n\n\nif __name__ == '__main__':\n    cfgs = parser.parse_args()\n    root = cfgs.dataset_root  # set root and save path\n    save_path = os.path.join(root, 'grasp_label_simplified')\n    simplify_grasp_labels(root, save_path)\n\n"
  },
  {
    "path": "dataset/vis_graspness.py",
    "content": "import open3d as o3d\nimport scipy.io as scio\nfrom PIL import Image\nimport os\nimport numpy as np\nimport sys\nROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(ROOT_DIR)\nfrom utils.data_utils import get_workspace_mask, CameraInfo, create_point_cloud_from_depth_image\n\ndata_path = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/'\nscene_id = 'scene_0060'\nann_id = '0000'\ncamera_type = 'realsense'\ncolor = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'rgb', ann_id + '.png')), dtype=np.float32) / 255.0\ndepth = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'depth', ann_id + '.png')))\nseg = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'label', ann_id + '.png')))\nmeta = scio.loadmat(os.path.join(data_path, 'scenes', scene_id, camera_type, 'meta', ann_id + '.mat'))\nintrinsic = meta['intrinsic_matrix']\nfactor_depth = meta['factor_depth']\ncamera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)\npoint_cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\ndepth_mask = (depth > 0)\ncamera_poses = np.load(os.path.join(data_path, 'scenes', scene_id, camera_type, 'camera_poses.npy'))\nalign_mat = np.load(os.path.join(data_path, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy'))\ntrans = np.dot(align_mat, camera_poses[int(ann_id)])\nworkspace_mask = get_workspace_mask(point_cloud, seg, trans=trans, organized=True, outlier=0.02)\nmask = (depth_mask & workspace_mask)\npoint_cloud = point_cloud[mask]\ncolor = color[mask]\nseg = seg[mask]\n\ngraspness_full = np.load(os.path.join(data_path, 'graspness', scene_id, camera_type, ann_id + '.npy')).squeeze()\ngraspness_full[seg == 0] = 0.\nprint('graspness full scene: ', graspness_full.shape, (graspness_full > 0.1).sum())\ncolor[graspness_full > 0.1] = [0., 1., 0.]\n\n\ncloud = o3d.geometry.PointCloud()\ncloud.points = o3d.utility.Vector3dVector(point_cloud.astype(np.float32))\ncloud.colors = o3d.utility.Vector3dVector(color.astype(np.float32))\no3d.visualization.draw_geometries([cloud])\n"
  },
  {
    "path": "infer_vis_grasp.py",
    "content": "import os\nimport sys\nimport numpy as np\nimport argparse\nfrom PIL import Image\nimport time\nimport scipy.io as scio\nimport torch\nimport open3d as o3d\nfrom graspnetAPI.graspnet_eval import GraspGroup\n\nROOT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(ROOT_DIR)\nsys.path.append(os.path.join(ROOT_DIR, 'utils'))\nfrom models.graspnet import GraspNet, pred_decode\nfrom dataset.graspnet_dataset import minkowski_collate_fn\nfrom collision_detector import ModelFreeCollisionDetector\nfrom data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset_root', default='/data/datasets/graspnet')\nparser.add_argument('--checkpoint_path', default='/data/zibo/logs/graspness_kn.tar')\nparser.add_argument('--dump_dir', help='Dump dir to save outputs', default='/data/zibo/logs/')\nparser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')\nparser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')\nparser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]')\nparser.add_argument('--batch_size', type=int, default=1, help='Batch Size during inference [default: 1]')\nparser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size for sparse convolution')\nparser.add_argument('--collision_thresh', type=float, default=-1,\n                    help='Collision Threshold in collision detection [default: 0.01]')\nparser.add_argument('--voxel_size_cd', type=float, default=0.01, help='Voxel Size for collision detection')\nparser.add_argument('--infer', action='store_true', default=False)\nparser.add_argument('--vis', action='store_true', default=False)\nparser.add_argument('--scene', type=str, default='0188')\nparser.add_argument('--index', type=str, default='0000')\ncfgs = parser.parse_args()\n\n# ------------------------------------------------------------------------- GLOBAL CONFIG BEG\nif not os.path.exists(cfgs.dump_dir):\n    os.mkdir(cfgs.dump_dir)\n\n\ndef data_process():\n    root = cfgs.dataset_root\n    camera_type = cfgs.camera\n\n    depth = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'depth', index + '.png')))\n    seg = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'label', index + '.png')))\n    meta = scio.loadmat(os.path.join(root, 'scenes', scene_id, camera_type, 'meta', index + '.mat'))\n    try:\n        intrinsic = meta['intrinsic_matrix']\n        factor_depth = meta['factor_depth']\n    except Exception as e:\n        print(repr(e))\n    camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],\n                        factor_depth)\n    # generate cloud\n    cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\n\n    # get valid points\n    depth_mask = (depth > 0)\n    camera_poses = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'camera_poses.npy'))\n    align_mat = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy'))\n    trans = np.dot(align_mat, camera_poses[int(index)])\n    workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)\n    mask = (depth_mask & workspace_mask)\n\n    cloud_masked = cloud[mask]\n\n    # sample points random\n    if len(cloud_masked) >= cfgs.num_point:\n        idxs = np.random.choice(len(cloud_masked), 
\n# ------------------------------------------------------------------------- GLOBAL CONFIG BEG\nif not os.path.exists(cfgs.dump_dir):\n    os.makedirs(cfgs.dump_dir)\n\n\ndef data_process():\n    root = cfgs.dataset_root\n    camera_type = cfgs.camera\n\n    depth = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'depth', index + '.png')))\n    seg = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'label', index + '.png')))\n    meta = scio.loadmat(os.path.join(root, 'scenes', scene_id, camera_type, 'meta', index + '.mat'))\n    try:\n        intrinsic = meta['intrinsic_matrix']\n        factor_depth = meta['factor_depth']\n    except Exception as e:\n        print(repr(e))\n    camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],\n                        factor_depth)\n    # generate cloud\n    cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)\n\n    # get valid points\n    depth_mask = (depth > 0)\n    camera_poses = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'camera_poses.npy'))\n    align_mat = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy'))\n    trans = np.dot(align_mat, camera_poses[int(index)])\n    workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)\n    mask = (depth_mask & workspace_mask)\n\n    cloud_masked = cloud[mask]\n\n    # randomly sample points\n    if len(cloud_masked) >= cfgs.num_point:\n        idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False)\n    else:\n        idxs1 = np.arange(len(cloud_masked))\n        idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point - len(cloud_masked), replace=True)\n        idxs = np.concatenate([idxs1, idxs2], axis=0)\n    cloud_sampled = cloud_masked[idxs]\n\n    ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),\n                'coors': cloud_sampled.astype(np.float32) / cfgs.voxel_size,\n                'feats': np.ones_like(cloud_sampled).astype(np.float32),\n                }\n    return ret_dict\n\n\n# Reseed numpy in each dataloader worker so sampling differs across workers\ndef my_worker_init_fn(worker_id):\n    np.random.seed(np.random.get_state()[1][0] + worker_id)\n\n\ndef inference(data_input):\n    batch_data = minkowski_collate_fn([data_input])\n    net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False)\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    net.to(device)\n    # Load checkpoint\n    checkpoint = torch.load(cfgs.checkpoint_path)\n    net.load_state_dict(checkpoint['model_state_dict'])\n    start_epoch = checkpoint['epoch']\n    print(\"-> loaded checkpoint %s (epoch: %d)\" % (cfgs.checkpoint_path, start_epoch))\n\n    net.eval()\n    tic = time.time()\n\n    for key in batch_data:\n        if 'list' in key:\n            for i in range(len(batch_data[key])):\n                for j in range(len(batch_data[key][i])):\n                    batch_data[key][i][j] = batch_data[key][i][j].to(device)\n        else:\n            batch_data[key] = batch_data[key].to(device)\n    # Forward pass\n    with torch.no_grad():\n        end_points = net(batch_data)\n        grasp_preds = pred_decode(end_points)\n\n    preds = grasp_preds[0].detach().cpu().numpy()\n    gg = GraspGroup(preds)\n    # collision detection\n    if cfgs.collision_thresh > 0:\n        cloud = data_input['point_clouds']\n        mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)\n        collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)\n        gg = gg[~collision_mask]\n\n    # save grasps\n    save_dir = os.path.join(cfgs.dump_dir, scene_id, cfgs.camera)\n    save_path = os.path.join(save_dir, cfgs.index + '.npy')\n    if not os.path.exists(save_dir):\n        os.makedirs(save_dir)\n    gg.save_npy(save_path)\n\n    toc = time.time()\n    print('inference time: %fs' % (toc - tic))\n\n\nif __name__ == '__main__':\n    scene_id = 'scene_' + cfgs.scene\n    index = cfgs.index\n    data_dict = data_process()\n\n    if cfgs.infer:\n        inference(data_dict)\n    if cfgs.vis:\n        pc = data_dict['point_clouds']\n        gg = np.load(os.path.join(cfgs.dump_dir, scene_id, cfgs.camera, cfgs.index + '.npy'))\n        gg = GraspGroup(gg)\n        gg = gg.nms()\n        gg = gg.sort_by_score()\n        if len(gg) > 30:\n            gg = gg[:30]\n        grippers = gg.to_open3d_geometry_list()\n        cloud = o3d.geometry.PointCloud()\n        cloud.points = o3d.utility.Vector3dVector(pc.astype(np.float32))\n        o3d.visualization.draw_geometries([cloud, *grippers])\n"
  },
  {
    "path": "knn/knn_modules.py",
    "content": "import unittest\nimport gc\nimport operator as op\nimport functools\nimport torch\nfrom torch.autograd import Variable, Function\nfrom knn_pytorch import knn_pytorch\n# import knn_pytorch\ndef knn(ref, query, k=1):\n  \"\"\" Compute k nearest neighbors for each query point.\n  \"\"\"\n  device = ref.device\n  ref = ref.float().to(device)\n  query = query.float().to(device)\n  inds = torch.empty(query.shape[0], k, query.shape[2]).long().to(device)\n  knn_pytorch.knn(ref, query, inds)\n  return inds\n"
  },
  {
    "path": "knn/setup.py",
    "content": "#!/usr/bin/env python\n\nimport glob\nimport os\n\nimport torch\nfrom setuptools import find_packages\nfrom setuptools import setup\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.utils.cpp_extension import CppExtension\nfrom torch.utils.cpp_extension import CUDAExtension\n\nrequirements = [\"torch\", \"torchvision\"]\n\n\ndef get_extensions():\n    this_dir = os.path.dirname(os.path.abspath(__file__))\n    extensions_dir = os.path.join(this_dir, \"src\")\n\n    main_file = glob.glob(os.path.join(extensions_dir, \"*.cpp\"))\n    source_cpu = glob.glob(os.path.join(extensions_dir, \"cpu\", \"*.cpp\"))\n    source_cuda = glob.glob(os.path.join(extensions_dir, \"cuda\", \"*.cu\"))\n\n    sources = main_file + source_cpu\n    extension = CppExtension\n\n    extra_compile_args = {\"cxx\": []}\n    define_macros = []\n\n    if torch.cuda.is_available() and CUDA_HOME is not None:\n        extension = CUDAExtension\n        sources += source_cuda\n        define_macros += [(\"WITH_CUDA\", None)]\n        extra_compile_args[\"nvcc\"] = [\n            \"-DCUDA_HAS_FP16=1\",\n            \"-D__CUDA_NO_HALF_OPERATORS__\",\n            \"-D__CUDA_NO_HALF_CONVERSIONS__\",\n            \"-D__CUDA_NO_HALF2_OPERATORS__\",\n        ]\n\n    sources = [os.path.join(extensions_dir, s) for s in sources]\n\n    include_dirs = [extensions_dir]\n\n    ext_modules = [\n        extension(\n            \"knn_pytorch.knn_pytorch\",\n            sources,\n            include_dirs=include_dirs,\n            define_macros=define_macros,\n            extra_compile_args=extra_compile_args,\n        )\n    ]\n\n    return ext_modules\n\n\nsetup(\n    name=\"knn_pytorch\",\n    version=\"0.1\",\n    author=\"foolyc\",\n    url=\"https://github.com/foolyc/torchKNN\",\n    description=\"KNN implement in Pytorch 1.0 including both cpu version and gpu version\",\n    ext_modules=get_extensions(),\n    cmdclass={\"build_ext\": torch.utils.cpp_extension.BuildExtension},\n)\n"
  },
  {
    "path": "knn/src/cpu/knn_cpu.cpp",
    "content": "#include \"cpu/vision.h\"\n\n\nvoid knn_cpu(float* ref_dev, int ref_width, float* query_dev, int query_width,\n    int height, int k, float* dist_dev, long* ind_dev, long* ind_buf)\n{\n    // Compute all the distances\n    for(int query_idx = 0;query_idx<query_width;query_idx++)\n    {\n        for(int ref_idx = 0;ref_idx < ref_width;ref_idx++)\n        {\n            dist_dev[query_idx * ref_width + ref_idx] = 0;\n            for(int hi=0;hi<height;hi++)\n                dist_dev[query_idx * ref_width + ref_idx] += (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]) * (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]);\n        }\n    }\n\n    float temp_value;\n    long temp_idx;\n    // sort the distance and get the index\n    for(int query_idx = 0;query_idx<query_width;query_idx++)\n    {\n        for(int i = 0;i < ref_width;i++)\n        {\n            ind_buf[i] = i+1;\n        }\n        for(int i = 0;i < ref_width;i++)\n            for(int j = 0;j < ref_width - i - 1;j++)\n            {\n                if(dist_dev[query_idx * ref_width + j] > dist_dev[query_idx * ref_width + j + 1])\n                {\n                    temp_value = dist_dev[query_idx * ref_width + j];\n                    dist_dev[query_idx * ref_width + j] = dist_dev[query_idx * ref_width + j + 1];\n                    dist_dev[query_idx * ref_width + j + 1] = temp_value;\n                    temp_idx = ind_buf[j];\n                    ind_buf[j] = ind_buf[j + 1];\n                    ind_buf[j + 1] = temp_idx;\n                }\n\n            }\n\n        for(int i = 0;i < k;i++)\n            ind_dev[query_idx + i * query_width] = ind_buf[i];\n        #if DEBUG\n        for(int i = 0;i < ref_width;i++)\n            printf(\"%d, \", ind_buf[i]);\n        printf(\"\\n\");\n        #endif\n\n    }\n\n\n\n\n\n}"
  },
  {
    "path": "knn/src/cpu/vision.h",
    "content": "#pragma once\n#include <torch/extension.h>\n\nvoid knn_cpu(float* ref_dev, int ref_width,\n    float* query_dev, int query_width,\n    int height, int k, float* dist_dev, long* ind_dev, long* ind_buf);"
  },
  {
    "path": "knn/src/cuda/knn.cu",
    "content": "/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA\n * The modifications are\n *      removed texture memory usage\n *      removed split query KNN computation\n *      added feature extraction with bilinear interpolation\n *\n * Last modified by Christopher B. Choy <chrischoy@ai.stanford.edu> 12/23/2016\n */\n\n// Includes\n#include <cstdio>\n#include \"cuda.h\"\n\n#define IDX2D(i, j, dj) (dj * i + j)\n#define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk))\n\n#define BLOCK 512\n#define MAX_STREAMS 512\n\n// Constants used by the program\n#define BLOCK_DIM                      16\n#define DEBUG                          0\n\n\n/**\n  * Computes the distance between two matrix A (reference points) and\n  * B (query points) containing respectively wA and wB points.\n  *\n  * @param A     pointer on the matrix A\n  * @param wA    width of the matrix A = number of points in A\n  * @param B     pointer on the matrix B\n  * @param wB    width of the matrix B = number of points in B\n  * @param dim   dimension of points = height of matrices A and B\n  * @param AB    pointer on the matrix containing the wA*wB distances computed\n  */\n__global__ void cuComputeDistanceGlobal( float* A, int wA,\n    float* B, int wB, int dim, float* AB){\n\n// Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B\n__shared__ float shared_A[BLOCK_DIM][BLOCK_DIM];\n__shared__ float shared_B[BLOCK_DIM][BLOCK_DIM];\n\n\n  // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step)\n  __shared__ int begin_A;\n  __shared__ int begin_B;\n  __shared__ int step_A;\n  __shared__ int step_B;\n  __shared__ int end_A;\n\n  // Thread index\n  int tx = threadIdx.x;\n  int ty = threadIdx.y;\n\n  // Other variables\n  float tmp;\n  float ssd = 0;\n\n  // Loop parameters\n  begin_A = BLOCK_DIM * blockIdx.y;\n  begin_B = BLOCK_DIM * blockIdx.x;\n  step_A  = BLOCK_DIM * wA;\n  step_B  = BLOCK_DIM * wB;\n  end_A   = begin_A + (dim-1) * wA;\n\n    // Conditions\n  int cond0 = (begin_A + tx < wA); // used to write in shared memory\n  int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix\n  int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix\n\n  // Loop over all the sub-matrices of A and B required to compute the block sub-matrix\n  for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) {\n    // Load the matrices from device memory to shared memory; each thread loads one element of each matrix\n    if (a/wA + ty < dim){\n      shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0;\n      shared_B[ty][tx] = (cond1)? 
 B[b + wB * ty + tx] : 0;\n    }\n    else{\n      shared_A[ty][tx] = 0;\n      shared_B[ty][tx] = 0;\n    }\n\n    // Synchronize to make sure the matrices are loaded\n    __syncthreads();\n\n    // Compute the difference between the two matrices; each thread computes one element of the block sub-matrix\n    if (cond2 && cond1){\n      for (int k = 0; k < BLOCK_DIM; ++k){\n        tmp = shared_A[k][ty] - shared_B[k][tx];\n        ssd += tmp*tmp;\n      }\n    }\n\n    // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration\n    __syncthreads();\n  }\n\n  // Write the block sub-matrix to device memory; each thread writes one element\n  if (cond2 && cond1)\n    AB[(begin_A + ty) * wB + begin_B + tx] = ssd;\n}\n\n\n/**\n  * Gathers the k smallest distances for each column of the distance matrix at the top.\n  *\n  * @param dist        distance matrix\n  * @param ind         index matrix\n  * @param width       width of the distance matrix and of the index matrix\n  * @param height      height of the distance matrix and of the index matrix\n  * @param k           number of neighbors to consider\n  */\n__global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){\n\n  // Variables\n  int l, i, j;\n  float *p_dist;\n  long  *p_ind;\n  float curr_dist, max_dist;\n  long  curr_row,  max_row;\n  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;\n\n  if (xIndex<width){\n    // Pointer shift, initialization, and max value\n    p_dist   = dist + xIndex;\n    p_ind    = ind  + xIndex;\n    max_dist = p_dist[0];\n    p_ind[0] = 1;\n\n    // Part 1: sort the k first elements\n    for (l=1; l<k; l++){\n      curr_row  = l * width;\n      curr_dist = p_dist[curr_row];\n      if (curr_dist<max_dist){\n        i=l-1;\n        for (int a=0; a<l-1; a++){\n          if (p_dist[a*width]>curr_dist){\n            i=a;\n            break;\n          }\n        }\n        for (j=l; j>i; j--){\n          p_dist[j*width] = p_dist[(j-1)*width];\n          p_ind[j*width]   = p_ind[(j-1)*width];\n        }\n        p_dist[i*width] = curr_dist;\n        p_ind[i*width]   = l+1;\n      } else {\n        p_ind[l*width] = l+1;\n      }\n      max_dist = p_dist[curr_row];\n    }\n\n    // Part 2: insert the remaining elements into the k first lines\n    max_row = (k-1)*width;\n    for (l=k; l<height; l++){\n      curr_dist = p_dist[l*width];\n      if (curr_dist<max_dist){\n        i=k-1;\n        for (int a=0; a<k-1; a++){\n          if (p_dist[a*width]>curr_dist){\n            i=a;\n            break;\n          }\n        }\n        for (j=k-1; j>i; j--){\n          p_dist[j*width] = p_dist[(j-1)*width];\n          p_ind[j*width]   = p_ind[(j-1)*width];\n        }\n        p_dist[i*width] = curr_dist;\n        p_ind[i*width]   = l+1;\n        max_dist             = p_dist[max_row];\n      }\n    }\n  }\n}\n\n\n/**\n  * Computes the square root of the first line (width-th first element)\n  * of the distance matrix.\n  *\n  * @param dist    distance matrix\n  * @param width   width of the distance matrix\n  * @param k       number of neighbors to consider\n  */\n__global__ void cuParallelSqrt(float *dist, int width, int k){\n    unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;\n    unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;\n  if (xIndex<width && yIndex<k)\n    dist[yIndex*width + xIndex] = sqrt(dist[yIndex*width +
xIndex]);\n}\n\n\n//-----------------------------------------------------------------------------------------------//\n//                                   K-th NEAREST NEIGHBORS                                      //\n//-----------------------------------------------------------------------------------------------//\n\n/**\n  * K nearest neighbor algorithm\n  * - Compute the squared distances between the reference and query points\n  * - Sort each distance column to keep the k smallest entries and their indexes\n  *\n  * All buffers are expected to live in device memory; the kernels are launched\n  * on the given stream.\n  *\n  * @param ref_dev       reference points ; pointer to linear matrix\n  * @param ref_nb        number of reference points ; width of the matrix\n  * @param query_dev     query points ; pointer to linear matrix\n  * @param query_nb      number of query points ; width of the matrix\n  * @param dim           dimension of points ; height of the matrices\n  * @param k             number of neighbors to consider\n  * @param dist_dev      distances to the k nearest neighbors ; pointer to linear matrix\n  * @param ind_dev       indexes of the k nearest neighbors ; pointer to linear matrix\n  *\n  */\nvoid knn_device(float* ref_dev, int ref_nb, float* query_dev, int query_nb,\n    int dim, int k, float* dist_dev, long* ind_dev, cudaStream_t stream){\n\n  // Grids and threads\n  dim3 g_16x16(query_nb/16, ref_nb/16, 1);\n  dim3 t_16x16(16, 16, 1);\n  if (query_nb%16 != 0) g_16x16.x += 1;\n  if (ref_nb  %16 != 0) g_16x16.y += 1;\n  //\n  dim3 g_256x1(query_nb/256, 1, 1);\n  dim3 t_256x1(256, 1, 1);\n  if (query_nb%256 != 0) g_256x1.x += 1;\n\n  dim3 g_k_16x16(query_nb/16, k/16, 1);\n  dim3 t_k_16x16(16, 16, 1);\n  if (query_nb%16 != 0) g_k_16x16.x += 1;\n  if (k  %16 != 0) g_k_16x16.y += 1;\n\n  // Kernel 1: Compute all the distances\n  cuComputeDistanceGlobal<<<g_16x16, t_16x16, 0, stream>>>(ref_dev, ref_nb, query_dev, query_nb, dim, dist_dev);\n\n  // Kernel 2: Sort each column\n  cuInsertionSort<<<g_256x1, t_256x1, 0, stream>>>(dist_dev, ind_dev, query_nb, ref_nb, k);\n\n  // Kernel 3: Compute square root of k first elements (disabled; distances stay squared)\n  // cuParallelSqrt<<<g_k_16x16,t_k_16x16, 0, stream>>>(dist_dev, query_nb, k);\n\n#if DEBUG\n  unsigned int  size_of_float = sizeof(float);\n  unsigned long size_of_long  = sizeof(long);\n\n  float* dist_host = new float[query_nb * k];\n  long*  idx_host  = new long[query_nb * k];\n\n  // Memory copy of output from device to host\n  cudaMemcpy(&dist_host[0], dist_dev,\n      query_nb * k *size_of_float, cudaMemcpyDeviceToHost);\n\n  cudaMemcpy(&idx_host[0], ind_dev,\n      query_nb * k * size_of_long, cudaMemcpyDeviceToHost);\n\n  int i = 0;\n  for(i = 0; i < 100; i++){\n    printf(\"IDX[%d]: %d\\n\", i, (int)idx_host[i]);\n  }\n#endif\n}\n"
  },
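  {
    "path": "docs/examples/knn_kernel_reference_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# A NumPy reference for what knn/src/cuda/knn.cu computes: kernel 1\n# (cuComputeDistanceGlobal) fills a (ref_nb, query_nb) matrix of squared\n# distances, and kernel 2 (cuInsertionSort) keeps the k smallest entries of\n# each column in ascending order while storing *1-based* reference indices.\n# The square-root kernel is commented out in knn_device, so distances stay\n# squared.\nimport numpy as np\n\n\ndef knn_reference(ref, query, k):\n    # ref: (dim, ref_nb), query: (dim, query_nb) -> dist2, idx of shape (k, query_nb)\n    diff = ref.T[:, None, :] - query.T[None, :, :]   # (ref_nb, query_nb, dim)\n    dist2 = (diff ** 2).sum(-1)                      # kernel 1's output layout\n    order = np.argsort(dist2, axis=0)[:k]            # k smallest rows per column\n    return np.take_along_axis(dist2, order, axis=0), order + 1  # 1-based, as in cuInsertionSort\n\n\nif __name__ == '__main__':\n    rng = np.random.default_rng(0)\n    d2, idx = knn_reference(rng.random((3, 128)), rng.random((3, 32)), k=4)\n    print(d2.shape, idx.shape)  # (4, 32) (4, 32)\n"
  },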
  {
    "path": "knn/src/cuda/vision.h",
    "content": "#pragma once\n#include <torch/extension.h>\n#include <THC/THC.h>\n\nvoid knn_device(float* ref_dev, int ref_width,\n    float* query_dev, int query_width,\n    int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream);"
  },
  {
    "path": "knn/src/knn.h",
    "content": "#pragma once\n#include \"cpu/vision.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/vision.h\"\n#include <THC/THC.h>\nextern THCState *state;\n#endif\n\n\n\nint knn(at::Tensor& ref, at::Tensor& query, at::Tensor& idx)\n{\n\n    // TODO check dimensions\n    long batch, ref_nb, query_nb, dim, k;\n    batch = ref.size(0);\n    dim = ref.size(1);\n    k = idx.size(1);\n    ref_nb = ref.size(2);\n    query_nb = query.size(2);\n\n    float *ref_dev = ref.data<float>();\n    float *query_dev = query.data<float>();\n    long *idx_dev = idx.data<long>();\n\n\n\n\n  if (ref.type().is_cuda()) {\n#ifdef WITH_CUDA\n    // TODO raise error if not compiled with CUDA\n    float *dist_dev = (float*)THCudaMalloc(state, ref_nb * query_nb * sizeof(float));\n\n    for (int b = 0; b < batch; b++)\n    {\n    // knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,\n    //   dist_dev, idx_dev + b * k * query_nb, THCState_getCurrentStream(state));\n      knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,\n      dist_dev, idx_dev + b * k * query_nb, c10::cuda::getCurrentCUDAStream());\n    }\n    THCudaFree(state, dist_dev);\n    cudaError_t err = cudaGetLastError();\n    if (err != cudaSuccess)\n    {\n        printf(\"error in knn: %s\\n\", cudaGetErrorString(err));\n        THError(\"aborting\");\n    }\n    return 1;\n#else\n    AT_ERROR(\"Not compiled with GPU support\");\n#endif\n  }\n\n\n    float *dist_dev = (float*)malloc(ref_nb * query_nb * sizeof(float));\n    long *ind_buf = (long*)malloc(ref_nb * sizeof(long));\n    for (int b = 0; b < batch; b++) {\n    knn_cpu(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,\n      dist_dev, idx_dev + b * k * query_nb, ind_buf);\n    }\n\n    free(dist_dev);\n    free(ind_buf);\n\n    return 1;\n\n}\n"
  },
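  {
    "path": "docs/examples/knn_extension_usage_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# How the `knn` entry point bound in knn/src/knn.h is meant to be called:\n# ref is (B, dim, ref_nb), query is (B, dim, query_nb), and a preallocated\n# int64 tensor idx of shape (B, k, query_nb) is filled in place. The module\n# name `knn_pytorch` is an assumption about how the extension was built, not a\n# name taken from these sources.\nimport torch\n\nimport knn_pytorch  # hypothetical name of the compiled extension module\n\n\ndef knn_indices(ref, query, k):\n    B, _, query_nb = query.shape\n    idx = torch.empty(B, k, query_nb, dtype=torch.long, device=ref.device)\n    knn_pytorch.knn(ref.contiguous(), query.contiguous(), idx)\n    return idx  # note: the CUDA kernel writes 1-based reference indices\n"
  },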
  {
    "path": "knn/src/vision.cpp",
    "content": "#include \"knn.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"knn\", &knn, \"k-nearest neighbors\");\n}\n"
  },
  {
    "path": "models/backbone_resunet14.py",
    "content": "import MinkowskiEngine as ME\nfrom MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck\nfrom models.resnet import ResNetBase\n\n\nclass MinkUNetBase(ResNetBase):\n    BLOCK = None\n    PLANES = None\n    DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1)\n    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)\n    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)\n    INIT_DIM = 32\n    OUT_TENSOR_STRIDE = 1\n\n    # To use the model, must call initialize_coords before forward pass.\n    # Once data is processed, call clear to reset the model before calling\n    # initialize_coords\n    def __init__(self, in_channels, out_channels, D=3):\n        ResNetBase.__init__(self, in_channels, out_channels, D)\n\n    def network_initialization(self, in_channels, out_channels, D):\n        # Output of the first conv concated to conv6\n        self.inplanes = self.INIT_DIM\n        self.conv0p1s1 = ME.MinkowskiConvolution(\n            in_channels, self.inplanes, kernel_size=5, dimension=D)\n\n        self.bn0 = ME.MinkowskiBatchNorm(self.inplanes)\n\n        self.conv1p1s2 = ME.MinkowskiConvolution(\n            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)\n        self.bn1 = ME.MinkowskiBatchNorm(self.inplanes)\n\n        self.block1 = self._make_layer(self.BLOCK, self.PLANES[0],\n                                       self.LAYERS[0])\n\n        self.conv2p2s2 = ME.MinkowskiConvolution(\n            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)\n        self.bn2 = ME.MinkowskiBatchNorm(self.inplanes)\n\n        self.block2 = self._make_layer(self.BLOCK, self.PLANES[1],\n                                       self.LAYERS[1])\n\n        self.conv3p4s2 = ME.MinkowskiConvolution(\n            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)\n\n        self.bn3 = ME.MinkowskiBatchNorm(self.inplanes)\n        self.block3 = self._make_layer(self.BLOCK, self.PLANES[2],\n                                       self.LAYERS[2])\n\n        self.conv4p8s2 = ME.MinkowskiConvolution(\n            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)\n        self.bn4 = ME.MinkowskiBatchNorm(self.inplanes)\n        self.block4 = self._make_layer(self.BLOCK, self.PLANES[3],\n                                       self.LAYERS[3])\n\n        self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose(\n            self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=D)\n        self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4])\n\n        self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion\n        self.block5 = self._make_layer(self.BLOCK, self.PLANES[4],\n                                       self.LAYERS[4])\n        self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose(\n            self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=D)\n        self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5])\n\n        self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion\n        self.block6 = self._make_layer(self.BLOCK, self.PLANES[5],\n                                       self.LAYERS[5])\n        self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose(\n            self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=D)\n        self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6])\n\n        self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion\n        self.block7 = self._make_layer(self.BLOCK, self.PLANES[6],\n                                       self.LAYERS[6])\n     
   self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose(\n            self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=D)\n        self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7])\n\n        self.inplanes = self.PLANES[7] + self.INIT_DIM\n        self.block8 = self._make_layer(self.BLOCK, self.PLANES[7],\n                                       self.LAYERS[7])\n\n        self.final = ME.MinkowskiConvolution(\n            self.PLANES[7] * self.BLOCK.expansion,\n            out_channels,\n            kernel_size=1,\n            bias=True,\n            dimension=D)\n        self.relu = ME.MinkowskiReLU(inplace=True)\n\n    def forward(self, x):\n        out = self.conv0p1s1(x)\n        out = self.bn0(out)\n        out_p1 = self.relu(out)\n\n        out = self.conv1p1s2(out_p1)\n        out = self.bn1(out)\n        out = self.relu(out)\n        out_b1p2 = self.block1(out)\n\n        out = self.conv2p2s2(out_b1p2)\n        out = self.bn2(out)\n        out = self.relu(out)\n        out_b2p4 = self.block2(out)\n\n        out = self.conv3p4s2(out_b2p4)\n        out = self.bn3(out)\n        out = self.relu(out)\n        out_b3p8 = self.block3(out)\n\n        # tensor_stride=16\n        out = self.conv4p8s2(out_b3p8)\n        out = self.bn4(out)\n        out = self.relu(out)\n        out = self.block4(out)\n\n        # tensor_stride=8\n        out = self.convtr4p16s2(out)\n        out = self.bntr4(out)\n        out = self.relu(out)\n\n        out = ME.cat(out, out_b3p8)\n        out = self.block5(out)\n\n        # tensor_stride=4\n        out = self.convtr5p8s2(out)\n        out = self.bntr5(out)\n        out = self.relu(out)\n\n        out = ME.cat(out, out_b2p4)\n        out = self.block6(out)\n\n        # tensor_stride=2\n        out = self.convtr6p4s2(out)\n        out = self.bntr6(out)\n        out = self.relu(out)\n\n        out = ME.cat(out, out_b1p2)\n        out = self.block7(out)\n\n        # tensor_stride=1\n        out = self.convtr7p2s2(out)\n        out = self.bntr7(out)\n        out = self.relu(out)\n\n        out = ME.cat(out, out_p1)\n        out = self.block8(out)\n\n        return self.final(out)\n\n\nclass MinkUNet14(MinkUNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (1, 1, 1, 1, 1, 1, 1, 1)\n\n\nclass MinkUNet18(MinkUNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)\n\n\nclass MinkUNet34(MinkUNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)\n\n\nclass MinkUNet50(MinkUNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)\n\n\nclass MinkUNet101(MinkUNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (2, 3, 4, 23, 2, 2, 2, 2)\n\n\nclass MinkUNet14A(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)\n\n\nclass MinkUNet14B(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)\n\n\nclass MinkUNet14C(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 192, 192, 128, 128)\n\n\nclass MinkUNet14Dori(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)\n\n\nclass MinkUNet14E(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)\n\n\nclass MinkUNet14D(MinkUNet14):\n    PLANES = (32, 64, 128, 256, 192, 192, 192, 192)\n\n\nclass MinkUNet18A(MinkUNet18):\n    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)\n\n\nclass MinkUNet18B(MinkUNet18):\n    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)\n\n\nclass MinkUNet18D(MinkUNet18):\n    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)\n\n\nclass MinkUNet34A(MinkUNet34):\n    PLANES = (32, 64, 128, 256, 256, 128, 64, 
64)\n\n\nclass MinkUNet34B(MinkUNet34):\n    PLANES = (32, 64, 128, 256, 256, 128, 64, 32)\n\n\nclass MinkUNet34C(MinkUNet34):\n    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)\n"
  },
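  {
    "path": "docs/examples/backbone_usage_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# Minimal end-to-end use of the MinkUNet14D backbone, mirroring how\n# models/graspnet.py consumes its sparse input: collate and quantize the cloud,\n# run the network, then map per-voxel features back to the original points with\n# the inverse mapping. voxel_size is an assumed value for illustration.\nimport torch\nimport MinkowskiEngine as ME\n\nfrom models.backbone_resunet14 import MinkUNet14D\n\nvoxel_size = 0.005  # assumption, not a constant defined in this file\ncloud = torch.rand(20000, 3)  # (N, 3) point cloud, xyz reused as features here\n\ncoords, feats = ME.utils.sparse_collate([cloud / voxel_size], [cloud])\ncoords, feats, _, inverse = ME.utils.sparse_quantize(\n    coords, feats, return_index=True, return_inverse=True)\nnet = MinkUNet14D(in_channels=3, out_channels=512, D=3)\nout = net(ME.SparseTensor(feats, coordinates=coords))\npoint_feats = out.F[inverse]  # back to (N, 512), one feature per input point\n"
  },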
  {
    "path": "models/graspnet.py",
    "content": "\"\"\" GraspNet baseline model definition.\n    Author: chenxi-wang\n\"\"\"\n\nimport os\nimport sys\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport MinkowskiEngine as ME\n\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nROOT_DIR = os.path.dirname(BASE_DIR)\nsys.path.append(ROOT_DIR)\n\nfrom models.backbone_resunet14 import MinkUNet14D\nfrom models.modules import ApproachNet, GraspableNet, CloudCrop, SWADNet\nfrom loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT\nfrom label_generation import process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix\nfrom pointnet2.pointnet2_utils import furthest_point_sample, gather_operation\n\n\nclass GraspNet(nn.Module):\n    def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True):\n        super().__init__()\n        self.is_training = is_training\n        self.seed_feature_dim = seed_feat_dim\n        self.num_depth = NUM_DEPTH\n        self.num_angle = NUM_ANGLE\n        self.M_points = M_POINT\n        self.num_view = NUM_VIEW\n\n        self.backbone = MinkUNet14D(in_channels=3, out_channels=self.seed_feature_dim, D=3)\n        self.graspable = GraspableNet(seed_feature_dim=self.seed_feature_dim)\n        self.rotation = ApproachNet(self.num_view, seed_feature_dim=self.seed_feature_dim, is_training=self.is_training)\n        self.crop = CloudCrop(nsample=16, cylinder_radius=cylinder_radius, seed_feature_dim=self.seed_feature_dim)\n        self.swad = SWADNet(num_angle=self.num_angle, num_depth=self.num_depth)\n\n    def forward(self, end_points):\n        seed_xyz = end_points['point_clouds']  # use all sampled point cloud, B*Ns*3\n        B, point_num, _ = seed_xyz.shape  # batch _size\n        # point-wise features\n        coordinates_batch = end_points['coors']\n        features_batch = end_points['feats']\n        mink_input = ME.SparseTensor(features_batch, coordinates=coordinates_batch)\n        seed_features = self.backbone(mink_input).F\n        seed_features = seed_features[end_points['quantize2original']].view(B, point_num, -1).transpose(1, 2)\n\n        end_points = self.graspable(seed_features, end_points)\n        seed_features_flipped = seed_features.transpose(1, 2)  # B*Ns*feat_dim\n        objectness_score = end_points['objectness_score']\n        graspness_score = end_points['graspness_score'].squeeze(1)\n        objectness_pred = torch.argmax(objectness_score, 1)\n        objectness_mask = (objectness_pred == 1)\n        graspness_mask = graspness_score > GRASPNESS_THRESHOLD\n        graspable_mask = objectness_mask & graspness_mask\n\n        seed_features_graspable = []\n        seed_xyz_graspable = []\n        graspable_num_batch = 0.\n        for i in range(B):\n            cur_mask = graspable_mask[i]\n            graspable_num_batch += cur_mask.sum()\n            cur_feat = seed_features_flipped[i][cur_mask]  # Ns*feat_dim\n            cur_seed_xyz = seed_xyz[i][cur_mask]  # Ns*3\n\n            cur_seed_xyz = cur_seed_xyz.unsqueeze(0) # 1*Ns*3\n            fps_idxs = furthest_point_sample(cur_seed_xyz, self.M_points)\n            cur_seed_xyz_flipped = cur_seed_xyz.transpose(1, 2).contiguous()  # 1*3*Ns\n            cur_seed_xyz = gather_operation(cur_seed_xyz_flipped, fps_idxs).transpose(1, 2).squeeze(0).contiguous() # Ns*3\n            cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous()  # 1*feat_dim*Ns\n            cur_feat = gather_operation(cur_feat_flipped, 
fps_idxs).squeeze(0).contiguous() # feat_dim*Ns\n\n            seed_features_graspable.append(cur_feat)\n            seed_xyz_graspable.append(cur_seed_xyz)\n        seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0)  # B*Ns*3\n        seed_features_graspable = torch.stack(seed_features_graspable)  # B*feat_dim*Ns\n        end_points['xyz_graspable'] = seed_xyz_graspable\n        end_points['graspable_count_stage1'] = graspable_num_batch / B\n\n        end_points, res_feat = self.rotation(seed_features_graspable, end_points)\n        seed_features_graspable = seed_features_graspable + res_feat\n\n        if self.is_training:\n            end_points = process_grasp_labels(end_points)\n            grasp_top_views_rot, end_points = match_grasp_view_and_label(end_points)\n        else:\n            grasp_top_views_rot = end_points['grasp_top_view_rot']\n\n        group_features = self.crop(seed_xyz_graspable.contiguous(), seed_features_graspable.contiguous(), grasp_top_views_rot)\n        end_points = self.swad(group_features, end_points)\n\n        return end_points\n\n\ndef pred_decode(end_points):\n    batch_size = len(end_points['point_clouds'])\n    grasp_preds = []\n    for i in range(batch_size):\n        grasp_center = end_points['xyz_graspable'][i].float()\n\n        grasp_score = end_points['grasp_score_pred'][i].float()\n        grasp_score = grasp_score.view(M_POINT, NUM_ANGLE*NUM_DEPTH)\n        grasp_score, grasp_score_inds = torch.max(grasp_score, -1)  # [M_POINT]\n        grasp_score = grasp_score.view(-1, 1)\n        grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / 12\n        grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01\n        grasp_depth = grasp_depth.view(-1, 1)\n        grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10.\n        grasp_width = grasp_width.view(M_POINT, NUM_ANGLE*NUM_DEPTH)\n        grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1))\n        grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH)\n\n        approaching = -end_points['grasp_top_view_xyz'][i].float()\n        grasp_rot = batch_viewpoint_params_to_matrix(approaching, grasp_angle)\n        grasp_rot = grasp_rot.view(M_POINT, 9)\n\n        # merge preds\n        grasp_height = 0.02 * torch.ones_like(grasp_score)\n        obj_ids = -1 * torch.ones_like(grasp_score)\n        grasp_preds.append(\n            torch.cat([grasp_score, grasp_width, grasp_height, grasp_depth, grasp_rot, grasp_center, obj_ids], axis=-1))\n    return grasp_preds\n"
  },
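  {
    "path": "docs/examples/grasp_array_layout_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# pred_decode in models/graspnet.py returns one (M_POINT, 17) array per scene\n# whose columns follow the GraspNet convention:\n#   [0] score, [1] width, [2] height, [3] depth,\n#   [4:13] 3x3 rotation (row-major), [13:16] center xyz, [16] object id (-1 here).\n# This helper just unpacks one row of that array.\nimport numpy as np\n\n\ndef unpack_grasp(g):\n    score, width, height, depth = g[0], g[1], g[2], g[3]\n    rot = np.asarray(g[4:13]).reshape(3, 3)\n    center = g[13:16]\n    return score, width, height, depth, rot, center\n"
  },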
  {
    "path": "models/loss.py",
    "content": "import torch.nn as nn\nimport torch\n\n\ndef get_loss(end_points):\n    objectness_loss, end_points = compute_objectness_loss(end_points)\n    graspness_loss, end_points = compute_graspness_loss(end_points)\n    view_loss, end_points = compute_view_graspness_loss(end_points)\n    score_loss, end_points = compute_score_loss(end_points)\n    width_loss, end_points = compute_width_loss(end_points)\n    loss = objectness_loss + 10 * graspness_loss + 100 * view_loss + 15 * score_loss + 10 * width_loss\n    end_points['loss/overall_loss'] = loss\n    return loss, end_points\n\n\ndef compute_objectness_loss(end_points):\n    criterion = nn.CrossEntropyLoss(reduction='mean')\n    objectness_score = end_points['objectness_score']\n    objectness_label = end_points['objectness_label']\n    loss = criterion(objectness_score, objectness_label)\n    end_points['loss/stage1_objectness_loss'] = loss\n\n    objectness_pred = torch.argmax(objectness_score, 1)\n    end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean()\n    end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[\n        objectness_pred == 1].float().mean()\n    end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[\n        objectness_label == 1].float().mean()\n    return loss, end_points\n\n\ndef compute_graspness_loss(end_points):\n    criterion = nn.SmoothL1Loss(reduction='none')\n    graspness_score = end_points['graspness_score'].squeeze(1)\n    graspness_label = end_points['graspness_label'].squeeze(-1)\n    loss_mask = end_points['objectness_label'].bool()\n    loss = criterion(graspness_score, graspness_label)\n    loss = loss[loss_mask]\n    loss = loss.mean()\n    \n    graspness_score_c = graspness_score.detach().clone()[loss_mask]\n    graspness_label_c = graspness_label.detach().clone()[loss_mask]\n    graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99)\n    graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99)\n    rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean()\n    end_points['stage1_graspness_acc_rank_error'] = rank_error\n\n    end_points['loss/stage1_graspness_loss'] = loss\n    return loss, end_points\n\n\ndef compute_view_graspness_loss(end_points):\n    criterion = nn.SmoothL1Loss(reduction='mean')\n    view_score = end_points['view_score']\n    view_label = end_points['batch_grasp_view_graspness']\n    loss = criterion(view_score, view_label)\n    end_points['loss/stage2_view_loss'] = loss\n    return loss, end_points\n\n\ndef compute_score_loss(end_points):\n    criterion = nn.SmoothL1Loss(reduction='mean')\n    grasp_score_pred = end_points['grasp_score_pred']\n    grasp_score_label = end_points['batch_grasp_score']\n    loss = criterion(grasp_score_pred, grasp_score_label)\n\n    end_points['loss/stage3_score_loss'] = loss\n    return loss, end_points\n\n\ndef compute_width_loss(end_points):\n    criterion = nn.SmoothL1Loss(reduction='none')\n    grasp_width_pred = end_points['grasp_width_pred']\n    grasp_width_label = end_points['batch_grasp_width'] * 10\n    loss = criterion(grasp_width_pred, grasp_width_label)\n    grasp_score_label = end_points['batch_grasp_score']\n    loss_mask = grasp_score_label > 0\n    loss = loss[loss_mask].mean()\n    end_points['loss/stage3_width_loss'] = loss\n    return loss, end_points\n"
  },
  {
    "path": "models/modules.py",
    "content": "import os\nimport sys\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nROOT_DIR = os.path.dirname(BASE_DIR)\nsys.path.append(ROOT_DIR)\n\nimport pointnet2.pytorch_utils as pt_utils\nfrom pointnet2.pointnet2_utils import CylinderQueryAndGroup\nfrom loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix\n\n\nclass GraspableNet(nn.Module):\n    def __init__(self, seed_feature_dim):\n        super().__init__()\n        self.in_dim = seed_feature_dim\n        self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1)\n\n    def forward(self, seed_features, end_points):\n        graspable_score = self.conv_graspable(seed_features)  # (B, 3, num_seed)\n        end_points['objectness_score'] = graspable_score[:, :2]\n        end_points['graspness_score'] = graspable_score[:, 2]\n        return end_points\n\n\nclass ApproachNet(nn.Module):\n    def __init__(self, num_view, seed_feature_dim, is_training=True):\n        super().__init__()\n        self.num_view = num_view\n        self.in_dim = seed_feature_dim\n        self.is_training = is_training\n        self.conv1 = nn.Conv1d(self.in_dim, self.in_dim, 1)\n        self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1)\n\n    def forward(self, seed_features, end_points):\n        B, _, num_seed = seed_features.size()\n        res_features = F.relu(self.conv1(seed_features), inplace=True)\n        features = self.conv2(res_features)\n        view_score = features.transpose(1, 2).contiguous() # (B, num_seed, num_view)\n        end_points['view_score'] = view_score\n\n        if self.is_training:\n            # normalize view graspness score to 0~1\n            view_score_ = view_score.clone().detach()\n            view_score_max, _ = torch.max(view_score_, dim=2)\n            view_score_min, _ = torch.min(view_score_, dim=2)\n            view_score_max = view_score_max.unsqueeze(-1).expand(-1, -1, self.num_view)\n            view_score_min = view_score_min.unsqueeze(-1).expand(-1, -1, self.num_view)\n            view_score_ = (view_score_ - view_score_min) / (view_score_max - view_score_min + 1e-8)\n\n            top_view_inds = []\n            for i in range(B):\n                top_view_inds_batch = torch.multinomial(view_score_[i], 1, replacement=False)\n                top_view_inds.append(top_view_inds_batch)\n            top_view_inds = torch.stack(top_view_inds, dim=0).squeeze(-1)  # B, num_seed\n        else:\n            _, top_view_inds = torch.max(view_score, dim=2)  # (B, num_seed)\n\n            top_view_inds_ = top_view_inds.view(B, num_seed, 1, 1).expand(-1, -1, -1, 3).contiguous()\n            template_views = generate_grasp_views(self.num_view).to(features.device)  # (num_view, 3)\n            template_views = template_views.view(1, 1, self.num_view, 3).expand(B, num_seed, -1, -1).contiguous()\n            vp_xyz = torch.gather(template_views, 2, top_view_inds_).squeeze(2)  # (B, num_seed, 3)\n            vp_xyz_ = vp_xyz.view(-1, 3)\n            batch_angle = torch.zeros(vp_xyz_.size(0), dtype=vp_xyz.dtype, device=vp_xyz.device)\n            vp_rot = batch_viewpoint_params_to_matrix(-vp_xyz_, batch_angle).view(B, num_seed, 3, 3)\n            end_points['grasp_top_view_xyz'] = vp_xyz\n            end_points['grasp_top_view_rot'] = vp_rot\n\n        end_points['grasp_top_view_inds'] = top_view_inds\n        return end_points, res_features\n\n\nclass CloudCrop(nn.Module):\n    def __init__(self, nsample, seed_feature_dim, 
cylinder_radius=0.05, hmin=-0.02, hmax=0.04):\n        super().__init__()\n        self.nsample = nsample\n        self.in_dim = seed_feature_dim\n        self.cylinder_radius = cylinder_radius\n        mlps = [3 + self.in_dim, 256, 256]   # use xyz, so plus 3\n\n        self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample,\n                                             use_xyz=True, normalize_xyz=True)\n        self.mlps = pt_utils.SharedMLP(mlps, bn=True)\n\n    def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):\n        grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot,\n                                       seed_features_graspable)  # (B, 3+feat_dim, M, K)\n        new_features = self.mlps(grouped_feature)  # (batch_size, mlps[-1], M, K)\n        new_features = F.max_pool2d(new_features, kernel_size=[1, new_features.size(3)])  # (batch_size, mlps[-1], M, 1)\n        new_features = new_features.squeeze(-1)   # (batch_size, mlps[-1], M)\n        return new_features\n\n\nclass SWADNet(nn.Module):\n    def __init__(self, num_angle, num_depth):\n        super().__init__()\n        self.num_angle = num_angle\n        self.num_depth = num_depth\n\n        self.conv1 = nn.Conv1d(256, 256, 1)  # input feat dim needs to be consistent with the CloudCrop module\n        self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1)\n\n    def forward(self, vp_features, end_points):\n        B, _, num_seed = vp_features.size()\n        vp_features = F.relu(self.conv1(vp_features), inplace=True)\n        vp_features = self.conv_swad(vp_features)\n        vp_features = vp_features.view(B, 2, self.num_angle, self.num_depth, num_seed)\n        vp_features = vp_features.permute(0, 1, 4, 2, 3)\n\n        # split prediction\n        end_points['grasp_score_pred'] = vp_features[:, 0]  # B * num_seed * num_angle * num_depth\n        end_points['grasp_width_pred'] = vp_features[:, 1]\n        return end_points\n"
  },
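  {
    "path": "docs/examples/view_sampling_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# What ApproachNet's training branch in models/modules.py does with the\n# predicted view scores: min-max normalize each seed's num_view scores to\n# [0, 1], then draw one view index per seed with torch.multinomial, so\n# better-scored approach directions are sampled more often instead of always\n# taking the argmax.\nimport torch\n\nview_score = torch.rand(4, 1024, 300)  # (B, num_seed, num_view)\nv = view_score.detach().clone()\nv_min = v.min(dim=2, keepdim=True).values\nv_max = v.max(dim=2, keepdim=True).values\np = (v - v_min) / (v_max - v_min + 1e-8)  # per-seed scores scaled to [0, 1]\ninds = torch.stack([torch.multinomial(p[i], 1).squeeze(-1) for i in range(p.size(0))])\nprint(inds.shape)  # (B, num_seed)\n"
  },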
  {
    "path": "models/resnet.py",
    "content": "import torch.nn as nn\n\ntry:\n    import open3d as o3d\nexcept ImportError:\n    raise ImportError(\"Please install open3d with `pip install open3d`.\")\n\nimport MinkowskiEngine as ME\nfrom MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck\n\n\nclass ResNetBase(nn.Module):\n    BLOCK = None\n    LAYERS = ()\n    INIT_DIM = 64\n    PLANES = (64, 128, 256, 512)\n\n    def __init__(self, in_channels, out_channels, D=3):\n        nn.Module.__init__(self)\n        self.D = D\n        assert self.BLOCK is not None\n\n        self.network_initialization(in_channels, out_channels, D)\n        self.weight_initialization()\n\n    def network_initialization(self, in_channels, out_channels, D):\n\n        self.inplanes = self.INIT_DIM\n        self.conv1 = nn.Sequential(\n            ME.MinkowskiConvolution(\n                in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D\n            ),\n            ME.MinkowskiInstanceNorm(self.inplanes),\n            ME.MinkowskiReLU(inplace=True),\n            ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D),\n        )\n\n        self.layer1 = self._make_layer(\n            self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2\n        )\n        self.layer2 = self._make_layer(\n            self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2\n        )\n        self.layer3 = self._make_layer(\n            self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2\n        )\n        self.layer4 = self._make_layer(\n            self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2\n        )\n\n        self.conv5 = nn.Sequential(\n            ME.MinkowskiDropout(),\n            ME.MinkowskiConvolution(\n                self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D\n            ),\n            ME.MinkowskiInstanceNorm(self.inplanes),\n            ME.MinkowskiGELU(),\n        )\n\n        self.glob_pool = ME.MinkowskiGlobalMaxPooling()\n\n        self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True)\n\n    def weight_initialization(self):\n        for m in self.modules():\n            if isinstance(m, ME.MinkowskiConvolution):\n                ME.utils.kaiming_normal_(m.kernel, mode=\"fan_out\", nonlinearity=\"relu\")\n\n            if isinstance(m, ME.MinkowskiBatchNorm):\n                nn.init.constant_(m.bn.weight, 1)\n                nn.init.constant_(m.bn.bias, 0)\n\n    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                ME.MinkowskiConvolution(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    dimension=self.D,\n                ),\n                ME.MinkowskiBatchNorm(planes * block.expansion),\n            )\n        layers = []\n        layers.append(\n            block(\n                self.inplanes,\n                planes,\n                stride=stride,\n                dilation=dilation,\n                downsample=downsample,\n                dimension=self.D,\n            )\n        )\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(\n                block(\n                    self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D\n                )\n            
)\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x: ME.SparseTensor):\n        x = self.conv1(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        x = self.conv5(x)\n        x = self.glob_pool(x)\n        return self.final(x)\n\n\nclass ResNet14(ResNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (1, 1, 1, 1)\n\n\nclass ResNet18(ResNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (2, 2, 2, 2)\n\n\nclass ResNet34(ResNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (3, 4, 6, 3)\n\n\nclass ResNet50(ResNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (3, 4, 6, 3)\n\n\nclass ResNet101(ResNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (3, 4, 23, 3)\n\n\nclass ResFieldNetBase(ResNetBase):\n    def network_initialization(self, in_channels, out_channels, D):\n        field_ch = 32\n        field_ch2 = 64\n        self.field_network = nn.Sequential(\n            ME.MinkowskiSinusoidal(in_channels, field_ch),\n            ME.MinkowskiBatchNorm(field_ch),\n            ME.MinkowskiReLU(inplace=True),\n            ME.MinkowskiLinear(field_ch, field_ch),\n            ME.MinkowskiBatchNorm(field_ch),\n            ME.MinkowskiReLU(inplace=True),\n            ME.MinkowskiToSparseTensor(),\n        )\n        self.field_network2 = nn.Sequential(\n            ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2),\n            ME.MinkowskiBatchNorm(field_ch2),\n            ME.MinkowskiReLU(inplace=True),\n            ME.MinkowskiLinear(field_ch2, field_ch2),\n            ME.MinkowskiBatchNorm(field_ch2),\n            ME.MinkowskiReLU(inplace=True),\n            ME.MinkowskiToSparseTensor(),\n        )\n\n        ResNetBase.network_initialization(self, field_ch2, out_channels, D)\n\n    def forward(self, x: ME.TensorField):\n        otensor = self.field_network(x)\n        otensor2 = self.field_network2(otensor.cat_slice(x))\n        return ResNetBase.forward(self, otensor2)\n\n\nclass ResFieldNet14(ResFieldNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (1, 1, 1, 1)\n\n\nclass ResFieldNet18(ResFieldNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (2, 2, 2, 2)\n\n\nclass ResFieldNet34(ResFieldNetBase):\n    BLOCK = BasicBlock\n    LAYERS = (3, 4, 6, 3)\n\n\nclass ResFieldNet50(ResFieldNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (3, 4, 6, 3)\n\n\nclass ResFieldNet101(ResFieldNetBase):\n    BLOCK = Bottleneck\n    LAYERS = (3, 4, 23, 3)\n"
  },
  {
    "path": "pointnet2/_ext_src/include/ball_query.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,\n                      const int nsample);\n"
  },
  {
    "path": "pointnet2/_ext_src/include/cuda_utils.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#ifndef _CUDA_UTILS_H\n#define _CUDA_UTILS_H\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <cmath>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n#include <vector>\n\n#define TOTAL_THREADS 512\n\ninline int opt_n_threads(int work_size) {\n  const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);\n\n  return max(min(1 << pow_2, TOTAL_THREADS), 1);\n}\n\ninline dim3 opt_block_config(int x, int y) {\n  const int x_threads = opt_n_threads(x);\n  const int y_threads =\n      max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);\n  dim3 block_config(x_threads, y_threads, 1);\n\n  return block_config;\n}\n\n#define CUDA_CHECK_ERRORS()                                           \\\n  do {                                                                \\\n    cudaError_t err = cudaGetLastError();                             \\\n    if (cudaSuccess != err) {                                         \\\n      fprintf(stderr, \"CUDA kernel failed : %s\\n%s at L:%d in %s\\n\",  \\\n              cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \\\n              __FILE__);                                              \\\n      exit(-1);                                                       \\\n    }                                                                 \\\n  } while (0)\n\n#endif\n"
  },
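  {
    "path": "docs/examples/opt_n_threads_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# opt_n_threads in pointnet2/_ext_src/include/cuda_utils.h rounds the work size\n# down to a power of two and clamps it to [1, TOTAL_THREADS]; the kernels then\n# launch one block per batch element with that many threads. Python equivalent:\nimport math\n\n\ndef opt_n_threads(work_size, total_threads=512):\n    pow_2 = int(math.log(work_size) / math.log(2.0)) if work_size > 0 else 0\n    return max(min(1 << pow_2, total_threads), 1)\n\n\nassert opt_n_threads(1000) == 512\nassert opt_n_threads(48) == 32\n"
  },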
  {
    "path": "pointnet2/_ext_src/include/cylinder_query.h",
    "content": "// Author: chenxi-wang\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,\n                      const int nsample);\n"
  },
  {
    "path": "pointnet2/_ext_src/include/group_points.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor group_points(at::Tensor points, at::Tensor idx);\nat::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);\n"
  },
  {
    "path": "pointnet2/_ext_src/include/interpolate.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#pragma once\n\n#include <torch/extension.h>\n#include <vector>\n\nstd::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);\nat::Tensor three_interpolate(at::Tensor points, at::Tensor idx,\n                             at::Tensor weight);\nat::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,\n                                  at::Tensor weight, const int m);\n"
  },
  {
    "path": "pointnet2/_ext_src/include/sampling.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor gather_points(at::Tensor points, at::Tensor idx);\nat::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);\nat::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);\n"
  },
  {
    "path": "pointnet2/_ext_src/include/utils.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#pragma once\n#include <ATen/cuda/CUDAContext.h>\n#include <torch/extension.h>\n\n#define CHECK_CUDA(x)                                          \\\n  do {                                                         \\\n    TORCH_CHECK(x.type().is_cuda(), #x \" must be a CUDA tensor\"); \\\n  } while (0)\n\n#define CHECK_CONTIGUOUS(x)                                         \\\n  do {                                                              \\\n    TORCH_CHECK(x.is_contiguous(), #x \" must be a contiguous tensor\"); \\\n  } while (0)\n\n#define CHECK_IS_INT(x)                              \\\n  do {                                               \\\n    TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \\\n             #x \" must be an int tensor\");           \\\n  } while (0)\n\n#define CHECK_IS_FLOAT(x)                              \\\n  do {                                                 \\\n    TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \\\n             #x \" must be a float tensor\");            \\\n  } while (0)\n"
  },
  {
    "path": "pointnet2/_ext_src/src/ball_query.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include \"ball_query.h\"\n#include \"utils.h\"\n\nvoid query_ball_point_kernel_wrapper(int b, int n, int m, float radius,\n                                     int nsample, const float *new_xyz,\n                                     const float *xyz, int *idx);\n\nat::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,\n                      const int nsample) {\n  CHECK_CONTIGUOUS(new_xyz);\n  CHECK_CONTIGUOUS(xyz);\n  CHECK_IS_FLOAT(new_xyz);\n  CHECK_IS_FLOAT(xyz);\n\n  if (new_xyz.type().is_cuda()) {\n    CHECK_CUDA(xyz);\n  }\n\n  at::Tensor idx =\n      torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},\n                   at::device(new_xyz.device()).dtype(at::ScalarType::Int));\n\n  if (new_xyz.type().is_cuda()) {\n    query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),\n                                    radius, nsample, new_xyz.data<float>(),\n                                    xyz.data<float>(), idx.data<int>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return idx;\n}\n"
  },
  {
    "path": "pointnet2/_ext_src/src/ball_query_gpu.cu",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n\n// input: new_xyz(b, m, 3) xyz(b, n, 3)\n// output: idx(b, m, nsample)\n__global__ void query_ball_point_kernel(int b, int n, int m, float radius,\n                                        int nsample,\n                                        const float *__restrict__ new_xyz,\n                                        const float *__restrict__ xyz,\n                                        int *__restrict__ idx) {\n  int batch_index = blockIdx.x;\n  xyz += batch_index * n * 3;\n  new_xyz += batch_index * m * 3;\n  idx += m * nsample * batch_index;\n\n  int index = threadIdx.x;\n  int stride = blockDim.x;\n\n  float radius2 = radius * radius;\n  for (int j = index; j < m; j += stride) {\n    float new_x = new_xyz[j * 3 + 0];\n    float new_y = new_xyz[j * 3 + 1];\n    float new_z = new_xyz[j * 3 + 2];\n    for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {\n      float x = xyz[k * 3 + 0];\n      float y = xyz[k * 3 + 1];\n      float z = xyz[k * 3 + 2];\n      float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +\n                 (new_z - z) * (new_z - z);\n      if (d2 < radius2) {\n        if (cnt == 0) {\n          for (int l = 0; l < nsample; ++l) {\n            idx[j * nsample + l] = k;\n          }\n        }\n        idx[j * nsample + cnt] = k;\n        ++cnt;\n      }\n    }\n  }\n}\n\nvoid query_ball_point_kernel_wrapper(int b, int n, int m, float radius,\n                                     int nsample, const float *new_xyz,\n                                     const float *xyz, int *idx) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n  query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(\n      b, n, m, radius, nsample, new_xyz, xyz, idx);\n\n  CUDA_CHECK_ERRORS();\n}\n"
  },
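  {
    "path": "docs/examples/ball_query_reference_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# NumPy reference for the query_ball_point CUDA kernel: for each query point,\n# collect up to nsample neighbor indices within `radius`; the kernel pre-fills\n# the whole row with the first hit, so short rows are padded with it, and rows\n# with no hit at all stay 0.\nimport numpy as np\n\n\ndef ball_query_reference(new_xyz, xyz, radius, nsample):\n    # new_xyz: (m, 3) query points, xyz: (n, 3) support points -> (m, nsample) int32\n    idx = np.zeros((new_xyz.shape[0], nsample), dtype=np.int32)\n    for j in range(new_xyz.shape[0]):\n        hits = np.flatnonzero(((xyz - new_xyz[j]) ** 2).sum(-1) < radius ** 2)[:nsample]\n        if hits.size:\n            idx[j, :] = hits[0]          # pad with the first neighbor, as the kernel does\n            idx[j, :hits.size] = hits\n    return idx\n"
  },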
  {
    "path": "pointnet2/_ext_src/src/bindings.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include \"ball_query.h\"\n#include \"group_points.h\"\n#include \"interpolate.h\"\n#include \"sampling.h\"\n#include \"cylinder_query.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"gather_points\", &gather_points);\n  m.def(\"gather_points_grad\", &gather_points_grad);\n  m.def(\"furthest_point_sampling\", &furthest_point_sampling);\n\n  m.def(\"three_nn\", &three_nn);\n  m.def(\"three_interpolate\", &three_interpolate);\n  m.def(\"three_interpolate_grad\", &three_interpolate_grad);\n\n  m.def(\"ball_query\", &ball_query);\n\n  m.def(\"group_points\", &group_points);\n  m.def(\"group_points_grad\", &group_points_grad);\n\n  m.def(\"cylinder_query\", &cylinder_query);\n}\n"
  },
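  {
    "path": "docs/examples/pointnet2_ops_usage_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# The pybind module built from bindings.cpp exposes the raw CUDA ops; the\n# wrappers in pointnet2/pointnet2_utils.py normally route them through autograd\n# Functions. `pointnet2._ext` is the usual build name but is an assumption here.\nimport torch\n\nimport pointnet2._ext as _ext  # hypothetical name of the compiled extension\n\nxyz = torch.rand(2, 4096, 3, device='cuda')        # (B, N, 3) contiguous float\nfps_idx = _ext.furthest_point_sampling(xyz, 1024)  # (B, 1024) int32 indices\nseeds = xyz[:, :1024].contiguous()                 # any (B, m, 3) query set works\nneighbors = _ext.ball_query(seeds, xyz, 0.05, 16)  # (B, 1024, 16) int32 indices\n"
  },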
  {
    "path": "pointnet2/_ext_src/src/cylinder_query.cpp",
    "content": "// Author: chenxi-wang\n\n#include \"cylinder_query.h\"\n#include \"utils.h\"\n\nvoid query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,\n                                     int nsample, const float *new_xyz,\n                                     const float *xyz, const float *rot, int *idx);\n\nat::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,\n                      const int nsample) {\n  CHECK_CONTIGUOUS(new_xyz);\n  CHECK_CONTIGUOUS(xyz);\n  CHECK_CONTIGUOUS(rot);\n  CHECK_IS_FLOAT(new_xyz);\n  CHECK_IS_FLOAT(xyz);\n  CHECK_IS_FLOAT(rot);\n\n  if (new_xyz.type().is_cuda()) {\n    CHECK_CUDA(xyz);\n    CHECK_CUDA(rot);\n  }\n\n  at::Tensor idx =\n      torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},\n                   at::device(new_xyz.device()).dtype(at::ScalarType::Int));\n\n  if (new_xyz.type().is_cuda()) {\n    query_cylinder_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),\n                                    radius, hmin, hmax, nsample, new_xyz.data<float>(),\n                                    xyz.data<float>(), rot.data<float>(), idx.data<int>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return idx;\n}\n"
  },
  {
    "path": "pointnet2/_ext_src/src/cylinder_query_gpu.cu",
    "content": "// Author: chenxi-wang\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n\n__global__ void query_cylinder_point_kernel(int b, int n, int m, float radius, float hmin, float hmax,\n                                        int nsample,\n                                        const float *__restrict__ new_xyz,\n                                        const float *__restrict__ xyz,\n                                        const float *__restrict__ rot,\n                                        int *__restrict__ idx) {\n  int batch_index = blockIdx.x;\n  xyz += batch_index * n * 3;\n  new_xyz += batch_index * m * 3;\n  rot += batch_index * m * 9;\n  idx += m * nsample * batch_index;\n\n  int index = threadIdx.x;\n  int stride = blockDim.x;\n\n  float radius2 = radius * radius;\n  for (int j = index; j < m; j += stride) {\n    float new_x = new_xyz[j * 3 + 0];\n    float new_y = new_xyz[j * 3 + 1];\n    float new_z = new_xyz[j * 3 + 2];\n    float r0 = rot[j * 9 + 0];\n    float r1 = rot[j * 9 + 1];\n    float r2 = rot[j * 9 + 2];\n    float r3 = rot[j * 9 + 3];\n    float r4 = rot[j * 9 + 4];\n    float r5 = rot[j * 9 + 5];\n    float r6 = rot[j * 9 + 6];\n    float r7 = rot[j * 9 + 7];\n    float r8 = rot[j * 9 + 8];\n    for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {\n      float x = xyz[k * 3 + 0] - new_x;\n      float y = xyz[k * 3 + 1] - new_y;\n      float z = xyz[k * 3 + 2] - new_z;\n      float x_rot = r0 * x + r3 * y + r6 * z;\n      float y_rot = r1 * x + r4 * y + r7 * z;\n      float z_rot = r2 * x + r5 * y + r8 * z;\n      float d2 = y_rot * y_rot + z_rot * z_rot;\n      if (d2 < radius2 && x_rot > hmin && x_rot < hmax) {\n        if (cnt == 0) {\n          for (int l = 0; l < nsample; ++l) {\n            idx[j * nsample + l] = k;\n          }\n        }\n        idx[j * nsample + cnt] = k;\n        ++cnt;\n      }\n    }\n  }\n}\n\nvoid query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,\n                                     int nsample, const float *new_xyz,\n                                     const float *xyz, const float *rot, int *idx) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n  query_cylinder_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(\n      b, n, m, radius, hmin, hmax, nsample, new_xyz, xyz, rot, idx);\n\n  CUDA_CHECK_ERRORS();\n}\n"
  },
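  {
    "path": "docs/examples/cylinder_query_reference_sketch.py",
    "content": "# Illustrative sketch only -- this file is not part of the original sources.\n# NumPy reference for the query_cylinder_point kernel: each offset is rotated\n# into the gripper frame with the transpose of the seed's row-major 3x3\n# rotation (x_rot dots the offset with the first *column* of rot), the x axis\n# is the approach direction, and a point is kept when it lies within `radius`\n# of that axis and within [hmin, hmax] along it. Index padding follows the same\n# first-hit rule as ball_query.\nimport numpy as np\n\n\ndef cylinder_mask(new_xyz_j, rot_j, xyz, radius, hmin, hmax):\n    # new_xyz_j: (3,) seed point, rot_j: (3, 3) rotation, xyz: (n, 3) cloud\n    offs = (xyz - new_xyz_j) @ rot_j  # row-vector form of rot_j.T @ offset\n    h = offs[:, 0]                    # height along the approach axis\n    r2 = offs[:, 1] ** 2 + offs[:, 2] ** 2\n    return (r2 < radius ** 2) & (h > hmin) & (h < hmax)\n"
  },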
  {
    "path": "pointnet2/_ext_src/src/group_points.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include \"group_points.h\"\n#include \"utils.h\"\n\nvoid group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,\n                                 const float *points, const int *idx,\n                                 float *out);\n\nvoid group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,\n                                      int nsample, const float *grad_out,\n                                      const int *idx, float *grad_points);\n\nat::Tensor group_points(at::Tensor points, at::Tensor idx) {\n  CHECK_CONTIGUOUS(points);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_IS_FLOAT(points);\n  CHECK_IS_INT(idx);\n\n  if (points.type().is_cuda()) {\n    CHECK_CUDA(idx);\n  }\n\n  at::Tensor output =\n      torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},\n                   at::device(points.device()).dtype(at::ScalarType::Float));\n\n  if (points.type().is_cuda()) {\n    group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),\n                                idx.size(1), idx.size(2), points.data<float>(),\n                                idx.data<int>(), output.data<float>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\n\nat::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {\n  CHECK_CONTIGUOUS(grad_out);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_IS_FLOAT(grad_out);\n  CHECK_IS_INT(idx);\n\n  if (grad_out.type().is_cuda()) {\n    CHECK_CUDA(idx);\n  }\n\n  at::Tensor output =\n      torch::zeros({grad_out.size(0), grad_out.size(1), n},\n                   at::device(grad_out.device()).dtype(at::ScalarType::Float));\n\n  if (grad_out.type().is_cuda()) {\n    group_points_grad_kernel_wrapper(\n        grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),\n        grad_out.data<float>(), idx.data<int>(), output.data<float>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\n"
  },
  {
    "path": "pointnet2/_ext_src/src/group_points_gpu.cu",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n\n// input: points(b, c, n) idx(b, npoints, nsample)\n// output: out(b, c, npoints, nsample)\n__global__ void group_points_kernel(int b, int c, int n, int npoints,\n                                    int nsample,\n                                    const float *__restrict__ points,\n                                    const int *__restrict__ idx,\n                                    float *__restrict__ out) {\n  int batch_index = blockIdx.x;\n  points += batch_index * n * c;\n  idx += batch_index * npoints * nsample;\n  out += batch_index * npoints * nsample * c;\n\n  const int index = threadIdx.y * blockDim.x + threadIdx.x;\n  const int stride = blockDim.y * blockDim.x;\n  for (int i = index; i < c * npoints; i += stride) {\n    const int l = i / npoints;\n    const int j = i % npoints;\n    for (int k = 0; k < nsample; ++k) {\n      int ii = idx[j * nsample + k];\n      out[(l * npoints + j) * nsample + k] = points[l * n + ii];\n    }\n  }\n}\n\nvoid group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,\n                                 const float *points, const int *idx,\n                                 float *out) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(\n      b, c, n, npoints, nsample, points, idx, out);\n\n  CUDA_CHECK_ERRORS();\n}\n\n// input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)\n// output: grad_points(b, c, n)\n__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,\n                                         int nsample,\n                                         const float *__restrict__ grad_out,\n                                         const int *__restrict__ idx,\n                                         float *__restrict__ grad_points) {\n  int batch_index = blockIdx.x;\n  grad_out += batch_index * npoints * nsample * c;\n  idx += batch_index * npoints * nsample;\n  grad_points += batch_index * n * c;\n\n  const int index = threadIdx.y * blockDim.x + threadIdx.x;\n  const int stride = blockDim.y * blockDim.x;\n  for (int i = index; i < c * npoints; i += stride) {\n    const int l = i / npoints;\n    const int j = i % npoints;\n    for (int k = 0; k < nsample; ++k) {\n      int ii = idx[j * nsample + k];\n      atomicAdd(grad_points + l * n + ii,\n                grad_out[(l * npoints + j) * nsample + k]);\n    }\n  }\n}\n\nvoid group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,\n                                      int nsample, const float *grad_out,\n                                      const int *idx, float *grad_points) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(\n      b, c, n, npoints, nsample, grad_out, idx, grad_points);\n\n  CUDA_CHECK_ERRORS();\n}\n"
  },
  {
    "path": "pointnet2/_ext_src/src/interpolate.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include \"interpolate.h\"\n#include \"utils.h\"\n\nvoid three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,\n                             const float *known, float *dist2, int *idx);\nvoid three_interpolate_kernel_wrapper(int b, int c, int m, int n,\n                                      const float *points, const int *idx,\n                                      const float *weight, float *out);\nvoid three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,\n                                           const float *grad_out,\n                                           const int *idx, const float *weight,\n                                           float *grad_points);\n\nstd::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {\n  CHECK_CONTIGUOUS(unknowns);\n  CHECK_CONTIGUOUS(knows);\n  CHECK_IS_FLOAT(unknowns);\n  CHECK_IS_FLOAT(knows);\n\n  if (unknowns.type().is_cuda()) {\n    CHECK_CUDA(knows);\n  }\n\n  at::Tensor idx =\n      torch::zeros({unknowns.size(0), unknowns.size(1), 3},\n                   at::device(unknowns.device()).dtype(at::ScalarType::Int));\n  at::Tensor dist2 =\n      torch::zeros({unknowns.size(0), unknowns.size(1), 3},\n                   at::device(unknowns.device()).dtype(at::ScalarType::Float));\n\n  if (unknowns.type().is_cuda()) {\n    three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),\n                            unknowns.data<float>(), knows.data<float>(),\n                            dist2.data<float>(), idx.data<int>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return {dist2, idx};\n}\n\nat::Tensor three_interpolate(at::Tensor points, at::Tensor idx,\n                             at::Tensor weight) {\n  CHECK_CONTIGUOUS(points);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_CONTIGUOUS(weight);\n  CHECK_IS_FLOAT(points);\n  CHECK_IS_INT(idx);\n  CHECK_IS_FLOAT(weight);\n\n  if (points.type().is_cuda()) {\n    CHECK_CUDA(idx);\n    CHECK_CUDA(weight);\n  }\n\n  at::Tensor output =\n      torch::zeros({points.size(0), points.size(1), idx.size(1)},\n                   at::device(points.device()).dtype(at::ScalarType::Float));\n\n  if (points.type().is_cuda()) {\n    three_interpolate_kernel_wrapper(\n        points.size(0), points.size(1), points.size(2), idx.size(1),\n        points.data<float>(), idx.data<int>(), weight.data<float>(),\n        output.data<float>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\nat::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,\n                                  at::Tensor weight, const int m) {\n  CHECK_CONTIGUOUS(grad_out);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_CONTIGUOUS(weight);\n  CHECK_IS_FLOAT(grad_out);\n  CHECK_IS_INT(idx);\n  CHECK_IS_FLOAT(weight);\n\n  if (grad_out.type().is_cuda()) {\n    CHECK_CUDA(idx);\n    CHECK_CUDA(weight);\n  }\n\n  at::Tensor output =\n      torch::zeros({grad_out.size(0), grad_out.size(1), m},\n                   at::device(grad_out.device()).dtype(at::ScalarType::Float));\n\n  if (grad_out.type().is_cuda()) {\n    three_interpolate_grad_kernel_wrapper(\n        grad_out.size(0), grad_out.size(1), grad_out.size(2), m,\n        grad_out.data<float>(), idx.data<int>(), weight.data<float>(),\n        output.data<float>());\n  } else {\n    
TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\n"
  },
  {
    "path": "pointnet2/_ext_src/src/interpolate_gpu.cu",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n\n// input: unknown(b, n, 3) known(b, m, 3)\n// output: dist2(b, n, 3), idx(b, n, 3)\n__global__ void three_nn_kernel(int b, int n, int m,\n                                const float *__restrict__ unknown,\n                                const float *__restrict__ known,\n                                float *__restrict__ dist2,\n                                int *__restrict__ idx) {\n  int batch_index = blockIdx.x;\n  unknown += batch_index * n * 3;\n  known += batch_index * m * 3;\n  dist2 += batch_index * n * 3;\n  idx += batch_index * n * 3;\n\n  int index = threadIdx.x;\n  int stride = blockDim.x;\n  for (int j = index; j < n; j += stride) {\n    float ux = unknown[j * 3 + 0];\n    float uy = unknown[j * 3 + 1];\n    float uz = unknown[j * 3 + 2];\n\n    double best1 = 1e40, best2 = 1e40, best3 = 1e40;\n    int besti1 = 0, besti2 = 0, besti3 = 0;\n    for (int k = 0; k < m; ++k) {\n      float x = known[k * 3 + 0];\n      float y = known[k * 3 + 1];\n      float z = known[k * 3 + 2];\n      float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);\n      if (d < best1) {\n        best3 = best2;\n        besti3 = besti2;\n        best2 = best1;\n        besti2 = besti1;\n        best1 = d;\n        besti1 = k;\n      } else if (d < best2) {\n        best3 = best2;\n        besti3 = besti2;\n        best2 = d;\n        besti2 = k;\n      } else if (d < best3) {\n        best3 = d;\n        besti3 = k;\n      }\n    }\n    dist2[j * 3 + 0] = best1;\n    dist2[j * 3 + 1] = best2;\n    dist2[j * 3 + 2] = best3;\n\n    idx[j * 3 + 0] = besti1;\n    idx[j * 3 + 1] = besti2;\n    idx[j * 3 + 2] = besti3;\n  }\n}\n\nvoid three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,\n                             const float *known, float *dist2, int *idx) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n  three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,\n                                                      dist2, idx);\n\n  CUDA_CHECK_ERRORS();\n}\n\n// input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)\n// output: out(b, c, n)\n__global__ void three_interpolate_kernel(int b, int c, int m, int n,\n                                         const float *__restrict__ points,\n                                         const int *__restrict__ idx,\n                                         const float *__restrict__ weight,\n                                         float *__restrict__ out) {\n  int batch_index = blockIdx.x;\n  points += batch_index * m * c;\n\n  idx += batch_index * n * 3;\n  weight += batch_index * n * 3;\n\n  out += batch_index * n * c;\n\n  const int index = threadIdx.y * blockDim.x + threadIdx.x;\n  const int stride = blockDim.y * blockDim.x;\n  for (int i = index; i < c * n; i += stride) {\n    const int l = i / n;\n    const int j = i % n;\n    float w1 = weight[j * 3 + 0];\n    float w2 = weight[j * 3 + 1];\n    float w3 = weight[j * 3 + 2];\n\n    int i1 = idx[j * 3 + 0];\n    int i2 = idx[j * 3 + 1];\n    int i3 = idx[j * 3 + 2];\n\n    out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +\n             points[l * m + i3] * w3;\n  }\n}\n\nvoid three_interpolate_kernel_wrapper(int b, int c, int m, int n,\n          
                            const float *points, const int *idx,\n                                      const float *weight, float *out) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n  three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(\n      b, c, m, n, points, idx, weight, out);\n\n  CUDA_CHECK_ERRORS();\n}\n\n// input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)\n// output: grad_points(b, c, m)\n\n__global__ void three_interpolate_grad_kernel(\n    int b, int c, int n, int m, const float *__restrict__ grad_out,\n    const int *__restrict__ idx, const float *__restrict__ weight,\n    float *__restrict__ grad_points) {\n  int batch_index = blockIdx.x;\n  grad_out += batch_index * n * c;\n  idx += batch_index * n * 3;\n  weight += batch_index * n * 3;\n  grad_points += batch_index * m * c;\n\n  const int index = threadIdx.y * blockDim.x + threadIdx.x;\n  const int stride = blockDim.y * blockDim.x;\n  for (int i = index; i < c * n; i += stride) {\n    const int l = i / n;\n    const int j = i % n;\n    float w1 = weight[j * 3 + 0];\n    float w2 = weight[j * 3 + 1];\n    float w3 = weight[j * 3 + 2];\n\n    int i1 = idx[j * 3 + 0];\n    int i2 = idx[j * 3 + 1];\n    int i3 = idx[j * 3 + 2];\n\n    atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);\n    atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);\n    atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);\n  }\n}\n\nvoid three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,\n                                           const float *grad_out,\n                                           const int *idx, const float *weight,\n                                           float *grad_points) {\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n  three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>(\n      b, c, n, m, grad_out, idx, weight, grad_points);\n\n  CUDA_CHECK_ERRORS();\n}\n"
  },
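The two kernels above compute k=3 nearest neighbors and index-weighted gathering. A minimal pure-PyTorch sketch (not part of the repo; helper names are hypothetical) of what they compute, useful only for sanity-checking the extension on small inputs:

```python
import torch

# Hypothetical reference implementations of the CUDA ops above. The kernels
# exist because these dense formulations are memory-hungry at scale.
def three_nn_ref(unknown, known):
    # unknown: (B, n, 3), known: (B, m, 3) -> squared dists and idx, both (B, n, 3)
    d2 = torch.cdist(unknown, known).pow(2)                 # (B, n, m)
    dist2, idx = torch.topk(d2, k=3, dim=2, largest=False)  # three smallest
    return dist2, idx.int()

def three_interpolate_ref(points, idx, weight):
    # points: (B, c, m), idx/weight: (B, n, 3) -> out: (B, c, n)
    B, c, m = points.shape
    n = idx.shape[1]
    expanded = points.unsqueeze(2).expand(B, c, n, m)
    gathered = torch.gather(expanded, 3, idx.long().unsqueeze(1).expand(B, c, n, 3))
    return (gathered * weight.unsqueeze(1)).sum(-1)
```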
  {
    "path": "pointnet2/_ext_src/src/sampling.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include \"sampling.h\"\n#include \"utils.h\"\n\nvoid gather_points_kernel_wrapper(int b, int c, int n, int npoints,\n                                  const float *points, const int *idx,\n                                  float *out);\nvoid gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,\n                                       const float *grad_out, const int *idx,\n                                       float *grad_points);\n\nvoid furthest_point_sampling_kernel_wrapper(int b, int n, int m,\n                                            const float *dataset, float *temp,\n                                            int *idxs);\n\nat::Tensor gather_points(at::Tensor points, at::Tensor idx) {\n  CHECK_CONTIGUOUS(points);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_IS_FLOAT(points);\n  CHECK_IS_INT(idx);\n\n  if (points.type().is_cuda()) {\n    CHECK_CUDA(idx);\n  }\n\n  at::Tensor output =\n      torch::zeros({points.size(0), points.size(1), idx.size(1)},\n                   at::device(points.device()).dtype(at::ScalarType::Float));\n\n  if (points.type().is_cuda()) {\n    gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),\n                                 idx.size(1), points.data<float>(),\n                                 idx.data<int>(), output.data<float>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\n\nat::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,\n                              const int n) {\n  CHECK_CONTIGUOUS(grad_out);\n  CHECK_CONTIGUOUS(idx);\n  CHECK_IS_FLOAT(grad_out);\n  CHECK_IS_INT(idx);\n\n  if (grad_out.type().is_cuda()) {\n    CHECK_CUDA(idx);\n  }\n\n  at::Tensor output =\n      torch::zeros({grad_out.size(0), grad_out.size(1), n},\n                   at::device(grad_out.device()).dtype(at::ScalarType::Float));\n\n  if (grad_out.type().is_cuda()) {\n    gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,\n                                      idx.size(1), grad_out.data<float>(),\n                                      idx.data<int>(), output.data<float>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\nat::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {\n  CHECK_CONTIGUOUS(points);\n  CHECK_IS_FLOAT(points);\n\n  at::Tensor output =\n      torch::zeros({points.size(0), nsamples},\n                   at::device(points.device()).dtype(at::ScalarType::Int));\n\n  at::Tensor tmp =\n      torch::full({points.size(0), points.size(1)}, 1e10,\n                  at::device(points.device()).dtype(at::ScalarType::Float));\n\n  if (points.type().is_cuda()) {\n    furthest_point_sampling_kernel_wrapper(\n        points.size(0), points.size(1), nsamples, points.data<float>(),\n        tmp.data<float>(), output.data<int>());\n  } else {\n    TORCH_CHECK(false, \"CPU not supported\");\n  }\n\n  return output;\n}\n"
  },
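`gather_points` indexes a (B, C, N) feature tensor with (B, npoint) indices along the last axis. An assumed-equivalent `torch.gather` formulation (hypothetical helper name) for checking the op:

```python
import torch

# Assumed-equivalent formulation of gather_points above.
def gather_points_ref(points, idx):
    # points: (B, C, N), idx: (B, npoint) -> (B, C, npoint)
    B, C, _ = points.shape
    return torch.gather(points, 2, idx.long().unsqueeze(1).expand(B, C, idx.size(1)))

points = torch.randn(2, 16, 1024)
idx = torch.randint(0, 1024, (2, 128))
out = gather_points_ref(points, idx)  # (2, 16, 128)
```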
  {
    "path": "pointnet2/_ext_src/src/sampling_gpu.cu",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates.\n// \n// This source code is licensed under the MIT license found in the\n// LICENSE file in the root directory of this source tree.\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"cuda_utils.h\"\n\n// input: points(b, c, n) idx(b, m)\n// output: out(b, c, m)\n__global__ void gather_points_kernel(int b, int c, int n, int m,\n                                     const float *__restrict__ points,\n                                     const int *__restrict__ idx,\n                                     float *__restrict__ out) {\n  for (int i = blockIdx.x; i < b; i += gridDim.x) {\n    for (int l = blockIdx.y; l < c; l += gridDim.y) {\n      for (int j = threadIdx.x; j < m; j += blockDim.x) {\n        int a = idx[i * m + j];\n        out[(i * c + l) * m + j] = points[(i * c + l) * n + a];\n      }\n    }\n  }\n}\n\nvoid gather_points_kernel_wrapper(int b, int c, int n, int npoints,\n                                  const float *points, const int *idx,\n                                  float *out) {\n  gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,\n                         at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,\n                                                             points, idx, out);\n\n  CUDA_CHECK_ERRORS();\n}\n\n// input: grad_out(b, c, m) idx(b, m)\n// output: grad_points(b, c, n)\n__global__ void gather_points_grad_kernel(int b, int c, int n, int m,\n                                          const float *__restrict__ grad_out,\n                                          const int *__restrict__ idx,\n                                          float *__restrict__ grad_points) {\n  for (int i = blockIdx.x; i < b; i += gridDim.x) {\n    for (int l = blockIdx.y; l < c; l += gridDim.y) {\n      for (int j = threadIdx.x; j < m; j += blockDim.x) {\n        int a = idx[i * m + j];\n        atomicAdd(grad_points + (i * c + l) * n + a,\n                  grad_out[(i * c + l) * m + j]);\n      }\n    }\n  }\n}\n\nvoid gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,\n                                       const float *grad_out, const int *idx,\n                                       float *grad_points) {\n  gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,\n                              at::cuda::getCurrentCUDAStream()>>>(\n      b, c, n, npoints, grad_out, idx, grad_points);\n\n  CUDA_CHECK_ERRORS();\n}\n\n__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,\n                         int idx1, int idx2) {\n  const float v1 = dists[idx1], v2 = dists[idx2];\n  const int i1 = dists_i[idx1], i2 = dists_i[idx2];\n  dists[idx1] = max(v1, v2);\n  dists_i[idx1] = v2 > v1 ? 
i2 : i1;\n}\n\n// Input dataset: (b, n, 3), tmp: (b, n)\n// Output idxs (b, m)\ntemplate <unsigned int block_size>\n__global__ void furthest_point_sampling_kernel(\n    int b, int n, int m, const float *__restrict__ dataset,\n    float *__restrict__ temp, int *__restrict__ idxs) {\n  if (m <= 0) return;\n  __shared__ float dists[block_size];\n  __shared__ int dists_i[block_size];\n\n  int batch_index = blockIdx.x;\n  dataset += batch_index * n * 3;\n  temp += batch_index * n;\n  idxs += batch_index * m;\n\n  int tid = threadIdx.x;\n  const int stride = block_size;\n\n  int old = 0;\n  if (threadIdx.x == 0) idxs[0] = old;\n\n  __syncthreads();\n  for (int j = 1; j < m; j++) {\n    int besti = 0;\n    float best = -1;\n    float x1 = dataset[old * 3 + 0];\n    float y1 = dataset[old * 3 + 1];\n    float z1 = dataset[old * 3 + 2];\n    for (int k = tid; k < n; k += stride) {\n      float x2, y2, z2;\n      x2 = dataset[k * 3 + 0];\n      y2 = dataset[k * 3 + 1];\n      z2 = dataset[k * 3 + 2];\n      float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);\n      if (mag <= 1e-3) continue;\n\n      float d =\n          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);\n\n      float d2 = min(d, temp[k]);\n      temp[k] = d2;\n      besti = d2 > best ? k : besti;\n      best = d2 > best ? d2 : best;\n    }\n    dists[tid] = best;\n    dists_i[tid] = besti;\n    __syncthreads();\n\n    if (block_size >= 512) {\n      if (tid < 256) {\n        __update(dists, dists_i, tid, tid + 256);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 256) {\n      if (tid < 128) {\n        __update(dists, dists_i, tid, tid + 128);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 128) {\n      if (tid < 64) {\n        __update(dists, dists_i, tid, tid + 64);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 64) {\n      if (tid < 32) {\n        __update(dists, dists_i, tid, tid + 32);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 32) {\n      if (tid < 16) {\n        __update(dists, dists_i, tid, tid + 16);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 16) {\n      if (tid < 8) {\n        __update(dists, dists_i, tid, tid + 8);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 8) {\n      if (tid < 4) {\n        __update(dists, dists_i, tid, tid + 4);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 4) {\n      if (tid < 2) {\n        __update(dists, dists_i, tid, tid + 2);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 2) {\n      if (tid < 1) {\n        __update(dists, dists_i, tid, tid + 1);\n      }\n      __syncthreads();\n    }\n\n    old = dists_i[0];\n    if (tid == 0) idxs[j] = old;\n  }\n}\n\nvoid furthest_point_sampling_kernel_wrapper(int b, int n, int m,\n                                            const float *dataset, float *temp,\n                                            int *idxs) {\n  unsigned int n_threads = opt_n_threads(n);\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  switch (n_threads) {\n    case 512:\n      furthest_point_sampling_kernel<512>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 256:\n      furthest_point_sampling_kernel<256>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 128:\n      furthest_point_sampling_kernel<128>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 64:\n      
furthest_point_sampling_kernel<64>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 32:\n      furthest_point_sampling_kernel<32>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 16:\n      furthest_point_sampling_kernel<16>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 8:\n      furthest_point_sampling_kernel<8>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 4:\n      furthest_point_sampling_kernel<4>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 2:\n      furthest_point_sampling_kernel<2>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 1:\n      furthest_point_sampling_kernel<1>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    default:\n      furthest_point_sampling_kernel<512>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n  }\n\n  CUDA_CHECK_ERRORS();\n}\n"
  },
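The FPS kernel keeps, per point, the running minimum squared distance to the already-selected set in `temp`, and each iteration picks the argmax via the shared-memory reduction ladder above. A slow single-batch sketch of that logic (hypothetical, and omitting the kernel's `mag <= 1e-3` skip of near-origin points):

```python
import torch

# Slow reference for furthest point sampling on one batch element.
def fps_ref(xyz, m):
    # xyz: (n, 3) -> (m,) indices; index 0 seeds the set, as in the kernel.
    idxs = torch.zeros(m, dtype=torch.long)
    temp = torch.full((xyz.shape[0],), 1e10)   # running min squared distance
    for j in range(1, m):
        d = ((xyz - xyz[idxs[j - 1]]) ** 2).sum(dim=1)
        temp = torch.minimum(temp, d)
        idxs[j] = temp.argmax()                # farthest from selected set
    return idxs
```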
  {
    "path": "pointnet2/pointnet2_modules.py",
    "content": "# Copyright (c) Facebook, Inc. and its affiliates.\n# \n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\n''' Pointnet2 layers.\nModified based on: https://github.com/erikwijmans/Pointnet2_PyTorch\nExtended with the following:\n1. Uniform sampling in each local region (sample_uniformly)\n2. Return sampled points indices to support votenet.\n'''\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport os\nimport sys\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(BASE_DIR)\n\nimport pointnet2_utils\nimport pytorch_utils as pt_utils\nfrom typing import List\n\n\nclass _PointnetSAModuleBase(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n        self.npoint = None\n        self.groupers = None\n        self.mlps = None\n\n    def forward(self, xyz: torch.Tensor,\n                features: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            (B, N, 3) tensor of the xyz coordinates of the features\n        features : torch.Tensor\n            (B, N, C) tensor of the descriptors of the the features\n\n        Returns\n        -------\n        new_xyz : torch.Tensor\n            (B, npoint, 3) tensor of the new features' xyz\n        new_features : torch.Tensor\n            (B, npoint, \\sum_k(mlps[k][-1])) tensor of the new_features descriptors\n        \"\"\"\n\n        new_features_list = []\n\n        xyz_flipped = xyz.transpose(1, 2).contiguous()\n        new_xyz = pointnet2_utils.gather_operation(\n            xyz_flipped,\n            pointnet2_utils.furthest_point_sample(xyz, self.npoint)\n        ).transpose(1, 2).contiguous() if self.npoint is not None else None\n\n        for i in range(len(self.groupers)):\n            new_features = self.groupers[i](\n                xyz, new_xyz, features\n            )  # (B, C, npoint, nsample)\n\n            new_features = self.mlps[i](\n                new_features\n            )  # (B, mlp[-1], npoint, nsample)\n            new_features = F.max_pool2d(\n                new_features, kernel_size=[1, new_features.size(3)]\n            )  # (B, mlp[-1], npoint, 1)\n            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)\n\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1)\n\n\nclass PointnetSAModuleMSG(_PointnetSAModuleBase):\n    r\"\"\"Pointnet set abstrction layer with multiscale grouping\n\n    Parameters\n    ----------\n    npoint : int\n        Number of features\n    radii : list of float32\n        list of radii to group with\n    nsamples : list of int32\n        Number of samples in each ball query\n    mlps : list of list of int32\n        Spec of the pointnet before the global max_pool for each scale\n    bn : bool\n        Use batchnorm\n    \"\"\"\n\n    def __init__(\n            self,\n            *,\n            npoint: int,\n            radii: List[float],\n            nsamples: List[int],\n            mlps: List[List[int]],\n            bn: bool = True,\n            use_xyz: bool = True, \n            sample_uniformly: bool = False\n    ):\n        super().__init__()\n\n        assert len(radii) == len(nsamples) == len(mlps)\n\n        self.npoint = npoint\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        for i in range(len(radii)):\n            radius = 
radii[i]\n            nsample = nsamples[i]\n            self.groupers.append(\n                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz, sample_uniformly=sample_uniformly)\n                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)\n            )\n            mlp_spec = mlps[i]\n            if use_xyz:\n                mlp_spec[0] += 3\n\n            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))\n\n\nclass PointnetSAModule(PointnetSAModuleMSG):\n    r\"\"\"Pointnet set abstraction layer\n\n    Parameters\n    ----------\n    npoint : int\n        Number of features\n    radius : float\n        Radius of ball\n    nsample : int\n        Number of samples in the ball query\n    mlp : list\n        Spec of the pointnet before the global max_pool\n    bn : bool\n        Use batchnorm\n    \"\"\"\n\n    def __init__(\n            self,\n            *,\n            mlp: List[int],\n            npoint: int = None,\n            radius: float = None,\n            nsample: int = None,\n            bn: bool = True,\n            use_xyz: bool = True\n    ):\n        super().__init__(\n            mlps=[mlp],\n            npoint=npoint,\n            radii=[radius],\n            nsamples=[nsample],\n            bn=bn,\n            use_xyz=use_xyz\n        )\n\n\nclass PointnetSAModuleVotes(nn.Module):\n    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG\n    with extra support for returning point indices for getting their GT votes '''\n\n    def __init__(\n            self,\n            *,\n            mlp: List[int],\n            npoint: int = None,\n            radius: float = None,\n            nsample: int = None,\n            bn: bool = True,\n            use_xyz: bool = True,\n            pooling: str = 'max',\n            sigma: float = None, # for RBF pooling\n            normalize_xyz: bool = False, # normalize local XYZ with radius\n            sample_uniformly: bool = False,\n            ret_unique_cnt: bool = False\n    ):\n        super().__init__()\n\n        self.npoint = npoint\n        self.radius = radius\n        self.nsample = nsample\n        self.pooling = pooling\n        self.mlp_module = None\n        self.use_xyz = use_xyz\n        self.sigma = sigma\n        if self.sigma is None:\n            self.sigma = self.radius/2\n        self.normalize_xyz = normalize_xyz\n        self.ret_unique_cnt = ret_unique_cnt\n\n        if npoint is not None:\n            self.grouper = pointnet2_utils.QueryAndGroup(radius, nsample,\n                use_xyz=use_xyz, ret_grouped_xyz=True, normalize_xyz=normalize_xyz,\n                sample_uniformly=sample_uniformly, ret_unique_cnt=ret_unique_cnt)\n        else:\n            self.grouper = pointnet2_utils.GroupAll(use_xyz, ret_grouped_xyz=True)\n\n        mlp_spec = mlp\n        if use_xyz and len(mlp_spec)>0:\n            mlp_spec[0] += 3\n        self.mlp_module = pt_utils.SharedMLP(mlp_spec, bn=bn)\n\n\n    def forward(self, xyz: torch.Tensor,\n                features: torch.Tensor = None,\n                inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            (B, N, 3) tensor of the xyz coordinates of the features\n        features : torch.Tensor\n            (B, C, N) tensor of the descriptors of the features\n        inds : torch.Tensor\n            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1)\n\n        Returns\n        -------\n        
new_xyz : torch.Tensor\n            (B, npoint, 3) tensor of the new features' xyz\n        new_features : torch.Tensor\n            (B, \\sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors\n        inds: torch.Tensor\n            (B, npoint) tensor of the inds\n        \"\"\"\n\n        xyz_flipped = xyz.transpose(1, 2).contiguous()\n        if inds is None:\n            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)\n        else:\n            assert(inds.shape[1] == self.npoint)\n        new_xyz = pointnet2_utils.gather_operation(\n            xyz_flipped, inds\n        ).transpose(1, 2).contiguous() if self.npoint is not None else None\n\n        if not self.ret_unique_cnt:\n            grouped_features, grouped_xyz = self.grouper(\n                xyz, new_xyz, features\n            )  # (B, C, npoint, nsample)\n        else:\n            grouped_features, grouped_xyz, unique_cnt = self.grouper(\n                xyz, new_xyz, features\n            )  # (B, C, npoint, nsample), (B,3,npoint,nsample), (B,npoint)\n\n        new_features = self.mlp_module(\n            grouped_features\n        )  # (B, mlp[-1], npoint, nsample)\n        if self.pooling == 'max':\n            new_features = F.max_pool2d(\n                new_features, kernel_size=[1, new_features.size(3)]\n            )  # (B, mlp[-1], npoint, 1)\n        elif self.pooling == 'avg':\n            new_features = F.avg_pool2d(\n                new_features, kernel_size=[1, new_features.size(3)]\n            )  # (B, mlp[-1], npoint, 1)\n        elif self.pooling == 'rbf': \n            # Use radial basis function kernel for weighted sum of features (normalized by nsample and sigma)\n            # Ref: https://en.wikipedia.org/wiki/Radial_basis_function_kernel\n            rbf = torch.exp(-1 * grouped_xyz.pow(2).sum(1,keepdim=False) / (self.sigma**2) / 2) # (B, npoint, nsample)\n            new_features = torch.sum(new_features * rbf.unsqueeze(1), -1, keepdim=True) / float(self.nsample) # (B, mlp[-1], npoint, 1)\n        new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)\n\n        if not self.ret_unique_cnt:\n            return new_xyz, new_features, inds\n        else:\n            return new_xyz, new_features, inds, unique_cnt\n\nclass PointnetSAModuleMSGVotes(nn.Module):\n    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG\n    with extra support for returning point indices for getting their GT votes '''\n\n    def __init__(\n            self,\n            *,\n            mlps: List[List[int]],\n            npoint: int,\n            radii: List[float],\n            nsamples: List[int],\n            bn: bool = True,\n            use_xyz: bool = True,\n            sample_uniformly: bool = False\n    ):\n        super().__init__()\n\n        assert(len(mlps) == len(nsamples) == len(radii))\n\n        self.npoint = npoint\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        for i in range(len(radii)):\n            radius = radii[i]\n            nsample = nsamples[i]\n            self.groupers.append(\n                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz, sample_uniformly=sample_uniformly)\n                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)\n            )\n            mlp_spec = mlps[i]\n            if use_xyz:\n                mlp_spec[0] += 3\n\n            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))\n\n    def forward(self, xyz: torch.Tensor,\n                
features: torch.Tensor = None, inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            (B, N, 3) tensor of the xyz coordinates of the features\n        features : torch.Tensor\n            (B, C, N) tensor of the descriptors of the features\n        inds : torch.Tensor\n            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1)\n\n        Returns\n        -------\n        new_xyz : torch.Tensor\n            (B, npoint, 3) tensor of the new features' xyz\n        new_features : torch.Tensor\n            (B, \\sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors\n        inds: torch.Tensor\n            (B, npoint) tensor of the inds\n        \"\"\"\n        new_features_list = []\n\n        xyz_flipped = xyz.transpose(1, 2).contiguous()\n        if inds is None:\n            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)\n        new_xyz = pointnet2_utils.gather_operation(\n            xyz_flipped, inds\n        ).transpose(1, 2).contiguous() if self.npoint is not None else None\n\n        for i in range(len(self.groupers)):\n            new_features = self.groupers[i](\n                xyz, new_xyz, features\n            )  # (B, C, npoint, nsample)\n            new_features = self.mlps[i](\n                new_features\n            )  # (B, mlp[-1], npoint, nsample)\n            new_features = F.max_pool2d(\n                new_features, kernel_size=[1, new_features.size(3)]\n            )  # (B, mlp[-1], npoint, 1)\n            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)\n\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1), inds\n\n\nclass PointnetFPModule(nn.Module):\n    r\"\"\"Propagates the features of one set to another\n\n    Parameters\n    ----------\n    mlp : list\n        Pointnet module parameters\n    bn : bool\n        Use batchnorm\n    \"\"\"\n\n    def __init__(self, *, mlp: List[int], bn: bool = True):\n        super().__init__()\n        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)\n\n    def forward(\n            self, unknown: torch.Tensor, known: torch.Tensor,\n            unknow_feats: torch.Tensor, known_feats: torch.Tensor\n    ) -> torch.Tensor:\n        r\"\"\"\n        Parameters\n        ----------\n        unknown : torch.Tensor\n            (B, n, 3) tensor of the xyz positions of the unknown features\n        known : torch.Tensor\n            (B, m, 3) tensor of the xyz positions of the known features\n        unknow_feats : torch.Tensor\n            (B, C1, n) tensor of the features to be propagated to\n        known_feats : torch.Tensor\n            (B, C2, m) tensor of features to be propagated\n\n        Returns\n        -------\n        new_features : torch.Tensor\n            (B, mlp[-1], n) tensor of the features of the unknown features\n        \"\"\"\n\n        if known is not None:\n            dist, idx = pointnet2_utils.three_nn(unknown, known)\n            dist_recip = 1.0 / (dist + 1e-8)\n            norm = torch.sum(dist_recip, dim=2, keepdim=True)\n            weight = dist_recip / norm\n\n            interpolated_feats = pointnet2_utils.three_interpolate(\n                known_feats, idx, weight\n            )\n        else:\n            interpolated_feats = known_feats.expand(\n                *known_feats.size()[0:2], unknown.size(1)\n            )\n\n        if unknow_feats is not None:\n           
 new_features = torch.cat([interpolated_feats, unknow_feats],\n                                   dim=1)  #(B, C2 + C1, n)\n        else:\n            new_features = interpolated_feats\n\n        new_features = new_features.unsqueeze(-1)\n        new_features = self.mlp(new_features)\n\n        return new_features.squeeze(-1)\n\nclass PointnetLFPModuleMSG(nn.Module):\n    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG\n    learnable feature propagation layer.'''\n\n    def __init__(\n            self,\n            *,\n            mlps: List[List[int]],\n            radii: List[float],\n            nsamples: List[int],\n            post_mlp: List[int],\n            bn: bool = True,\n            use_xyz: bool = True,\n            sample_uniformly: bool = False\n    ):\n        super().__init__()\n\n        assert(len(mlps) == len(nsamples) == len(radii))\n        \n        self.post_mlp = pt_utils.SharedMLP(post_mlp, bn=bn)\n\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        for i in range(len(radii)):\n            radius = radii[i]\n            nsample = nsamples[i]\n            self.groupers.append(\n                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,\n                    sample_uniformly=sample_uniformly)\n            )\n            mlp_spec = mlps[i]\n            if use_xyz:\n                mlp_spec[0] += 3\n\n            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))\n\n    def forward(self, xyz2: torch.Tensor, xyz1: torch.Tensor,\n                features2: torch.Tensor, features1: torch.Tensor) -> torch.Tensor:\n        r\"\"\" Propagate features from xyz1 to xyz2.\n        Parameters\n        ----------\n        xyz2 : torch.Tensor\n            (B, N2, 3) tensor of the xyz coordinates of the features\n        xyz1 : torch.Tensor\n            (B, N1, 3) tensor of the xyz coordinates of the features\n        features2 : torch.Tensor\n            (B, C2, N2) tensor of the descriptors of the features\n        features1 : torch.Tensor\n            (B, C1, N1) tensor of the descriptors of the features\n\n        Returns\n        -------\n        new_features2 : torch.Tensor\n            (B, \\sum_k(mlps[k][-1]), N2) tensor of the new_features descriptors\n        \"\"\"\n        new_features_list = []\n\n        for i in range(len(self.groupers)):\n            new_features = self.groupers[i](\n                xyz1, xyz2, features1\n            )  # (B, C1, N2, nsample)\n            new_features = self.mlps[i](\n                new_features\n            )  # (B, mlp[-1], N2, nsample)\n            new_features = F.max_pool2d(\n                new_features, kernel_size=[1, new_features.size(3)]\n            )  # (B, mlp[-1], N2, 1)\n            new_features = new_features.squeeze(-1)  # (B, mlp[-1], N2)\n\n            if features2 is not None:\n                new_features = torch.cat([new_features, features2],\n                                           dim=1)  #(B, mlp[-1] + C2, N2)\n\n            new_features = new_features.unsqueeze(-1)\n            new_features = self.post_mlp(new_features)\n\n            new_features_list.append(new_features)\n\n        return torch.cat(new_features_list, dim=1).squeeze(-1)\n\n\nif __name__ == \"__main__\":\n    from torch.autograd import Variable\n    torch.manual_seed(1)\n    torch.cuda.manual_seed_all(1)\n    xyz = Variable(torch.randn(2, 9, 3).cuda(), requires_grad=True)\n    xyz_feats = Variable(torch.randn(2, 9, 6).cuda(), requires_grad=True)\n\n  
  test_module = PointnetSAModuleMSG(\n        npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]]\n    )\n    test_module.cuda()\n    print(test_module(xyz, xyz_feats))\n\n    for _ in range(1):\n        _, new_features = test_module(xyz, xyz_feats)\n        new_features.backward(\n            torch.cuda.FloatTensor(*new_features.size()).fill_(1)\n        )\n        print(new_features)\n        print(xyz.grad)\n"
  },
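A hypothetical end-to-end sketch of the SA/FP modules above (shapes follow the docstrings; requires a CUDA device since the underlying ops are GPU-only):

```python
import torch
from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule

# Set abstraction: 1024 points -> 256 centers with 128-d features
# (mlp[0]=0 becomes 3 internally because use_xyz prepends coordinates).
sa = PointnetSAModuleVotes(npoint=256, radius=0.2, nsample=32,
                           mlp=[0, 64, 128], normalize_xyz=True).cuda()
fp = PointnetFPModule(mlp=[128, 128]).cuda()

xyz = torch.randn(2, 1024, 3).cuda()
new_xyz, feats, inds = sa(xyz)      # (2, 256, 3), (2, 128, 256), (2, 256)
up = fp(xyz, new_xyz, None, feats)  # (2, 128, 1024): features back on all points
```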
  {
    "path": "pointnet2/pointnet2_utils.py",
    "content": "# Copyright (c) Facebook, Inc. and its affiliates.\n# \n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\n''' Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch '''\nfrom __future__ import (\n    division,\n    absolute_import,\n    with_statement,\n    print_function,\n    unicode_literals,\n)\nimport torch\nfrom torch.autograd import Function\nimport torch.nn as nn\nimport pytorch_utils as pt_utils\nimport sys\n\ntry:\n    import builtins\nexcept:\n    import __builtin__ as builtins\n\ntry:\n    import pointnet2._ext as _ext\nexcept ImportError:\n    if not getattr(builtins, \"__POINTNET2_SETUP__\", False):\n        raise ImportError(\n            \"Could not import _ext module.\\n\"\n            \"Please see the setup instructions in the README: \"\n            \"https://github.com/erikwijmans/Pointnet2_PyTorch/blob/master/README.rst\"\n        )\n\nif False:\n    # Workaround for type hints without depending on the `typing` module\n    from typing import *\n\n\nclass RandomDropout(nn.Module):\n    def __init__(self, p=0.5, inplace=False):\n        super(RandomDropout, self).__init__()\n        self.p = p\n        self.inplace = inplace\n\n    def forward(self, X):\n        theta = torch.Tensor(1).uniform_(0, self.p)[0]\n        return pt_utils.feature_dropout_no_scaling(X, theta, self.train, self.inplace)\n\n\nclass FurthestPointSampling(Function):\n    @staticmethod\n    def forward(ctx, xyz, npoint):\n        # type: (Any, torch.Tensor, int) -> torch.Tensor\n        r\"\"\"\n        Uses iterative furthest point sampling to select a set of npoint features that have the largest\n        minimum distance\n\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            (B, N, 3) tensor where N > npoint\n        npoint : int32\n            number of features in the sampled set\n\n        Returns\n        -------\n        torch.Tensor\n            (B, npoint) tensor containing the set\n        \"\"\"\n        return _ext.furthest_point_sampling(xyz, npoint)\n\n    @staticmethod\n    def backward(xyz, a=None):\n        return None, None\n\n\nfurthest_point_sample = FurthestPointSampling.apply\n\n\nclass GatherOperation(Function):\n    @staticmethod\n    def forward(ctx, features, idx):\n        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor\n        r\"\"\"\n\n        Parameters\n        ----------\n        features : torch.Tensor\n            (B, C, N) tensor\n\n        idx : torch.Tensor\n            (B, npoint) tensor of the features to gather\n\n        Returns\n        -------\n        torch.Tensor\n            (B, C, npoint) tensor\n        \"\"\"\n\n        _, C, N = features.size()\n\n        ctx.for_backwards = (idx, C, N)\n\n        return _ext.gather_points(features, idx)\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        idx, C, N = ctx.for_backwards\n\n        grad_features = _ext.gather_points_grad(grad_out.contiguous(), idx, N)\n        return grad_features, None\n\n\ngather_operation = GatherOperation.apply\n\n\nclass ThreeNN(Function):\n    @staticmethod\n    def forward(ctx, unknown, known):\n        # type: (Any, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]\n        r\"\"\"\n            Find the three nearest neighbors of unknown in known\n        Parameters\n        ----------\n        unknown : torch.Tensor\n            (B, n, 3) tensor of known features\n        known : torch.Tensor\n            
(B, m, 3) tensor of the points to interpolate from\n\n        Returns\n        -------\n        dist : torch.Tensor\n            (B, n, 3) l2 distance to the three nearest neighbors\n        idx : torch.Tensor\n            (B, n, 3) index of 3 nearest neighbors\n        \"\"\"\n        dist2, idx = _ext.three_nn(unknown, known)\n\n        return torch.sqrt(dist2), idx\n\n    @staticmethod\n    def backward(ctx, a=None, b=None):\n        return None, None\n\n\nthree_nn = ThreeNN.apply\n\n\nclass ThreeInterpolate(Function):\n    @staticmethod\n    def forward(ctx, features, idx, weight):\n        # type: (Any, torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n        r\"\"\"\n            Performs weighted linear interpolation on 3 features\n        Parameters\n        ----------\n        features : torch.Tensor\n            (B, c, m) Features descriptors to be interpolated from\n        idx : torch.Tensor\n            (B, n, 3) three nearest neighbors of the target features in features\n        weight : torch.Tensor\n            (B, n, 3) weights\n\n        Returns\n        -------\n        torch.Tensor\n            (B, c, n) tensor of the interpolated features\n        \"\"\"\n        B, c, m = features.size()\n        n = idx.size(1)\n\n        ctx.three_interpolate_for_backward = (idx, weight, m)\n\n        return _ext.three_interpolate(features, idx, weight)\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]\n        r\"\"\"\n        Parameters\n        ----------\n        grad_out : torch.Tensor\n            (B, c, n) tensor with gradients of outputs\n\n        Returns\n        -------\n        grad_features : torch.Tensor\n            (B, c, m) tensor with gradients of features\n\n        None\n\n        None\n        \"\"\"\n        idx, weight, m = ctx.three_interpolate_for_backward\n\n        grad_features = _ext.three_interpolate_grad(\n            grad_out.contiguous(), idx, weight, m\n        )\n\n        return grad_features, None, None\n\n\nthree_interpolate = ThreeInterpolate.apply\n\n\nclass GroupingOperation(Function):\n    @staticmethod\n    def forward(ctx, features, idx):\n        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor\n        r\"\"\"\n\n        Parameters\n        ----------\n        features : torch.Tensor\n            (B, C, N) tensor of features to group\n        idx : torch.Tensor\n            (B, npoint, nsample) tensor containing the indices of features to group with\n\n        Returns\n        -------\n        torch.Tensor\n            (B, C, npoint, nsample) tensor\n        \"\"\"\n        B, nfeatures, nsample = idx.size()\n        _, C, N = features.size()\n\n        ctx.for_backwards = (idx, N)\n\n        return _ext.group_points(features, idx)\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]\n        r\"\"\"\n\n        Parameters\n        ----------\n        grad_out : torch.Tensor\n            (B, C, npoint, nsample) tensor of the gradients of the output from forward\n\n        Returns\n        -------\n        torch.Tensor\n            (B, C, N) gradient of the features\n        None\n        \"\"\"\n        idx, N = ctx.for_backwards\n\n        grad_features = _ext.group_points_grad(grad_out.contiguous(), idx, N)\n\n        return grad_features, None\n\n\ngrouping_operation = GroupingOperation.apply\n\n\nclass BallQuery(Function):\n    @staticmethod\n    def forward(ctx, 
radius, nsample, xyz, new_xyz):\n        # type: (Any, float, int, torch.Tensor, torch.Tensor) -> torch.Tensor\n        r\"\"\"\n\n        Parameters\n        ----------\n        radius : float\n            radius of the balls\n        nsample : int\n            maximum number of features in the balls\n        xyz : torch.Tensor\n            (B, N, 3) xyz coordinates of the features\n        new_xyz : torch.Tensor\n            (B, npoint, 3) centers of the ball query\n\n        Returns\n        -------\n        torch.Tensor\n            (B, npoint, nsample) tensor with the indices of the features that form the query balls\n        \"\"\"\n        return _ext.ball_query(new_xyz, xyz, radius, nsample)\n\n    @staticmethod\n    def backward(ctx, a=None):\n        return None, None, None, None\n\n\nball_query = BallQuery.apply\n\n\nclass QueryAndGroup(nn.Module):\n    r\"\"\"\n    Groups with a ball query of radius\n\n    Parameters\n    ---------\n    radius : float32\n        Radius of ball\n    nsample : int32\n        Maximum number of features to gather in the ball\n    \"\"\"\n\n    def __init__(self, radius, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, sample_uniformly=False, ret_unique_cnt=False):\n        # type: (QueryAndGroup, float, int, bool) -> None\n        super(QueryAndGroup, self).__init__()\n        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz\n        self.ret_grouped_xyz = ret_grouped_xyz\n        self.normalize_xyz = normalize_xyz\n        self.sample_uniformly = sample_uniformly\n        self.ret_unique_cnt = ret_unique_cnt\n        if self.ret_unique_cnt:\n            assert(self.sample_uniformly)\n\n    def forward(self, xyz, new_xyz, features=None):\n        # type: (QueryAndGroup, torch.Tensor, 
torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor]\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            xyz coordinates of the features (B, N, 3)\n        new_xyz : torch.Tensor\n            centroids (B, npoint, 3)\n        features : torch.Tensor\n            Descriptors of the features (B, C, N)\n\n        Returns\n        -------\n        new_features : torch.Tensor\n            (B, 3 + C, npoint, nsample) tensor\n        \"\"\"\n        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)\n\n        if self.sample_uniformly:\n            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))\n            for i_batch in range(idx.shape[0]):\n                for i_region in range(idx.shape[1]):\n                    unique_ind = torch.unique(idx[i_batch, i_region, :])\n                    num_unique = unique_ind.shape[0]\n                    unique_cnt[i_batch, i_region] = num_unique\n                    sample_ind = torch.randint(0, num_unique, (self.nsample - num_unique,), dtype=torch.long)\n                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))\n                    idx[i_batch, i_region, :] = all_ind\n\n\n        xyz_trans = xyz.transpose(1, 2).contiguous()\n        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)\n        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)\n        if self.normalize_xyz:\n            grouped_xyz /= self.radius\n\n        if features is not None:\n            grouped_features = grouping_operation(features, idx)\n            if self.use_xyz:\n                new_features = torch.cat(\n                    [grouped_xyz, grouped_features], dim=1\n                )  # (B, C + 3, npoint, nsample)\n            else:\n                new_features = grouped_features\n        else:\n            assert (\n                self.use_xyz\n            ), \"use_xyz must be True when no features are given!\"\n            new_features = grouped_xyz\n\n        ret = [new_features]\n        if self.ret_grouped_xyz:\n            ret.append(grouped_xyz)\n        if self.ret_unique_cnt:\n            ret.append(unique_cnt)\n        if len(ret) == 1:\n            return ret[0]\n        else:\n            return tuple(ret)\n\n\nclass GroupAll(nn.Module):\n    r\"\"\"\n    Groups all features\n\n    Parameters\n    ---------\n    \"\"\"\n\n    def __init__(self, use_xyz=True, ret_grouped_xyz=False):\n        # type: (GroupAll, bool, bool) -> None\n        super(GroupAll, self).__init__()\n        self.use_xyz = use_xyz\n        self.ret_grouped_xyz = ret_grouped_xyz\n\n    def forward(self, xyz, new_xyz, features=None):\n        # type: (GroupAll, torch.Tensor, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor]\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            xyz coordinates of the features (B, N, 3)\n        new_xyz : torch.Tensor\n            Ignored\n        features : torch.Tensor\n            Descriptors of the features (B, C, N)\n\n        Returns\n        -------\n        new_features : torch.Tensor\n            (B, C + 3, 1, N) tensor\n        \"\"\"\n\n        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)\n        if features is not None:\n            grouped_features = features.unsqueeze(2)\n            if self.use_xyz:\n                new_features = torch.cat(\n                    [grouped_xyz, grouped_features], dim=1\n                )  # (B, 3 + C, 1, N)\n            else:\n                new_features = grouped_features\n        else:\n            new_features = 
grouped_xyz\n\n        if self.ret_grouped_xyz:\n            return new_features, grouped_xyz\n        else:\n            return new_features\n\n\nclass CylinderQuery(Function):\n    @staticmethod\n    def forward(ctx, radius, hmin, hmax, nsample, xyz, new_xyz, rot):\n        # type: (Any, float, float, float, int, torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n        r\"\"\"\n\n        Parameters\n        ----------\n        radius : float\n            radius of the cylinders\n        hmin, hmax : float\n            endpoints of cylinder height in x-rotation axis\n        nsample : int\n            maximum number of features in the cylinders\n        xyz : torch.Tensor\n            (B, N, 3) xyz coordinates of the features\n        new_xyz : torch.Tensor\n            (B, npoint, 3) centers of the cylinder query\n        rot: torch.Tensor\n            (B, npoint, 9) flattened rotation matrices from\n                           cylinder frame to world frame\n\n        Returns\n        -------\n        torch.Tensor\n            (B, npoint, nsample) tensor with the indices of the features that form the query cylinders\n        \"\"\"\n        return _ext.cylinder_query(new_xyz, xyz, rot, radius, hmin, hmax, nsample)\n\n    @staticmethod\n    def backward(ctx, a=None):\n        return None, None, None, None, None, None, None\n\n\ncylinder_query = CylinderQuery.apply\n\n\nclass CylinderQueryAndGroup(nn.Module):\n    r\"\"\"\n    Groups with a cylinder query of radius and height\n\n    Parameters\n    ---------\n    radius : float32\n        Radius of cylinder\n    hmin, hmax: float32\n        endpoints of cylinder height in x-rotation axis\n    nsample : int32\n        Maximum number of features to gather in the cylinder\n    \"\"\"\n\n    def __init__(self, radius, hmin, hmax, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, rotate_xyz=True, sample_uniformly=False, ret_unique_cnt=False):\n        super(CylinderQueryAndGroup, self).__init__()\n        self.radius, self.nsample, self.hmin, self.hmax = radius, nsample, hmin, hmax\n        self.use_xyz = use_xyz\n        self.ret_grouped_xyz = ret_grouped_xyz\n        self.normalize_xyz = normalize_xyz\n        self.rotate_xyz = rotate_xyz\n        self.sample_uniformly = sample_uniformly\n        self.ret_unique_cnt = ret_unique_cnt\n        if self.ret_unique_cnt:\n            assert(self.sample_uniformly)\n\n    def forward(self, xyz, new_xyz, rot, features=None):\n        r\"\"\"\n        Parameters\n        ----------\n        xyz : torch.Tensor\n            xyz coordinates of the features (B, N, 3)\n        new_xyz : torch.Tensor\n            centroids (B, npoint, 3)\n        rot : torch.Tensor\n            rotation matrices (B, npoint, 3, 3)\n        features : torch.Tensor\n            Descriptors of the features (B, C, N)\n\n        Returns\n        -------\n        new_features : torch.Tensor\n            (B, 3 + C, npoint, nsample) tensor\n        \"\"\"\n        B, npoint, _ = new_xyz.size()\n        idx = cylinder_query(self.radius, self.hmin, self.hmax, self.nsample, xyz, new_xyz, rot.view(B, npoint, 9))\n\n        if self.sample_uniformly:\n            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))\n            for i_batch in range(idx.shape[0]):\n                for i_region in range(idx.shape[1]):\n                    unique_ind = torch.unique(idx[i_batch, i_region, :])\n                    num_unique = unique_ind.shape[0]\n                    unique_cnt[i_batch, i_region] = num_unique\n               
     sample_ind = torch.randint(0, num_unique, (self.nsample - num_unique,), dtype=torch.long)\n                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))\n                    idx[i_batch, i_region, :] = all_ind\n\n\n        xyz_trans = xyz.transpose(1, 2).contiguous()\n        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)\n        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)\n        if self.normalize_xyz:\n            grouped_xyz /= self.radius\n        if self.rotate_xyz:\n            grouped_xyz_ = grouped_xyz.permute(0, 2, 3, 1).contiguous() # (B, npoint, nsample, 3)\n            grouped_xyz_ = torch.matmul(grouped_xyz_, rot)\n            grouped_xyz = grouped_xyz_.permute(0, 3, 1, 2).contiguous()\n\n\n        if features is not None:\n            grouped_features = grouping_operation(features, idx)\n            if self.use_xyz:\n                new_features = torch.cat(\n                    [grouped_xyz, grouped_features], dim=1\n                )  # (B, C + 3, npoint, nsample)\n            else:\n                new_features = grouped_features\n        else:\n            assert (\n                self.use_xyz\n            ), \"use_xyz must be True when no features are given!\"\n            new_features = grouped_xyz\n\n        ret = [new_features]\n        if self.ret_grouped_xyz:\n            ret.append(grouped_xyz)\n        if self.ret_unique_cnt:\n            ret.append(unique_cnt)\n        if len(ret) == 1:\n            return ret[0]\n        else:\n            return tuple(ret)"
  },
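A hypothetical smoke test of the sampling/grouping pipeline exposed above (CUDA-only, since the ops dispatch to the extension): FPS picks centers, gather_operation fetches their coordinates, and QueryAndGroup assembles per-center neighborhoods:

```python
import torch
from pointnet2_utils import furthest_point_sample, gather_operation, QueryAndGroup

xyz = torch.randn(2, 1024, 3).cuda()
feats = torch.randn(2, 32, 1024).cuda()

inds = furthest_point_sample(xyz, 64)                          # (2, 64)
new_xyz = gather_operation(xyz.transpose(1, 2).contiguous(),
                           inds).transpose(1, 2).contiguous()  # (2, 64, 3)
grouper = QueryAndGroup(radius=0.3, nsample=16, use_xyz=True)
grouped = grouper(xyz, new_xyz, feats)                         # (2, 32 + 3, 64, 16)
```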
  {
    "path": "pointnet2/pytorch_utils.py",
    "content": "# Copyright (c) Facebook, Inc. and its affiliates.\n# \n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\n''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch '''\nimport torch\nimport torch.nn as nn\nfrom typing import List, Tuple\n\nclass SharedMLP(nn.Sequential):\n\n    def __init__(\n            self,\n            args: List[int],\n            *,\n            bn: bool = False,\n            activation=nn.ReLU(inplace=True),\n            preact: bool = False,\n            first: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__()\n\n        for i in range(len(args) - 1):\n            self.add_module(\n                name + 'layer{}'.format(i),\n                Conv2d(\n                    args[i],\n                    args[i + 1],\n                    bn=(not first or not preact or (i != 0)) and bn,\n                    activation=activation\n                    if (not first or not preact or (i != 0)) else None,\n                    preact=preact\n                )\n            )\n\n\nclass _BNBase(nn.Sequential):\n\n    def __init__(self, in_size, batch_norm=None, name=\"\"):\n        super().__init__()\n        self.add_module(name + \"bn\", batch_norm(in_size))\n\n        nn.init.constant_(self[0].weight, 1.0)\n        nn.init.constant_(self[0].bias, 0)\n\n\nclass BatchNorm1d(_BNBase):\n\n    def __init__(self, in_size: int, *, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)\n\n\nclass BatchNorm2d(_BNBase):\n\n    def __init__(self, in_size: int, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)\n\n\nclass BatchNorm3d(_BNBase):\n\n    def __init__(self, in_size: int, name: str = \"\"):\n        super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name)\n\n\nclass _ConvBase(nn.Sequential):\n\n    def __init__(\n            self,\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=None,\n            batch_norm=None,\n            bias=True,\n            preact=False,\n            name=\"\"\n    ):\n        super().__init__()\n\n        bias = bias and (not bn)\n        conv_unit = conv(\n            in_size,\n            out_size,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=bias\n        )\n        init(conv_unit.weight)\n        if bias:\n            nn.init.constant_(conv_unit.bias, 0)\n\n        if bn:\n            if not preact:\n                bn_unit = batch_norm(out_size)\n            else:\n                bn_unit = batch_norm(in_size)\n\n        if preact:\n            if bn:\n                self.add_module(name + 'bn', bn_unit)\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n        self.add_module(name + 'conv', conv_unit)\n\n        if not preact:\n            if bn:\n                self.add_module(name + 'bn', bn_unit)\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n\nclass Conv1d(_ConvBase):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            kernel_size: int = 1,\n            stride: int = 1,\n            padding: int = 0,\n            
activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv1d,\n            batch_norm=BatchNorm1d,\n            bias=bias,\n            preact=preact,\n            name=name\n        )\n\n\nclass Conv2d(_ConvBase):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            kernel_size: Tuple[int, int] = (1, 1),\n            stride: Tuple[int, int] = (1, 1),\n            padding: Tuple[int, int] = (0, 0),\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv2d,\n            batch_norm=BatchNorm2d,\n            bias=bias,\n            preact=preact,\n            name=name\n        )\n\n\nclass Conv3d(_ConvBase):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            kernel_size: Tuple[int, int, int] = (1, 1, 1),\n            stride: Tuple[int, int, int] = (1, 1, 1),\n            padding: Tuple[int, int, int] = (0, 0, 0),\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=nn.init.kaiming_normal_,\n            bias: bool = True,\n            preact: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__(\n            in_size,\n            out_size,\n            kernel_size,\n            stride,\n            padding,\n            activation,\n            bn,\n            init,\n            conv=nn.Conv3d,\n            batch_norm=BatchNorm3d,\n            bias=bias,\n            preact=preact,\n            name=name\n        )\n\n\nclass FC(nn.Sequential):\n\n    def __init__(\n            self,\n            in_size: int,\n            out_size: int,\n            *,\n            activation=nn.ReLU(inplace=True),\n            bn: bool = False,\n            init=None,\n            preact: bool = False,\n            name: str = \"\"\n    ):\n        super().__init__()\n\n        fc = nn.Linear(in_size, out_size, bias=not bn)\n        if init is not None:\n            init(fc.weight)\n        if not bn:\n            nn.init.constant_(fc.bias, 0)\n\n        if preact:\n            if bn:\n                self.add_module(name + 'bn', BatchNorm1d(in_size))\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\n        self.add_module(name + 'fc', fc)\n\n        if not preact:\n            if bn:\n                self.add_module(name + 'bn', BatchNorm1d(out_size))\n\n            if activation is not None:\n                self.add_module(name + 'activation', activation)\n\ndef set_bn_momentum_default(bn_momentum):\n\n    def fn(m):\n        if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):\n            m.momentum = bn_momentum\n\n    return fn\n\n\nclass 
BNMomentumScheduler(object):\n\n    def __init__(\n            self, model, bn_lambda, last_epoch=-1,\n            setter=set_bn_momentum_default\n    ):\n        if not isinstance(model, nn.Module):\n            raise RuntimeError(\n                \"Class '{}' is not a PyTorch nn Module\".format(\n                    type(model).__name__\n                )\n            )\n\n        self.model = model\n        self.setter = setter\n        self.lmbd = bn_lambda\n\n        self.step(last_epoch + 1)\n        self.last_epoch = last_epoch\n\n    def step(self, epoch=None):\n        if epoch is None:\n            epoch = self.last_epoch + 1\n\n        self.last_epoch = epoch\n        self.model.apply(self.setter(self.lmbd(epoch)))\n\n\n"
  },
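SharedMLP stacks 1x1 Conv2d (+BN+ReLU) layers so one MLP is shared across all points and samples, and BNMomentumScheduler re-applies a momentum schedule to every BN layer each epoch. A short usage sketch under those assumptions:

```python
import torch
from pytorch_utils import SharedMLP, BNMomentumScheduler

mlp = SharedMLP([6, 64, 128], bn=True)   # 1x1 convs: 6 -> 64 -> 128 channels
x = torch.randn(4, 6, 512, 32)           # (B, C, npoint, nsample)
y = mlp(x)                               # (4, 128, 512, 32)

# Halve BN momentum every 10 epochs, floored at 0.01.
sched = BNMomentumScheduler(mlp, bn_lambda=lambda e: max(0.5 * 0.5 ** (e // 10), 0.01))
sched.step(20)  # applies momentum 0.125 to all BN layers in mlp
```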
  {
    "path": "pointnet2/setup.py",
    "content": "# Copyright (c) Facebook, Inc. and its affiliates.\n# \n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\nfrom setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\nimport glob\nimport os\nROOT = os.path.dirname(os.path.abspath(__file__))\n\n_ext_src_root = \"_ext_src\"\n_ext_sources = glob.glob(\"{}/src/*.cpp\".format(_ext_src_root)) + glob.glob(\n    \"{}/src/*.cu\".format(_ext_src_root)\n)\n_ext_headers = glob.glob(\"{}/include/*\".format(_ext_src_root))\n\nsetup(\n    name='pointnet2',\n    ext_modules=[\n        CUDAExtension(\n            name='pointnet2._ext',\n            sources=_ext_sources,\n            extra_compile_args={\n                \"cxx\": [\"-O2\", \"-I{}\".format(\"{}/{}/include\".format(ROOT, _ext_src_root))],\n                \"nvcc\": [\"-O2\", \"-I{}\".format(\"{}/{}/include\".format(ROOT, _ext_src_root))],\n            },\n        )\n    ],\n    cmdclass={\n        'build_ext': BuildExtension\n    }\n)\n"
  },
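  {
    "path": "examples/check_pointnet2_ext.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: after building the\n    extension with `python setup.py install` (run inside pointnet2/), this checks\n    that the compiled pointnet2._ext module is importable and lists the operators\n    it exposes. The file path is an assumption made for this example.\n\"\"\"\n\ntry:\n    import pointnet2._ext as _ext\nexcept ImportError as e:\n    raise ImportError('pointnet2._ext is not built; run `python setup.py install` '\n                      'inside the pointnet2/ directory first') from e\n\n# list the operators compiled from _ext_src/src/*.cpp and *.cu\nprint(sorted(name for name in dir(_ext) if not name.startswith('__')))\n"
  },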
  {
    "path": "requirements.txt",
    "content": "torch>=1.8\ntensorboard==2.3\nnumpy\nscipy\nopen3d>=0.8\nPillow\ntqdm\nMinkowskiEngine==0.5.4"
  },
  {
    "path": "test.py",
    "content": "import os\nimport sys\nimport numpy as np\nimport argparse\nimport time\nimport torch\nfrom torch.utils.data import DataLoader\nfrom graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval\n\nROOT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.join(ROOT_DIR, 'pointnet2'))\nsys.path.append(os.path.join(ROOT_DIR, 'utils'))\nsys.path.append(os.path.join(ROOT_DIR, 'models'))\nsys.path.append(os.path.join(ROOT_DIR, 'dataset'))\n\nfrom models.graspnet import GraspNet, pred_decode\nfrom dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn\nfrom utils.collision_detector import ModelFreeCollisionDetector\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset_root', default=None, required=True)\nparser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None, required=True)\nparser.add_argument('--dump_dir', help='Dump dir to save outputs', default=None, required=True)\nparser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')\nparser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')\nparser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]')\nparser.add_argument('--batch_size', type=int, default=1, help='Batch Size during inference [default: 1]')\nparser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size for sparse convolution')\nparser.add_argument('--collision_thresh', type=float, default=0.01,\n                    help='Collision Threshold in collision detection [default: 0.01]')\nparser.add_argument('--voxel_size_cd', type=float, default=0.01, help='Voxel Size for collision detection')\nparser.add_argument('--infer', action='store_true', default=False)\nparser.add_argument('--eval', action='store_true', default=False)\ncfgs = parser.parse_args()\n\n# ------------------------------------------------------------------------- GLOBAL CONFIG BEG\nif not os.path.exists(cfgs.dump_dir):\n    os.mkdir(cfgs.dump_dir)\n\n\n# Init datasets and dataloaders \ndef my_worker_init_fn(worker_id):\n    np.random.seed(np.random.get_state()[1][0] + worker_id)\n    pass\n\n\ndef inference():\n    test_dataset = GraspNetDataset(cfgs.dataset_root, split='test_seen', camera=cfgs.camera, num_points=cfgs.num_point,\n                                   voxel_size=cfgs.voxel_size, remove_outlier=True, augment=False, load_label=False)\n    print('Test dataset length: ', len(test_dataset))\n    scene_list = test_dataset.scene_list()\n    test_dataloader = DataLoader(test_dataset, batch_size=cfgs.batch_size, shuffle=False,\n                                 num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn)\n    print('Test dataloader length: ', len(test_dataloader))\n    # Init the model\n    net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False)\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    net.to(device)\n    # Load checkpoint\n    checkpoint = torch.load(cfgs.checkpoint_path)\n    net.load_state_dict(checkpoint['model_state_dict'])\n    start_epoch = checkpoint['epoch']\n    print(\"-> loaded checkpoint %s (epoch: %d)\" % (cfgs.checkpoint_path, start_epoch))\n\n    batch_interval = 100\n    net.eval()\n    tic = time.time()\n    for batch_idx, batch_data in enumerate(test_dataloader):\n        for key in batch_data:\n            if 'list' in key:\n                for i in range(len(batch_data[key])):\n                   
 for j in range(len(batch_data[key][i])):\n                        batch_data[key][i][j] = batch_data[key][i][j].to(device)\n            else:\n                batch_data[key] = batch_data[key].to(device)\n\n        # Forward pass\n        with torch.no_grad():\n            end_points = net(batch_data)\n            grasp_preds = pred_decode(end_points)\n\n        # Dump results for evaluation\n        for i in range(cfgs.batch_size):\n            data_idx = batch_idx * cfgs.batch_size + i\n            preds = grasp_preds[i].detach().cpu().numpy()\n\n            gg = GraspGroup(preds)\n            # collision detection\n            if cfgs.collision_thresh > 0:\n                cloud = test_dataset.get_data(data_idx, return_raw_cloud=True)\n                mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)\n                collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)\n                gg = gg[~collision_mask]\n\n            # save grasps\n            save_dir = os.path.join(cfgs.dump_dir, scene_list[data_idx], cfgs.camera)\n            save_path = os.path.join(save_dir, str(data_idx % 256).zfill(4) + '.npy')\n            if not os.path.exists(save_dir):\n                os.makedirs(save_dir)\n            gg.save_npy(save_path)\n\n        if (batch_idx + 1) % batch_interval == 0:\n            toc = time.time()\n            print('Eval batch: %d, time: %fs' % (batch_idx + 1, (toc - tic) / batch_interval))\n            tic = time.time()\n\n\ndef evaluate(dump_dir):\n    ge = GraspNetEval(root=cfgs.dataset_root, camera=cfgs.camera, split='test_seen')\n    res, ap = ge.eval_seen(dump_folder=dump_dir, proc=6)\n    print('Evaluation result (AP): %f' % ap)\n    save_path = os.path.join(cfgs.dump_dir, 'ap_{}.npy'.format(cfgs.camera))\n    np.save(save_path, res)\n\n\nif __name__ == '__main__':\n    if cfgs.infer:\n        inference()\n    if cfgs.eval:\n        evaluate(cfgs.dump_dir)\n"
  },
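  {
    "path": "examples/inspect_dump.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: load one .npy file\n    dumped by test.py and inspect the best grasp with graspnetAPI. The dump path\n    below is a placeholder; from_npy and sort_by_score are graspnetAPI GraspGroup\n    methods (check them against your installed version).\n\"\"\"\n\nfrom graspnetAPI import GraspGroup\n\ngg = GraspGroup().from_npy('logs/dump/scene_0100/kinect/0000.npy')  # placeholder path\ngg = gg.sort_by_score()\nprint('number of grasps:', len(gg))\n\n# each grasp stores score, width, height, depth, rotation and translation\ng = gg[0]\nprint('best score:', g.score)\nprint('width:', g.width, 'depth:', g.depth)\nprint('translation:', g.translation)\n"
  },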
  {
    "path": "train.py",
    "content": "import os\nimport sys\nimport numpy as np\nfrom datetime import datetime\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.data import DataLoader\nfrom torch.utils.tensorboard import SummaryWriter\n\nROOT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.join(ROOT_DIR, 'pointnet2'))\nsys.path.append(os.path.join(ROOT_DIR, 'utils'))\nsys.path.append(os.path.join(ROOT_DIR, 'models'))\nsys.path.append(os.path.join(ROOT_DIR, 'dataset'))\n\nfrom models.graspnet import GraspNet\nfrom models.loss import get_loss\nfrom dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn, load_grasp_labels\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset_root', default=None, required=True)\nparser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')\nparser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None)\nparser.add_argument('--model_name', type=str, default=None)\nparser.add_argument('--log_dir', default='logs/log')\nparser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 20000]')\nparser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')\nparser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size to process point clouds ')\nparser.add_argument('--max_epoch', type=int, default=10, help='Epoch to run [default: 18]')\nparser.add_argument('--batch_size', type=int, default=4, help='Batch Size during training [default: 2]')\nparser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')\nparser.add_argument('--resume', action='store_true', default=False, help='Whether to resume from checkpoint')\ncfgs = parser.parse_args()\n# ------------------------------------------------------------------------- GLOBAL CONFIG BEG\nEPOCH_CNT = 0\nCHECKPOINT_PATH = cfgs.checkpoint_path if cfgs.checkpoint_path is not None and cfgs.resume else None\nif not os.path.exists(cfgs.log_dir):\n    os.makedirs(cfgs.log_dir)\n\nLOG_FOUT = open(os.path.join(cfgs.log_dir, 'log_train.txt'), 'a')\nLOG_FOUT.write(str(cfgs) + '\\n')\n\n\ndef log_string(out_str):\n    LOG_FOUT.write(out_str + '\\n')\n    LOG_FOUT.flush()\n    print(out_str)\n\n\n# Init datasets and dataloaders \ndef my_worker_init_fn(worker_id):\n    np.random.seed(np.random.get_state()[1][0] + worker_id)\n    pass\n\n\ngrasp_labels = load_grasp_labels(cfgs.dataset_root)\nTRAIN_DATASET = GraspNetDataset(cfgs.dataset_root, grasp_labels=grasp_labels, camera=cfgs.camera, split='train',\n                                num_points=cfgs.num_point, voxel_size=cfgs.voxel_size,\n                                remove_outlier=True, augment=True, load_label=True)\nprint('train dataset length: ', len(TRAIN_DATASET))\nTRAIN_DATALOADER = DataLoader(TRAIN_DATASET, batch_size=cfgs.batch_size, shuffle=True,\n                              num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn)\nprint('train dataloader length: ', len(TRAIN_DATALOADER))\n\nnet = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=True)\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nnet.to(device)\n# Load the Adam optimizer\noptimizer = optim.Adam(net.parameters(), lr=cfgs.learning_rate)\nstart_epoch = 0\nif CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH):\n    checkpoint = torch.load(CHECKPOINT_PATH)\n    
net.load_state_dict(checkpoint['model_state_dict'])\n    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n    start_epoch = checkpoint['epoch']\n    log_string(\"-> loaded checkpoint %s (epoch: %d)\" % (CHECKPOINT_PATH, start_epoch))\n# TensorBoard Visualizers\nTRAIN_WRITER = SummaryWriter(os.path.join(cfgs.log_dir, 'train'))\n\n\ndef get_current_lr(epoch):\n    lr = cfgs.learning_rate\n    lr = lr * (0.95 ** epoch)\n    return lr\n\n\ndef adjust_learning_rate(optimizer, epoch):\n    lr = get_current_lr(epoch)\n    for param_group in optimizer.param_groups:\n        param_group['lr'] = lr\n\n\ndef train_one_epoch():\n    stat_dict = {}  # collect statistics\n    adjust_learning_rate(optimizer, EPOCH_CNT)\n    net.train()\n    batch_interval = 20\n    for batch_idx, batch_data_label in enumerate(TRAIN_DATALOADER):\n        for key in batch_data_label:\n            if 'list' in key:\n                for i in range(len(batch_data_label[key])):\n                    for j in range(len(batch_data_label[key][i])):\n                        batch_data_label[key][i][j] = batch_data_label[key][i][j].to(device)\n            else:\n                batch_data_label[key] = batch_data_label[key].to(device)\n\n        end_points = net(batch_data_label)\n        loss, end_points = get_loss(end_points)\n        loss.backward()\n        optimizer.step()\n        optimizer.zero_grad()\n\n        for key in end_points:\n            if 'loss' in key or 'acc' in key or 'prec' in key or 'recall' in key or 'count' in key:\n                if key not in stat_dict:\n                    stat_dict[key] = 0\n                stat_dict[key] += end_points[key].item()\n\n        if (batch_idx + 1) % batch_interval == 0:\n            log_string(' ----epoch: %03d  ---- batch: %03d ----' % (EPOCH_CNT, batch_idx + 1))\n            for key in sorted(stat_dict.keys()):\n                TRAIN_WRITER.add_scalar(key, stat_dict[key] / batch_interval,\n                                        (EPOCH_CNT * len(TRAIN_DATALOADER) + batch_idx) * cfgs.batch_size)\n                log_string('mean %s: %f' % (key, stat_dict[key] / batch_interval))\n                stat_dict[key] = 0\n\n\ndef train(start_epoch):\n    global EPOCH_CNT\n    for epoch in range(start_epoch, cfgs.max_epoch):\n        EPOCH_CNT = epoch\n        log_string('**** EPOCH %03d ****' % epoch)\n        log_string('Current learning rate: %f' % (get_current_lr(epoch)))\n        log_string(str(datetime.now()))\n        # Reset numpy seed.\n        # REF: https://github.com/pytorch/pytorch/issues/5059\n        np.random.seed()\n        train_one_epoch()\n\n        save_dict = {'epoch': epoch + 1, 'optimizer_state_dict': optimizer.state_dict(),\n                     'model_state_dict': net.state_dict()}\n        torch.save(save_dict, os.path.join(cfgs.log_dir, cfgs.model_name + '_epoch' + str(epoch + 1).zfill(2) + '.tar'))\n\n\nif __name__ == '__main__':\n    train(start_epoch)\n"
  },
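  {
    "path": "examples/lr_schedule_demo.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: the exponential\n    learning-rate decay used by get_current_lr in train.py, lr_e = lr_0 * 0.95**e,\n    computed standalone for the default initial rate of 0.001.\n\"\"\"\n\ninitial_lr = 0.001\nfor epoch in range(10):\n    lr = initial_lr * (0.95 ** epoch)\n    print('epoch %02d  lr %.6f' % (epoch, lr))\n# epoch 00 -> 0.001000; epoch 09 -> 0.000630, i.e. roughly a 37% decay over a run\n"
  },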
  {
    "path": "utils/collision_detector.py",
    "content": "\"\"\" Collision detection to remove collided grasp pose predictions.\nAuthor: chenxi-wang\n\"\"\"\n\nimport os\nimport sys\nimport numpy as np\nimport open3d as o3d\n\nclass ModelFreeCollisionDetector():\n    \"\"\" Collision detection in scenes without object labels. Current finger width and length are fixed.\n\n        Input:\n                scene_points: [numpy.ndarray, (N,3), numpy.float32]\n                    the scene points to detect\n                voxel_size: [float]\n                    used for downsample\n\n        Example usage:\n            mfcdetector = ModelFreeCollisionDetector(scene_points, voxel_size=0.005)\n            collision_mask = mfcdetector.detect(grasp_group, approach_dist=0.03)\n            collision_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, return_ious=True)\n            collision_mask, empty_mask = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,\n                                            return_empty_grasp=True, empty_thresh=0.01)\n            collision_mask, empty_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,\n                                            return_empty_grasp=True, empty_thresh=0.01, return_ious=True)\n    \"\"\"\n    def __init__(self, scene_points, voxel_size=0.005):\n        self.finger_width = 0.01\n        self.finger_length = 0.06\n        self.voxel_size = voxel_size\n        scene_cloud = o3d.geometry.PointCloud()\n        scene_cloud.points = o3d.utility.Vector3dVector(scene_points)\n        scene_cloud = scene_cloud.voxel_down_sample(voxel_size)\n        self.scene_points = np.array(scene_cloud.points)\n\n    def detect(self, grasp_group, approach_dist=0.03, collision_thresh=0.05, return_empty_grasp=False, empty_thresh=0.01, return_ious=False):\n        \"\"\" Detect collision of grasps.\n\n            Input:\n                grasp_group: [GraspGroup, M grasps]\n                    the grasps to check\n                approach_dist: [float]\n                    the distance for a gripper to move along approaching direction before grasping\n                    this shifting space requires no point either\n                collision_thresh: [float]\n                    if global collision iou is greater than this threshold,\n                    a collision is detected\n                return_empty_grasp: [bool]\n                    if True, return a mask to imply whether there are objects in a grasp\n                empty_thresh: [float]\n                    if inner space iou is smaller than this threshold,\n                    a collision is detected\n                    only set when [return_empty_grasp] is True\n                return_ious: [bool]\n                    if True, return global collision iou and part collision ious\n                    \n            Output:\n                collision_mask: [numpy.ndarray, (M,), numpy.bool]\n                    True implies collision\n                [optional] empty_mask: [numpy.ndarray, (M,), numpy.bool]\n                    True implies empty grasp\n                    only returned when [return_empty_grasp] is True\n                [optional] iou_list: list of [numpy.ndarray, (M,), numpy.float32]\n                    global and part collision ious, containing\n                    [global_iou, left_iou, right_iou, bottom_iou, shifting_iou]\n                    only returned when [return_ious] is True\n        \"\"\"\n        approach_dist = 
max(approach_dist, self.finger_width)\n        T = grasp_group.translations\n        R = grasp_group.rotation_matrices\n        heights = grasp_group.heights[:,np.newaxis]\n        depths = grasp_group.depths[:,np.newaxis]\n        widths = grasp_group.widths[:,np.newaxis]\n        targets = self.scene_points[np.newaxis,:,:] - T[:,np.newaxis,:]\n        targets = np.matmul(targets, R)\n\n        ## collision detection\n        # height mask\n        mask1 = ((targets[:,:,2] > -heights/2) & (targets[:,:,2] < heights/2))\n        # left finger mask\n        mask2 = ((targets[:,:,0] > depths - self.finger_length) & (targets[:,:,0] < depths))\n        mask3 = (targets[:,:,1] > -(widths/2 + self.finger_width))\n        mask4 = (targets[:,:,1] < -widths/2)\n        # right finger mask\n        mask5 = (targets[:,:,1] < (widths/2 + self.finger_width))\n        mask6 = (targets[:,:,1] > widths/2)\n        # bottom mask\n        mask7 = ((targets[:,:,0] <= depths - self.finger_length)\\\n                & (targets[:,:,0] > depths - self.finger_length - self.finger_width))\n        # shifting mask\n        mask8 = ((targets[:,:,0] <= depths - self.finger_length - self.finger_width)\\\n                & (targets[:,:,0] > depths - self.finger_length - self.finger_width - approach_dist))\n\n        # get collision mask of each point\n        left_mask = (mask1 & mask2 & mask3 & mask4)\n        right_mask = (mask1 & mask2 & mask5 & mask6)\n        bottom_mask = (mask1 & mask3 & mask5 & mask7)\n        shifting_mask = (mask1 & mask3 & mask5 & mask8)\n        global_mask = (left_mask | right_mask | bottom_mask | shifting_mask)\n\n        # calculate equivalent volume of each part\n        left_right_volume = (heights * self.finger_length * self.finger_width / (self.voxel_size**3)).reshape(-1)\n        bottom_volume = (heights * (widths+2*self.finger_width) * self.finger_width / (self.voxel_size**3)).reshape(-1)\n        shifting_volume = (heights * (widths+2*self.finger_width) * approach_dist / (self.voxel_size**3)).reshape(-1)\n        volume = left_right_volume*2 + bottom_volume + shifting_volume\n\n        # get global collision iou\n        global_iou = global_mask.sum(axis=1) / (volume+1e-6)\n\n        # get collision mask\n        collision_mask = (global_iou > collision_thresh)\n\n        if not (return_empty_grasp or return_ious):\n            return collision_mask\n\n        ret_value = [collision_mask,]\n        if return_empty_grasp:\n            inner_mask = (mask1 & mask2 & (~mask4) & (~mask6))\n            inner_volume = (heights * self.finger_length * widths / (self.voxel_size**3)).reshape(-1)\n            empty_mask = (inner_mask.sum(axis=-1)/inner_volume < empty_thresh)\n            ret_value.append(empty_mask)\n        if return_ious:\n            left_iou = left_mask.sum(axis=1) / (left_right_volume+1e-6)\n            right_iou = right_mask.sum(axis=1) / (left_right_volume+1e-6)\n            bottom_iou = bottom_mask.sum(axis=1) / (bottom_volume+1e-6)\n            shifting_iou = shifting_mask.sum(axis=1) / (shifting_volume+1e-6)\n            ret_value.append([global_iou, left_iou, right_iou, bottom_iou, shifting_iou])\n        return ret_value\n"
  },
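  {
    "path": "examples/collision_demo.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: run\n    ModelFreeCollisionDetector on a synthetic tabletop scene. The 17-column grasp\n    array (score, width, height, depth, 9 rotation entries, 3 translation entries,\n    object id) follows graspnetAPI's GraspGroup convention; treat the layout as an\n    assumption and verify it against your graspnetAPI version. Assumes the\n    repository root is on sys.path.\n\"\"\"\n\nimport numpy as np\nfrom graspnetAPI import GraspGroup\nfrom utils.collision_detector import ModelFreeCollisionDetector\n\n# a flat synthetic table at z = 0.5 m\nxs, ys = np.meshgrid(np.linspace(-0.2, 0.2, 100), np.linspace(-0.2, 0.2, 100))\nscene_points = np.stack([xs, ys, np.full_like(xs, 0.5)], axis=-1).reshape(-1, 3)\n\n# one grasp hovering 5 cm above the table, identity rotation\ngrasp = np.zeros(17, dtype=np.float32)\ngrasp[0] = 1.0                        # score\ngrasp[1] = 0.06                       # width\ngrasp[2] = 0.02                       # height\ngrasp[3] = 0.03                       # depth\ngrasp[4:13] = np.eye(3).reshape(-1)   # rotation matrix (row-major)\ngrasp[13:16] = [0.0, 0.0, 0.45]       # translation\ngg = GraspGroup(np.array([grasp]))\n\nmfcdetector = ModelFreeCollisionDetector(scene_points, voxel_size=0.005)\ncollision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=0.01)\nprint('collision:', collision_mask)  # expected: [False], the gripper clears the table\n"
  },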
  {
    "path": "utils/data_utils.py",
    "content": "\"\"\" Tools for data processing.\n    Author: chenxi-wang\n\"\"\"\n\nimport numpy as np\n\n\nclass CameraInfo():\n    \"\"\" Camera intrisics for point cloud creation. \"\"\"\n\n    def __init__(self, width, height, fx, fy, cx, cy, scale):\n        self.width = width\n        self.height = height\n        self.fx = fx\n        self.fy = fy\n        self.cx = cx\n        self.cy = cy\n        self.scale = scale\n\n\ndef create_point_cloud_from_depth_image(depth, camera, organized=True):\n    \"\"\" Generate point cloud using depth image only.\n\n        Input:\n            depth: [numpy.ndarray, (H,W), numpy.float32]\n                depth image\n            camera: [CameraInfo]\n                camera intrinsics\n            organized: bool\n                whether to keep the cloud in image shape (H,W,3)\n\n        Output:\n            cloud: [numpy.ndarray, (H,W,3)/(H*W,3), numpy.float32]\n                generated cloud, (H,W,3) for organized=True, (H*W,3) for organized=False\n    \"\"\"\n    assert (depth.shape[0] == camera.height and depth.shape[1] == camera.width)\n    xmap = np.arange(camera.width)\n    ymap = np.arange(camera.height)\n    xmap, ymap = np.meshgrid(xmap, ymap)\n    points_z = depth / camera.scale\n    points_x = (xmap - camera.cx) * points_z / camera.fx\n    points_y = (ymap - camera.cy) * points_z / camera.fy\n    cloud = np.stack([points_x, points_y, points_z], axis=-1)\n    if not organized:\n        cloud = cloud.reshape([-1, 3])\n    return cloud\n\n\ndef transform_point_cloud(cloud, transform, format='4x4'):\n    \"\"\" Transform points to new coordinates with transformation matrix.\n\n        Input:\n            cloud: [np.ndarray, (N,3), np.float32]\n                points in original coordinates\n            transform: [np.ndarray, (3,3)/(3,4)/(4,4), np.float32]\n                transformation matrix, could be rotation only or rotation+translation\n            format: [string, '3x3'/'3x4'/'4x4']\n                the shape of transformation matrix\n                '3x3' --> rotation matrix\n                '3x4'/'4x4' --> rotation matrix + translation matrix\n\n        Output:\n            cloud_transformed: [np.ndarray, (N,3), np.float32]\n                points in new coordinates\n    \"\"\"\n    if not (format == '3x3' or format == '4x4' or format == '3x4'):\n        raise ValueError('Unknown transformation format, only support \\'3x3\\' or \\'4x4\\' or \\'3x4\\'.')\n    if format == '3x3':\n        cloud_transformed = np.dot(transform, cloud.T).T\n    elif format == '4x4' or format == '3x4':\n        ones = np.ones(cloud.shape[0])[:, np.newaxis]\n        cloud_ = np.concatenate([cloud, ones], axis=1)\n        cloud_transformed = np.dot(transform, cloud_.T).T\n        cloud_transformed = cloud_transformed[:, :3]\n    return cloud_transformed\n\n\ndef compute_point_dists(A, B):\n    \"\"\" Compute pair-wise point distances in two matrices.\n\n        Input:\n            A: [np.ndarray, (N,3), np.float32]\n                point cloud A\n            B: [np.ndarray, (M,3), np.float32]\n                point cloud B\n\n        Output:\n            dists: [np.ndarray, (N,M), np.float32]\n                distance matrix\n    \"\"\"\n    A = A[:, np.newaxis, :]\n    B = B[np.newaxis, :, :]\n    dists = np.linalg.norm(A - B, axis=-1)\n    return dists\n\n\ndef remove_invisible_grasp_points(cloud, grasp_points, pose, th=0.01):\n    \"\"\" Remove invisible part of object model according to scene point cloud.\n\n        Input:\n            cloud: 
[np.ndarray, (N,3), np.float32]\n                scene point cloud\n            grasp_points: [np.ndarray, (M,3), np.float32]\n                grasp point label in object coordinates\n            pose: [np.ndarray, (4,4), np.float32]\n                transformation matrix from object coordinates to world coordinates\n            th: [float]\n                if the minimum distance between a grasp point and the scene points is greater than th, the grasp point is regarded as invisible and removed\n\n        Output:\n            visible_mask: [np.ndarray, (M,), np.bool]\n                mask to show the visible part of grasp points\n    \"\"\"\n    grasp_points_trans = transform_point_cloud(grasp_points, pose)\n    dists = compute_point_dists(grasp_points_trans, cloud)\n    min_dists = dists.min(axis=1)\n    visible_mask = (min_dists < th)\n    return visible_mask\n\n\ndef get_workspace_mask(cloud, seg, trans=None, organized=True, outlier=0):\n    \"\"\" Keep points in workspace as input.\n\n        Input:\n            cloud: [np.ndarray, (H,W,3), np.float32]\n                scene point cloud\n            seg: [np.ndarray, (H,W,), np.uint8]\n                segmentation label of scene points\n            trans: [np.ndarray, (4,4), np.float32]\n                transformation matrix for scene points, default: None.\n            organized: [bool]\n                whether to keep the cloud in image shape (H,W,3)\n            outlier: [float]\n                if the distance between a point and workspace is greater than outlier, the point will be removed\n                \n        Output:\n            workspace_mask: [np.ndarray, (H,W)/(H*W,), np.bool]\n                mask to indicate whether scene points are in workspace\n    \"\"\"\n    if organized:\n        h, w, _ = cloud.shape\n        cloud = cloud.reshape([h * w, 3])\n        seg = seg.reshape(h * w)\n    if trans is not None:\n        cloud = transform_point_cloud(cloud, trans)\n    foreground = cloud[seg > 0]\n    xmin, ymin, zmin = foreground.min(axis=0)\n    xmax, ymax, zmax = foreground.max(axis=0)\n    mask_x = ((cloud[:, 0] > xmin - outlier) & (cloud[:, 0] < xmax + outlier))\n    mask_y = ((cloud[:, 1] > ymin - outlier) & (cloud[:, 1] < ymax + outlier))\n    mask_z = ((cloud[:, 2] > zmin - outlier) & (cloud[:, 2] < zmax + outlier))\n    workspace_mask = (mask_x & mask_y & mask_z)\n    if organized:\n        workspace_mask = workspace_mask.reshape([h, w])\n\n    return workspace_mask\n"
  },
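  {
    "path": "examples/point_cloud_demo.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: build a point cloud\n    from a synthetic constant-depth image with the helpers in utils/data_utils.py,\n    then shift it with a homogeneous transform. The intrinsics are made-up values,\n    not those of the dataset cameras; assumes the repository root is on sys.path.\n\"\"\"\n\nimport numpy as np\nfrom utils.data_utils import CameraInfo, create_point_cloud_from_depth_image, transform_point_cloud\n\n# 640x480 image, depth stored in millimetres (scale=1000), everything 0.8 m away\ncamera = CameraInfo(640.0, 480.0, 600.0, 600.0, 320.0, 240.0, 1000.0)\ndepth = np.full((480, 640), 800, dtype=np.float32)\n\ncloud = create_point_cloud_from_depth_image(depth, camera, organized=False)  # (H*W, 3)\nprint(cloud.shape, float(cloud[:, 2].min()))  # every z equals 0.8\n\n# shift the cloud 10 cm along +x with a 4x4 homogeneous transform\ntrans = np.eye(4, dtype=np.float32)\ntrans[0, 3] = 0.1\ncloud_shifted = transform_point_cloud(cloud, trans, format='4x4')\nprint(float(cloud_shifted[:, 0].mean() - cloud[:, 0].mean()))  # 0.1\n"
  },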
  {
    "path": "utils/label_generation.py",
    "content": "\"\"\" Dynamically generate grasp labels during training.\n    Author: chenxi-wang\n\"\"\"\n\nimport os\nimport sys\nimport torch\n\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nROOT_DIR = os.path.dirname(BASE_DIR)\nsys.path.append(ROOT_DIR)\n# sys.path.append(os.path.join(ROOT_DIR, 'knn'))\n\nfrom knn.knn_modules import knn\nfrom loss_utils import GRASP_MAX_WIDTH, batch_viewpoint_params_to_matrix, \\\n    transform_point_cloud, generate_grasp_views\n\n\ndef process_grasp_labels(end_points):\n    \"\"\" Process labels according to scene points and object poses. \"\"\"\n    seed_xyzs = end_points['xyz_graspable']  # (B, M_point, 3)\n    batch_size, num_samples, _ = seed_xyzs.size()\n\n    batch_grasp_points = []\n    batch_grasp_views_rot = []\n    batch_grasp_scores = []\n    batch_grasp_widths = []\n    for i in range(batch_size):\n        seed_xyz = seed_xyzs[i]  # (Ns, 3)\n        poses = end_points['object_poses_list'][i]  # [(3, 4),]\n\n        # get merged grasp points for label computation\n        grasp_points_merged = []\n        grasp_views_rot_merged = []\n        grasp_scores_merged = []\n        grasp_widths_merged = []\n        for obj_idx, pose in enumerate(poses):\n            grasp_points = end_points['grasp_points_list'][i][obj_idx]  # (Np, 3)\n            grasp_scores = end_points['grasp_scores_list'][i][obj_idx]  # (Np, V, A, D)\n            grasp_widths = end_points['grasp_widths_list'][i][obj_idx]  # (Np, V, A, D)\n            _, V, A, D = grasp_scores.size()\n            num_grasp_points = grasp_points.size(0)\n            # generate and transform template grasp views\n            grasp_views = generate_grasp_views(V).to(pose.device)  # (V, 3)\n            grasp_points_trans = transform_point_cloud(grasp_points, pose, '3x4')\n            grasp_views_trans = transform_point_cloud(grasp_views, pose[:3, :3], '3x3')\n            # generate and transform template grasp view rotation\n            angles = torch.zeros(grasp_views.size(0), dtype=grasp_views.dtype, device=grasp_views.device)\n            grasp_views_rot = batch_viewpoint_params_to_matrix(-grasp_views, angles)  # (V, 3, 3)\n            grasp_views_rot_trans = torch.matmul(pose[:3, :3], grasp_views_rot)  # (V, 3, 3)\n\n            # assign views\n            grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0)\n            grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0)\n            view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1\n            grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds)  # (V, 3, 3)\n            grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1,\n                                                                              -1)  # (Np, V, 3, 3)\n            grasp_scores = torch.index_select(grasp_scores, 1, view_inds)  # (Np, V, A, D)\n            grasp_widths = torch.index_select(grasp_widths, 1, view_inds)  # (Np, V, A, D)\n            # add to list\n            grasp_points_merged.append(grasp_points_trans)\n            grasp_views_rot_merged.append(grasp_views_rot_trans)\n            grasp_scores_merged.append(grasp_scores)\n            grasp_widths_merged.append(grasp_widths)\n\n        grasp_points_merged = torch.cat(grasp_points_merged, dim=0)  # (Np', 3)\n        grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0)  # (Np', V, 3, 3)\n        grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0)  # 
(Np', V, A, D)\n        grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0)  # (Np', V, A, D)\n\n        # compute nearest neighbors\n        seed_xyz_ = seed_xyz.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Ns)\n        grasp_points_merged_ = grasp_points_merged.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Np')\n        nn_inds = knn(grasp_points_merged_, seed_xyz_, k=1).squeeze() - 1  # (Ns), knn returns 1-based indices\n\n        # assign anchor points to real points\n        grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds)  # (Ns, 3)\n        grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds)  # (Ns, V, 3, 3)\n        grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds)  # (Ns, V, A, D)\n        grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds)  # (Ns, V, A, D)\n\n        # add to batch\n        batch_grasp_points.append(grasp_points_merged)\n        batch_grasp_views_rot.append(grasp_views_rot_merged)\n        batch_grasp_scores.append(grasp_scores_merged)\n        batch_grasp_widths.append(grasp_widths_merged)\n\n    batch_grasp_points = torch.stack(batch_grasp_points, 0)  # (B, Ns, 3)\n    batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0)  # (B, Ns, V, 3, 3)\n    batch_grasp_scores = torch.stack(batch_grasp_scores, 0)  # (B, Ns, V, A, D)\n    batch_grasp_widths = torch.stack(batch_grasp_widths, 0)  # (B, Ns, V, A, D)\n\n    # compute view graspness\n    view_u_threshold = 0.6\n    view_grasp_num = 48  # grasps per view: NUM_ANGLE * NUM_DEPTH = 12 * 4\n    batch_grasp_view_valid_mask = (batch_grasp_scores <= view_u_threshold) & (batch_grasp_scores > 0)  # (B, Ns, V, A, D)\n    batch_grasp_view_valid = batch_grasp_view_valid_mask.float()\n    batch_grasp_view_graspness = torch.sum(torch.sum(batch_grasp_view_valid, dim=-1), dim=-1) / view_grasp_num  # (B, Ns, V)\n    num_views = batch_grasp_view_graspness.size(-1)  # V (300 by default) instead of a hard-coded constant\n    view_graspness_min, _ = torch.min(batch_grasp_view_graspness, dim=-1)  # (B, Ns)\n    view_graspness_max, _ = torch.max(batch_grasp_view_graspness, dim=-1)\n    view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, num_views)  # (B, Ns, V)\n    view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, num_views)\n    batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5)\n\n    # process scores\n    label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH)  # (B, Ns, V, A, D)\n    batch_grasp_scores[~label_mask] = 0\n\n    end_points['batch_grasp_point'] = batch_grasp_points\n    end_points['batch_grasp_view_rot'] = batch_grasp_views_rot\n    end_points['batch_grasp_score'] = batch_grasp_scores\n    end_points['batch_grasp_width'] = batch_grasp_widths\n    end_points['batch_grasp_view_graspness'] = batch_grasp_view_graspness\n\n    return end_points\n\n\ndef match_grasp_view_and_label(end_points):\n    \"\"\" Slice grasp labels according to predicted views. 
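Positive scores are additionally log-normalized so that u_max maps to 0 and u_min maps to ~1, i.e. lower friction coefficients (more robust grasps) get higher targets. 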
\"\"\"\n    top_view_inds = end_points['grasp_top_view_inds']  # (B, Ns)\n    template_views_rot = end_points['batch_grasp_view_rot']  # (B, Ns, V, 3, 3)\n    grasp_scores = end_points['batch_grasp_score']  # (B, Ns, V, A, D)\n    grasp_widths = end_points['batch_grasp_width']  # (B, Ns, V, A, D, 3)\n\n    B, Ns, V, A, D = grasp_scores.size()\n    top_view_inds_ = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, 3, 3)\n    top_template_views_rot = torch.gather(template_views_rot, 2, top_view_inds_).squeeze(2)\n    top_view_inds_ = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, A, D)\n    top_view_grasp_scores = torch.gather(grasp_scores, 2, top_view_inds_).squeeze(2)\n    top_view_grasp_widths = torch.gather(grasp_widths, 2, top_view_inds_).squeeze(2)\n\n    u_max = top_view_grasp_scores.max()\n    po_mask = top_view_grasp_scores > 0\n    po_mask_num = torch.sum(po_mask)\n    if po_mask_num > 0:\n        u_min = top_view_grasp_scores[po_mask].min()\n        top_view_grasp_scores[po_mask] = torch.log(u_max / top_view_grasp_scores[po_mask]) / (torch.log(u_max / u_min) + 1e-6)\n\n    end_points['batch_grasp_score'] = top_view_grasp_scores  # (B, Ns, A, D)\n    end_points['batch_grasp_width'] = top_view_grasp_widths  # (B, Ns, A, D)\n\n    return top_template_views_rot, end_points\n"
  },
  {
    "path": "utils/loss_utils.py",
    "content": "\"\"\" Tools for loss computation.\n    Author: chenxi-wang\n\"\"\"\n\nimport torch\nimport numpy as np\n\nGRASP_MAX_WIDTH = 0.1\nGRASPNESS_THRESHOLD = 0.1\nNUM_VIEW = 300\nNUM_ANGLE = 12\nNUM_DEPTH = 4\nM_POINT = 1024\n\n\ndef transform_point_cloud(cloud, transform, format='4x4'):\n    \"\"\" Transform points to new coordinates with transformation matrix.\n\n        Input:\n            cloud: [torch.FloatTensor, (N,3)]\n                points in original coordinates\n            transform: [torch.FloatTensor, (3,3)/(3,4)/(4,4)]\n                transformation matrix, could be rotation only or rotation+translation\n            format: [string, '3x3'/'3x4'/'4x4']\n                the shape of transformation matrix\n                '3x3' --> rotation matrix\n                '3x4'/'4x4' --> rotation matrix + translation matrix\n\n        Output:\n            cloud_transformed: [torch.FloatTensor, (N,3)]\n                points in new coordinates\n    \"\"\"\n    if not (format == '3x3' or format == '4x4' or format == '3x4'):\n        raise ValueError('Unknown transformation format, only support \\'3x3\\' or \\'4x4\\' or \\'3x4\\'.')\n    if format == '3x3':\n        cloud_transformed = torch.matmul(transform, cloud.T).T\n    elif format == '4x4' or format == '3x4':\n        ones = cloud.new_ones(cloud.size(0), device=cloud.device).unsqueeze(-1)\n        cloud_ = torch.cat([cloud, ones], dim=1)\n        cloud_transformed = torch.matmul(transform, cloud_.T).T\n        cloud_transformed = cloud_transformed[:, :3]\n    return cloud_transformed\n\n\ndef generate_grasp_views(N=300, phi=(np.sqrt(5) - 1) / 2, center=np.zeros(3), r=1):\n    \"\"\" View sampling on a unit sphere using Fibonacci lattices.\n        Ref: https://arxiv.org/abs/0912.4540\n\n        Input:\n            N: [int]\n                number of sampled views\n            phi: [float]\n                constant for view coordinate calculation, different phi's bring different distributions, default: (sqrt(5)-1)/2\n            center: [np.ndarray, (3,), np.float32]\n                sphere center\n            r: [float]\n                sphere radius\n\n        Output:\n            views: [torch.FloatTensor, (N,3)]\n                sampled view coordinates\n    \"\"\"\n    views = []\n    for i in range(N):\n        zi = (2 * i + 1) / N - 1\n        xi = np.sqrt(1 - zi ** 2) * np.cos(2 * i * np.pi * phi)\n        yi = np.sqrt(1 - zi ** 2) * np.sin(2 * i * np.pi * phi)\n        views.append([xi, yi, zi])\n    views = r * np.array(views) + center\n    return torch.from_numpy(views.astype(np.float32))\n\n\ndef batch_viewpoint_params_to_matrix(batch_towards, batch_angle):\n    \"\"\" Transform approach vectors and in-plane rotation angles to rotation matrices.\n\n        Input:\n            batch_towards: [torch.FloatTensor, (N,3)]\n                approach vectors in batch\n            batch_angle: [torch.floatTensor, (N,)]\n                in-plane rotation angles in batch\n                \n        Output:\n            batch_matrix: [torch.floatTensor, (N,3,3)]\n                rotation matrices in batch\n    \"\"\"\n    axis_x = batch_towards\n    ones = torch.ones(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)\n    zeros = torch.zeros(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)\n    axis_y = torch.stack([-axis_x[:, 1], axis_x[:, 0], zeros], dim=-1)\n    mask_y = (torch.norm(axis_y, dim=-1) == 0)\n    axis_y[mask_y, 1] = 1\n    axis_x = axis_x / torch.norm(axis_x, dim=-1, keepdim=True)\n    
axis_y = axis_y / torch.norm(axis_y, dim=-1, keepdim=True)\n    axis_z = torch.cross(axis_x, axis_y, dim=-1)\n    sin = torch.sin(batch_angle)\n    cos = torch.cos(batch_angle)\n    R1 = torch.stack([ones, zeros, zeros, zeros, cos, -sin, zeros, sin, cos], dim=-1)\n    R1 = R1.reshape([-1, 3, 3])\n    R2 = torch.stack([axis_x, axis_y, axis_z], dim=-1)\n    batch_matrix = torch.matmul(R2, R1)\n    return batch_matrix\n\n\ndef huber_loss(error, delta=1.0):\n    \"\"\"\n    Args:\n        error: Torch tensor (d1,d2,...,dk)\n    Returns:\n        loss: Torch tensor (d1,d2,...,dk)\n\n    x = error = pred - gt or dist(pred,gt)\n    0.5 * |x|^2                 if |x|<=d\n    0.5 * d^2 + d * (|x|-d)     if |x|>d\n    Author: Charles R. Qi\n    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py\n    \"\"\"\n    abs_error = torch.abs(error)\n    quadratic = torch.clamp(abs_error, max=delta)\n    linear = (abs_error - quadratic)\n    loss = 0.5 * quadratic ** 2 + delta * linear\n    return loss\n"
  }
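  ,
  {
    "path": "examples/loss_utils_demo.py",
    "content": "\"\"\" Illustrative sketch, not part of the original codebase: exercise the helpers\n    in utils/loss_utils.py. Samples approach vectors on the view sphere, converts\n    them to rotation matrices, checks orthonormality, and evaluates the Huber\n    loss; assumes the repository root is on sys.path.\n\"\"\"\n\nimport torch\nfrom utils.loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix, huber_loss\n\nviews = generate_grasp_views(N=300)          # (300, 3) points on the unit sphere\nangles = torch.zeros(views.size(0))          # zero in-plane rotation\nR = batch_viewpoint_params_to_matrix(-views, angles)  # (300, 3, 3)\n\n# valid rotation matrices satisfy R R^T = I\nerr = torch.matmul(R, R.transpose(1, 2)) - torch.eye(3)\nprint('max orthonormality error:', err.abs().max().item())\n\n# Huber loss is quadratic inside |x| <= delta and linear outside\nerror = torch.linspace(-3, 3, 7)\nprint(huber_loss(error, delta=1.0))\n"
  }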
]