Repository: StanfordVL/JRMOT_ROS Branch: master Commit: ca1e87e51ecf Files: 100 Total size: 715.3 KB Directory structure: gitextract_mbelvxqp/ ├── CMakeLists.txt ├── LICENSE ├── README.md ├── calib/ │ ├── cameras.yaml │ └── defaults.yaml ├── config/ │ └── featurepointnet.cfg ├── launch/ │ └── jpda_tracker.launch ├── msg/ │ ├── __init__.py │ ├── detection2d_with_feature.msg │ ├── detection2d_with_feature_array.msg │ ├── detection3d_with_feature.msg │ └── detection3d_with_feature_array.msg ├── package.xml ├── paper_experiments/ │ ├── models/ │ │ ├── __init__.py │ │ ├── aligned_reid_model.py │ │ ├── combination_model.py │ │ ├── deep_sort_model.py │ │ ├── featurepointnet_model.py │ │ ├── pointnet_model.py │ │ ├── resnet_reid_models.py │ │ └── yolo_models.py │ ├── requirements.txt │ ├── track.py │ └── utils/ │ ├── EKF.py │ ├── JPDA_matching.py │ ├── aligned_reid_utils.py │ ├── assign_ids_detections.py │ ├── calibration.py │ ├── combine_and_process_detections.py │ ├── dataset.py │ ├── deep_sort_utils.py │ ├── detection.py │ ├── double_measurement_kf.py │ ├── evaluate_detections.py │ ├── featurepointnet_model_util.py │ ├── featurepointnet_tf_util.py │ ├── imm.py │ ├── iou_matching.py │ ├── kf_2d.py │ ├── kf_3d.py │ ├── linear_assignment.py │ ├── logger.py │ ├── mbest_ilp.py │ ├── nn_matching.py │ ├── pointnet_tf_util.py │ ├── pointnet_transform_nets.py │ ├── read_detections.py │ ├── resnet_reid_utils.py │ ├── test_jpda.py │ ├── test_kf/ │ │ ├── .gitignore │ │ ├── run_kf_test.py │ │ ├── single_track_4state_test.p.val │ │ ├── single_track_6state_test.p.val │ │ ├── two_track_4state_test.p.val │ │ └── write_kf_test.py │ ├── track.py │ ├── track_3d.py │ ├── tracker.py │ ├── tracker_3d.py │ ├── tracking_utils.py │ ├── visualise.py │ └── yolo_utils/ │ ├── __init__.py │ ├── datasets.py │ ├── parse_config.py │ └── utils.py ├── requirements.txt └── src/ ├── 3d_detector.py ├── EKF.py ├── JPDA_matching.py ├── __init__.py ├── aligned_reid_model.py ├── aligned_reid_utils.py ├── 
calibration.py ├── combination_model.py ├── deep_sort_utils.py ├── detection.py ├── distances.py ├── double_measurement_kf.py ├── evaluation/ │ ├── __init__.py │ ├── distances 2.py │ └── distances.py ├── featurepointnet_model.py ├── featurepointnet_model_util.py ├── featurepointnet_tf_util.py ├── iou_matching.py ├── kf_2d.py ├── linear_assignment.py ├── mbest_ilp.py ├── nn_matching.py ├── pointnet_model.py ├── template 2.py ├── template.py ├── track_3d 2.py ├── track_3d.py ├── tracker_3d 2.py ├── tracker_3d.py ├── tracker_3d_node 2.py ├── tracker_3d_node.py ├── tracking_utils 2.py └── tracking_utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 2.8.3) project(jpda_rospack) ## Compile as C++11, supported in ROS Kinetic and newer # add_compile_options(-std=c++11) ## Find catkin macros and libraries ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) ## is used, also find other catkin packages find_package(catkin REQUIRED COMPONENTS roscpp rospy std_msgs vision_msgs message_generation ) ## System dependencies are found with CMake's conventions # find_package(Boost REQUIRED COMPONENTS system) ## Uncomment this if the package has a setup.py. This macro ensures ## modules and global scripts declared therein get installed ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html # catkin_python_setup() ################################################ ## Declare ROS messages, services and actions ## ################################################ ## To declare and build messages, services or actions from within this ## package, follow these steps: ## * Let MSG_DEP_SET be the set of packages whose message types you use in ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 
## * In the file package.xml: ## * add a build_depend tag for "message_generation" ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in ## but can be declared for certainty nonetheless: ## * add a exec_depend tag for "message_runtime" ## * In this file (CMakeLists.txt): ## * add "message_generation" and every package in MSG_DEP_SET to ## find_package(catkin REQUIRED COMPONENTS ...) ## * add "message_runtime" and every package in MSG_DEP_SET to ## catkin_package(CATKIN_DEPENDS ...) ## * uncomment the add_*_files sections below as needed ## and list every .msg/.srv/.action file to be processed ## * uncomment the generate_messages entry below ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) ## Generate messages in the 'msg' folder add_message_files( FILES detection2d_with_feature.msg detection2d_with_feature_array.msg detection3d_with_feature.msg detection3d_with_feature_array.msg ) ## Generate services in the 'srv' folder # add_service_files( # FILES # Service1.srv # Service2.srv # ) ## Generate actions in the 'action' folder # add_action_files( # FILES # Action1.action # Action2.action # ) ## Generate added messages and services with any dependencies listed here generate_messages( DEPENDENCIES std_msgs vision_msgs jpda_rospack ) ################################################ ## Declare ROS dynamic reconfigure parameters ## ################################################ ## To declare and build dynamic reconfigure parameters within this ## package, follow these steps: ## * In the file package.xml: ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" ## * In this file (CMakeLists.txt): ## * add "dynamic_reconfigure" to ## find_package(catkin REQUIRED COMPONENTS ...) 
## * uncomment the "generate_dynamic_reconfigure_options" section below ## and list every .cfg file to be processed ## Generate dynamic reconfigure parameters in the 'cfg' folder # generate_dynamic_reconfigure_options( # cfg/DynReconf1.cfg # cfg/DynReconf2.cfg # ) ################################### ## catkin specific configuration ## ################################### ## The catkin_package macro generates cmake config files for your package ## Declare things to be passed to dependent projects ## INCLUDE_DIRS: uncomment this if your package contains header files ## LIBRARIES: libraries you create in this project that dependent projects also need ## CATKIN_DEPENDS: catkin_packages dependent projects also need ## DEPENDS: system dependencies of this project that dependent projects also need catkin_package( # INCLUDE_DIRS include # LIBRARIES jpda_rospack CATKIN_DEPENDS roscpp rospy std_msgs vision_msgs message_runtime # DEPENDS system_lib ) ########### ## Build ## ########### ## Specify additional locations of header files ## Your package locations should be listed before other locations include_directories( # include ${catkin_INCLUDE_DIRS} ) ## Declare a C++ library # add_library(${PROJECT_NAME} # src/${PROJECT_NAME}/jpda_rospack.cpp # ) ## Add cmake target dependencies of the library ## as an example, code may need to be generated before libraries ## either from message generation or dynamic reconfigure # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) ## Declare a C++ executable ## With catkin_make all packages are built within a single CMake context ## The recommended prefix ensures that target names across packages don't collide # add_executable(${PROJECT_NAME}_node src/jpda_rospack_node.cpp) ## Rename C++ executable without prefix ## The above recommended prefix causes long target names, the following renames the ## target back to the shorter version for ease of user use ## e.g. 
"rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") ## Add cmake target dependencies of the executable ## same as for the library above # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) ## Specify libraries to link a library or executable target against # target_link_libraries(${PROJECT_NAME}_node # ${catkin_LIBRARIES} # ) ############# ## Install ## ############# # all install targets should use catkin DESTINATION variables # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html ## Mark executable scripts (Python etc.) for installation ## in contrast to setup.py, you can choose the destination # install(PROGRAMS # scripts/my_python_script # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} # ) ## Mark executables and/or libraries for installation # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} # ) ## Mark cpp header files for installation # install(DIRECTORY include/${PROJECT_NAME}/ # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} # FILES_MATCHING PATTERN "*.h" # PATTERN ".svn" EXCLUDE # ) ## Mark other files for installation (e.g. launch and bag files, etc.) 
# install(FILES # # myfile1 # # myfile2 # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} # ) ############# ## Testing ## ############# ## Add gtest based cpp test target and link libraries # catkin_add_gtest(${PROJECT_NAME}-test test/test_jpda_rospack.cpp) # if(TARGET ${PROJECT_NAME}-test) # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) # endif() ## Add folders to be run by python nosetests # catkin_add_nosetests(test) ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Stanford Vision and Learning Group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # JRMOT ROS package The repository contains the code for the work "JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset". Note that due to the global pandemic, this repository is still a work in progress. 
Updates will be made as soon as possible. ## Introduction JRMOT is a 3D multi object tracking system that: - Is real-time - Is online - Fuses 2D and 3D information - Achieves State of the Art performance on KITTI We also release JRDB: - A dataset with over 2 million annotated boxes and 3500 time consistent trajectories in 2D and 3D - Captured in social, human-centric settings - Captured by our social mobile-manipulator JackRabbot - Contains 360 degree cylindrical images, stereo camera images, 3D pointclouds and more sensing modalities All information, including download links for JRDB, can be found [here](https://jrdb.stanford.edu). ## JRMOT ![system overview](https://github.com/StanfordVL/JRMOT_ROS/blob/master/assets/framework.png) - Our system is built on top of state of the art 2D and 3D detectors (mask-RCNN and F-PointNet respectively). These detections are associated with predicted track locations at every time step. - Association is done via a novel feature fusion, as well as a cost selection procedure, followed by Kalman state gating and JPDA. - Given the JPDA output, we use both 2D and 3D detections in a novel multi-modal Kalman filter to update the track locations. ## Using the code There are 3 nodes forming parts of the ROS package: + 3d_detector.py: Runs F-PointNet, which performs 3D detection and 3D feature extraction + template.py: Runs Aligned-Re-ID, which performs 2D feature extraction + tracker_3d_node.py: Performs tracking while taking both 2D detections + features and 3D detections + features as input The launch file in the folder "launch" launches all 3 nodes. ## Dependencies The following are dependencies of the code: + 2D detector: The 2D detector is not included in this package. To interface with your own 2D detector, please modify the file template.py to subscribe to the correct topic, and also to handle the conversion from ROS message to numpy array. 
+ Spencer People Tracking messages: The final tracker output is in a Spencer People Tracking message. Please install this package and include these message types. + Various python packages: These can be found in [requirements.txt](./requirements.txt). Please install all dependencies prior to running the code (including CUDA and cuDNN). Additionally, this code requires a solver called Gurobi. Instructions to install gurobipy can be found [here](https://www.gurobi.com/documentation/9.0/quickstart_mac/the_grb_python_interface_f.html). + Weight files: The trained weights (trained on JRDB) for FPointNet and Aligned-ReID can be found [here](https://drive.google.com/open?id=1YQinMPVWEI44KezS9inXe0mvVnm4aL3s). ## Citation If you find this work useful, please cite: ``` @INPROCEEDINGS{shenoi2020jrmot, author={A. {Shenoi} and M. {Patel} and J. {Gwak} and P. {Goebel} and A. {Sadeghian} and H. {Rezatofighi} and R. {Mart\'in-Mart\'in} and S. {Savarese}}, booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, title={JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset}, year={2020}, volume={}, number={}, pages={10335-10342}, doi={10.1109/IROS45743.2020.9341635}} ``` If you utilise our dataset, please also cite: ``` @article{martin2019jrdb, title={JRDB: A dataset and benchmark of egocentric visual perception for navigation in human environments}, author={Mart{\'i}n-Mart{\'i}n, Roberto and Patel, Mihir and Rezatofighi, Hamid and Shenoi, Abhijeet and Gwak, JunYoung and Frankel, Eric and Sadeghian, Amir and Savarese, Silvio}, journal={arXiv preprint arXiv:1910.11792}, year={2019} } ``` ## ================================================ FILE: calib/cameras.yaml ================================================ stitching: radius: 3360000 rotation: 0 scalewidth: 1831 crop: 1 cameras: # camera order matters! 
sensor_0: width: 752 height: 480 D: -0.336591 0.159742 0.00012697 -7.22557e-05 -0.0461953 # K = fx 0 cx # 0 fy cy # 0 0 1 K: > 476.71 0 350.738 0 479.505 209.532 0 0 1 R: > 0.999994 0.000654539 0.00340293 -0.000654519 1 -6.81963e-06 -0.00340293 4.59231e-06 0.999994 T: -0.0104242 -3.70974 -56.9177 sensor_1: width: 752 height: 480 D: -0.335073 0.151959 -0.000232061 0.00032014 -0.0396825 K: > 483.254 0 365.33 0 485.78 210.953 0 0 1 R: > 0.305706 -0.00895443 -0.952084 0.0110396 0.999922 -0.00585963 0.952062 -0.0087193 0.305781 T: 0.93957 -4.05131 -52.03 sensor_2: width: 752 height: 480 D: -0.338469 0.156256 -0.000385467 0.000295485 -0.0401965 K: > 483.911 0 355.144 0 486.466 223.026 0 0 1 R: > -0.806828 0.0136361 -0.590629 0.00870468 0.999899 0.011194 0.590723 0.00389039 -0.806865 T: -0.25753 -6.54978 -47.7311 sensor_3: width: 752 height: 480 D: -0.330848 0.14747 8.59247e-05 0.000262599 -0.0385311 K: > 475.807 0 339.53 0 478.371 188.481 0 0 1 R: > -0.811334 0.0033829 0.584574 0.00046071 0.999987 -0.00514746 -0.584583 -0.00390699 -0.811324 T: 2.72207 -6.82928 -45.9778 sensor_4: width: 752 height: 480 D: -0.34064 0.168338 0.000147292 0.000229372 -0.0516133 K: > 485.046 0 368.864 0 488.185 208.215 0 0 1 R: > 0.310275 0.00160497 0.950645 -0.00648686 0.999979 0.000428942 -0.950625 -0.00629979 0.310279 T: -0.333857 -5.12974 -56.0573 sensor_5: width: 752 height: 480 D: -0.338422 0.163703 -0.000376267 7.73351e-06 -0.0479871 K: > 478.406 0 353.499 0 481.322 190.225 0 0 1 R: > 0.999995 0.00282205 0.00163291 -0.00282345 0.999996 0.000852931 -0.00163049 -0.000857537 0.999998 T: -0.903588 -126.851 -56.6256 sensor_6: width: 752 height: 480 D: -0.340676 0.165511 -0.00035978 0.000181532 -0.0493721 K: > 480.459 0 362.503 0 482.924 197.949 0 0 1 R: > 0.308288 -0.0110391 -0.951229 -0.000933102 0.999929 -0.0119067 0.951293 0.00455829 0.308256 T: 1.74525 -127.214 -51.7722 sensor_7: width: 752 height: 480 D: -0.344379 0.170343 -0.000137847 0.000141047 -0.0510536 K: > 486.491 0 361.559 0 
489.22 210.547 0 0 1 R: > -0.808201 0.0313998 -0.588068 0.026057 0.999506 0.0175574 0.588329 -0.00113337 -0.808621 T: -2.56535 -129.191 -47.5803 sensor_8: width: 752 height: 480 D: -0.331228 0.144696 0.000117553 0.000566449 -0.0343506 K: > 476.708 0 354.16 0 479.424 209.383 0 0 1 R: > -0.807384 -0.00296577 0.590019 -0.0122001 0.999857 -0.0116688 -0.589901 -0.0166195 -0.807305 T: 3.39727 -129.381 -45.2409 sensor_9: width: 752 height: 480 D: -0.345189 0.180808 0.000276465 0.000131868 -0.062103 K: > 484.219 0 345.303 0 487.312 192.371 0 0 1 R: > 0.308505 0.00370159 0.951215 -0.00403535 0.999988 -0.00258261 -0.951214 -0.00304174 0.308517 T: 0.354966 -128.218 -54.0617 ================================================ FILE: calib/defaults.yaml ================================================ calibrated: # the lidar_to_rgb parameters allow tweaking of the transformation between lidar and rgb frames # the default transformation is taken from the TF Tree # NOTE: applied to the original (sensor/velodyne) frame [x forward, y left, z up]: lidar_upper_to_rgb: # in meters: [x,y,z] translation: [0, 0, -0.33529] # in radians: [x,y,z] rotation: [0, 0, 0.085] lidar_lower_to_rgb: translation: [0, 0, 0.13511] rotation: [0, 0, 0] image: # all in pixels width: 3760 height: 480 # y-axis forward pixel offset (e.g. 
3760/2 => 1880, b/c center of the cylindrical image is forward) # TODO: move into calibrated params, when auto-calibration is possible stitched_image_offset: 1880 frames: # lookup for people transforms global: base_link # name of the rgb360 camera frame to which we wish to transform rgb360: occam ================================================ FILE: config/featurepointnet.cfg ================================================ [general] num_point = 1024 model_path = /home/sibot/jr2_catkin_ws/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt ================================================ FILE: launch/jpda_tracker.launch ================================================ ================================================ FILE: msg/__init__.py ================================================ ================================================ FILE: msg/detection2d_with_feature.msg ================================================ # This message contains a 2D bounding box corresponding to the detection of a person # Also contains the feature of this person used for re-ID Header header #header timestamp is time of frame acquisition uint64 x1 # x coordinate of the top left of the bounding box uint64 y1 # y coordinate of the top left of the bounding box uint64 x2 # x coordinate of the bottom right of the bounding box uint64 y2 # y coordinate of the bottom right of the bounding box float64[] feature # re-ID feature uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections) bool valid # whether detection is valid (within the boundaries of the image and has minimum required size) ================================================ FILE: msg/detection2d_with_feature_array.msg ================================================ Header header detection2d_with_feature[] detection2d_with_features ================================================ FILE: msg/detection3d_with_feature.msg ================================================ # This message 
contains a 3D bounding box corresponding to the detection of a person # Also contains the feature of this person used for re-ID Header header #header timestamp is time of frame acquisition float32 x # x coordinate of the center of the bottom face of the bounding box float32 y # y coordinate of the center of the bottom face of the bounding box float32 z # z coordinate of the center of the bottom face of the bounding box float32 l # size of bounding box along x dimension float32 h # size of bounding box along y dimension float32 w # size of bounding box along z dimension float32 theta # rotation of bounding box with respect to the positive x axis float64[] feature # re-ID feature uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections) bool valid # whether detection is valid (enough lidar points) ================================================ FILE: msg/detection3d_with_feature_array.msg ================================================ Header header detection3d_with_feature[] detection3d_with_features ================================================ FILE: package.xml ================================================ jpda_rospack 0.0.1 The jpda_rospack package ashenoi TODO message_generation message_runtime catkin roscpp rospy std_msgs vision_msgs roscpp rospy std_msgs vision_msgs roscpp rospy std_msgs vision_msgs ================================================ FILE: paper_experiments/models/__init__.py ================================================ ================================================ FILE: paper_experiments/models/aligned_reid_model.py ================================================ import torch import torch.nn as nn import torch.nn.init as init import torch.nn.functional as F import torch.utils.model_zoo as model_zoo import os import math class Model(nn.Module): def __init__(self, local_conv_out_channels=128, num_classes=None): super(Model, self).__init__() self.base = resnet50(pretrained=True) 
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with a padding of 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (defaults to 1).
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv.

    Each convolution is followed by batch normalisation. The last
    convolution outputs ``planes * 4`` channels, and the block input
    (optionally passed through ``downsample``) is added to that result
    before the final ReLU.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        Args:
            inplanes: number of channels of the block input.
            planes: channel width of the two inner convolutions.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the block input to
                produce the shortcut branch; ``None`` uses the input as is.
        """
        super(Bottleneck, self).__init__()
        widened = planes * 4

        # 1x1 convolution: inplanes -> planes.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        # 3x3 convolution; the only layer that applies ``stride``.
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # 1x1 convolution: planes -> planes * 4.
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages and add the shortcut branch."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
def remove_fc(state_dict):
    """Return a copy of ``state_dict`` without the fc layer parameters.

    Every entry whose key starts with ``'fc.'`` is dropped from the copy;
    the mapping passed in is left untouched.
    """
    pruned = state_dict.copy()
    # Collect the keys first so the copy is not resized while being iterated.
    fc_keys = [name for name in pruned if name.startswith('fc.')]
    for name in fc_keys:
        pruned.pop(name)
    return pruned
class CombiNet(nn.Module):
    """Three fully connected layers with a skip connection from the input.

    The two inner layers are ``2 * hidden_units`` wide. The output of the
    last layer is added in place to the network input, so the input must be
    broadcast-compatible with an ``out_dim``-wide output.
    """

    def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
        super().__init__()
        width = 2*hidden_units
        self.fc1 = nn.Linear(in_dim, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, out_dim)
        self.relu = nn.ReLU()
        # weight_init is the module-level initialiser defined in this file.
        self.apply(weight_init)

    def forward(self, x):
        """Return ``fc3(relu(fc2(relu(fc1(x))))) + x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        out = self.fc3(hidden)
        out += x
        return out
def weight_init(m):
    """Xavier-normal initialise the weights of a Linear or LSTM module.

    Meant to be passed to ``nn.Module.apply``. Only modules whose exact type
    is ``nn.Linear`` (gain ``sqrt(2)``) or ``nn.LSTM`` (first-layer
    input-hidden and hidden-hidden weights, default gain) are modified;
    biases and all other module types are left as they are.
    """
    module_type = type(m)
    if module_type is nn.Linear:
        nn.init.xavier_normal_(m.weight, gain=np.sqrt(2))
        return
    if module_type is nn.LSTM:
        for weights in (m.weight_ih_l0, m.weight_hh_l0):
            nn.init.xavier_normal_(weights)
== '__main__': encoder = ImageEncoder() ================================================ FILE: paper_experiments/models/featurepointnet_model.py ================================================ import os, pdb import numpy as np import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) import configparser import utils.featurepointnet_tf_util as tf_util import utils.featurepointnet_model_util as model_util from utils.calibration import Calibration, OmniCalibration batch_size = 45 #TODO: Update if needed? class FPointNet(): def __init__(self, config_path): parser = configparser.SafeConfigParser() parser.read(config_path) self.num_point = parser.getint('general', 'num_point') self.model_path = parser.get('general', 'model_path') with tf.device('/gpu:'+str('0')): pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \ heading_class_label_pl, heading_residual_label_pl, \ size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point) is_training_pl = tf.placeholder(tf.bool, shape=()) end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl) self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3)) #depth_feature = self.get_depth_feature_op(is_training_pl) loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points) self.saver = tf.train.Saver() # Create a session config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True self.sess = tf.Session(config=config) #Initialize variables self.sess.run(tf.global_variables_initializer()) # Restore variables from disk. 
def __call__(self, input_point_cloud, rot_angle, peds=False):
    """Run Frustum-PointNet inference over frustum point clouds in fixed-size batches.

    The input is zero-padded along its first axis (with blocks of shape
    ``(pad, self.num_point, 4)``) up to a multiple of the module-level
    ``batch_size``, fed through the session one batch at a time, and the
    padded rows are dropped again before returning.

    Args:
        input_point_cloud: array whose first axis indexes the frustums.
        rot_angle: indexable; ``rot_angle[i]`` is forwarded to
            ``model_util.from_prediction_to_label_format`` for frustum ``i``.
        peds: when true, column 1 of the one-hot class vector fed to the
            network is set to 1; otherwise the vector is all zeros.

    Returns:
        ``(boxes, mask_mean_prob, features)`` with one row per input frustum.
        ``boxes`` stacks the values produced by
        ``from_prediction_to_label_format`` with entries 4 and 5 swapped.
        ``mask_mean_prob`` is never filled in by this method and is returned
        as zeros.  ``features`` holds ``batch_features[:, 0, :]`` of the
        ``depth_feature`` op (512 columns).
    """
    num_inputs = input_point_cloud.shape[0]
    if num_inputs == 0:
        # Nothing to run: the np.vstack calls further down raise ValueError
        # on an empty list, so hand back empty results instead of crashing.
        # NOTE(review): 7 box columns assumed (box[6] is the highest index
        # used below) -- confirm against from_prediction_to_label_format.
        return np.zeros((0, 7)), np.zeros((0,)), np.zeros((0, 512))

    one_hot_vec = np.zeros((batch_size, 3))
    if peds:
        one_hot_vec[:, 1] = 1

    # Ceiling division: number of batch_size-sized chunks needed.
    num_batches = -(-num_inputs // batch_size)
    remainder = num_inputs % batch_size
    if remainder:
        padding = np.zeros((batch_size - remainder, self.num_point, 4))
        input_point_cloud = np.vstack([input_point_cloud, padding])
    padded = input_point_cloud.shape[0]

    logits = np.zeros((padded, input_point_cloud.shape[1], 2))
    centers = np.zeros((padded, 3))
    heading_logits = np.zeros((padded, model_util.NUM_HEADING_BIN))
    heading_residuals = np.zeros((padded, model_util.NUM_HEADING_BIN))
    size_logits = np.zeros((padded, model_util.NUM_SIZE_CLUSTER))
    size_residuals = np.zeros((padded, model_util.NUM_SIZE_CLUSTER, 3))
    mask_mean_prob = np.zeros((padded,))  # placeholder, stays all-zero
    features = np.zeros((padded, 512))

    # The fetch list does not depend on the batch, so build it once.
    ep = self.ops['end_points']
    fetches = [self.ops['logits'], self.ops['center'],
               ep['heading_scores'], ep['heading_residuals'],
               ep['size_scores'], ep['size_residuals'],
               self.ops['depth_feature']]

    for i in range(num_batches):
        rows = slice(i * batch_size, (i + 1) * batch_size)
        feed_dict = {
            self.ops['pointclouds_pl']: input_point_cloud[rows],
            self.ops['one_hot_vec_pl']: one_hot_vec,
            self.ops['is_training_pl']: False,
        }
        batch_logits, batch_centers, \
            batch_heading_scores, batch_heading_residuals, \
            batch_size_scores, batch_size_residuals, batch_features = \
            self.sess.run(fetches, feed_dict=feed_dict)

        logits[rows] = batch_logits
        centers[rows] = batch_centers
        heading_logits[rows] = batch_heading_scores
        heading_residuals[rows] = batch_heading_residuals
        size_logits[rows] = batch_size_scores
        size_residuals[rows] = batch_size_residuals
        features[rows] = batch_features[:, 0, :]

    # Pick, per row, the residual belonging to the arg-max class.
    heading_cls = np.argmax(heading_logits, 1)  # B
    size_cls = np.argmax(size_logits, 1)  # B
    heading_res = np.vstack([heading_residuals[i, cls]
                             for i, cls in enumerate(heading_cls)])
    size_res = np.vstack([size_residuals[i, cls, :]
                          for i, cls in enumerate(size_cls)])

    # TODO: Make this accept batches if wanted
    boxes = []
    for i in range(num_inputs):
        box = np.array(model_util.from_prediction_to_label_format(
            centers[i], heading_cls[i], heading_res[i],
            size_cls[i], size_res[i], rot_angle[i]))
        box[6] = np.squeeze(box[6])
        box[4], box[5] = box[5], box[4]
        boxes.append(box)
    boxes = np.vstack(boxes)

    return boxes, mask_mean_prob[:num_inputs], features[:num_inputs]
Input: point_cloud: TF tensor in shape (B,N,4) frustum point clouds with XYZ and intensity in point channels XYZs are in frustum coordinate one_hot_vec: TF tensor in shape (B,3) length-3 vectors indicating predicted object type is_training: TF boolean scalar bn_decay: TF float scalar end_points: dict Output: logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object end_points: dict ''' num_point = point_cloud.get_shape()[1].value net = tf.expand_dims(point_cloud, 2) net = tf_util.conv2d(net, 64, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv1', bn_decay=bn_decay) net = tf_util.conv2d(net, 64, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv2', bn_decay=bn_decay) point_feat = tf_util.conv2d(net, 64, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv3', bn_decay=bn_decay) net = tf_util.conv2d(point_feat, 128, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv4', bn_decay=bn_decay) net = tf_util.conv2d(net, 1024, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv5', bn_decay=bn_decay) global_feat = tf_util.max_pool2d(net, [num_point,1], padding='VALID', scope='maxpool') global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3) global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1]) concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand]) net = tf_util.conv2d(concat_feat, 512, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv6', bn_decay=bn_decay) net = tf_util.conv2d(net, 256, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv7', bn_decay=bn_decay) net = tf_util.conv2d(net, 128, [1,1], padding='VALID', stride=[1,1], bn=True, is_training=is_training, scope='conv8', bn_decay=bn_decay) net = tf_util.conv2d(net, 128, [1,1], padding='VALID', stride=[1,1], bn=True, 
def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec, is_training, bn_decay, end_points):
    ''' 3D Box Estimation PointNet v1 network.

    Input (shapes as stated by the original interface notes):
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in object coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: forwarded to every batch-normalised layer
        bn_decay: forwarded to every batch-normalised layer
        end_points: dict, returned unchanged
    Output:
        output: result of the final fully connected layer, of width
            3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4 (box center, heading bin
            scores and residuals, size cluster scores and residuals)
        end_points: the dict that was passed in
        features: tf.reduce_max over axis 1 of the 'conv-reg4' activations
    '''
    num_point = object_point_cloud.get_shape()[1].value
    net = tf.expand_dims(object_point_cloud, 2)

    # Four 1x1 convolutions with batch norm; only width and scope differ.
    conv_specs = ((128, 'conv-reg1'), (128, 'conv-reg2'),
                  (256, 'conv-reg3'), (512, 'conv-reg4'))
    for channels, scope in conv_specs:
        net = tf_util.conv2d(net, channels, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope=scope, bn_decay=bn_decay)

    # Feature handed back to the caller, taken before the pooling layer.
    features = tf.reduce_max(net, axis=1)

    pooled = tf_util.max_pool2d(net, [num_point,1],
                                padding='VALID', scope='maxpool2')
    pooled = tf.squeeze(pooled, axis=[1,2])
    hidden = tf.concat([pooled, one_hot_vec], axis=1)
    for units, scope in ((512, 'fc1'), (256, 'fc2')):
        hidden = tf_util.fully_connected(hidden, units, scope=scope, bn=True,
                                         is_training=is_training, bn_decay=bn_decay)

    # The first 3 numbers: box center coordinates (cx,cy,cz),
    # the next NUM_HEADING_BIN*2: heading bin class scores and bin residuals
    # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals
    output_width = 3 + model_util.NUM_HEADING_BIN*2 + model_util.NUM_SIZE_CLUSTER*4
    output = tf_util.fully_connected(hidden, output_width,
                                     activation_fn=None, scope='fc3')
    return output, end_points, features
def create_depth_model(model, config_path):
    """Build the depth-feature model selected by name.

    Args:
        model: ``'FPointNet'`` or ``'PointNet'``.
        config_path: passed unchanged to the model constructor.

    Returns:
        The constructed ``FPointNet`` or ``PointNet`` instance.

    Raises:
        ValueError: if ``model`` is not one of the supported names
            (previously this fell through and returned ``None``).
    """
    # NOTE (carried over from the original; no omni_camera argument exists in
    # this function -- confirm whether it still applies): the folder path must
    # be the folder containing the config.yaml file if omni_camera is True.
    if model == 'FPointNet':
        return FPointNet(config_path)
    if model == 'PointNet':
        # PointNet is defined in the sibling module models/pointnet_model.py,
        # which this file does not import at the top; import it lazily so the
        # branch no longer fails with NameError.
        from models.pointnet_model import PointNet
        return PointNet(config_path)
    raise ValueError(
        "Unknown depth model %r (expected 'FPointNet' or 'PointNet')" % (model,))
def placeholder_inputs(self, batch_size, num_point):
    """Create the point-cloud and label placeholders.

    Returns a pair ``(pointclouds_pl, labels_pl)``: a float32 placeholder of
    shape ``(batch_size, None, 3)`` and an int32 placeholder of shape
    ``(batch_size)``.  ``num_point`` is accepted but not used; the point
    dimension is left as ``None``.
    """
    cloud_shape = (batch_size, None, 3)
    clouds = tf.placeholder(tf.float32, shape=cloud_shape)
    labels = tf.placeholder(tf.int32, shape=(batch_size))
    return clouds, labels
def get_loss(self, pred, label, end_points, reg_weight=0.001):
    """Classification loss plus an orthogonality penalty on the feature transform.

    Args:
        pred: logits, B*NUM_CLASSES.
        label: class indices, B.
        end_points: dict holding the feature transform under ``'transform'``
            (BxKxK).
        reg_weight: multiplier applied to the transform penalty.

    Returns:
        ``mean(sparse softmax cross-entropy) + reg_weight * l2_loss(T T^T - I)``.
        Both terms are also written to ``tf.summary.scalar``.
    """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(loss)
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
    # tf.eye replaces the former tf.constant(np.eye(K)): numpy is not imported
    # by this module, so the old expression raised NameError.
    mat_diff = mat_diff - tf.eye(K, dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)
    tf.summary.scalar('mat loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
class NLayersFC(nn.Module):
    """Stack of fully connected layers with in-place ReLUs in between.

    With ``n_layers == 0`` the network is a single ``Linear(in_dim, out_dim)``.
    Otherwise it is ``Linear(in_dim, hidden_dim)`` + ReLU, followed by
    ``n_layers - 1`` further ``Linear(hidden_dim, hidden_dim)`` + ReLU pairs,
    and a closing ``Linear(hidden_dim, out_dim)``.  The stack is stored as
    ``self.model``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim=1, n_layers=0):
        super().__init__()
        if n_layers == 0:
            layers = [nn.Linear(in_dim, out_dim)]
        else:
            # Widths seen by the hidden blocks; there is always at least one
            # hidden block once n_layers is non-zero.
            widths = [in_dim] + [hidden_dim] * max(n_layers, 1)
            layers = []
            for fan_in, fan_out in zip(widths[:-1], widths[1:]):
                layers.extend((nn.Linear(fan_in, fan_out), nn.ReLU(True)))
            layers.append(nn.Linear(hidden_dim, out_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x``."""
        return self.model(x)
class TripletNet(nn.Module):
    """Run one shared network on an (anchor, positive, negative) triplet.

    ``net`` must return a ``(prediction, feature)`` pair for each input.
    """

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x, y, z):
        """Return ``(dist_pos, dist_neg, pred_x, pred_y, pred_z)``.

        ``dist_pos`` is the pairwise L2 distance between the features of
        ``x`` and ``y``; ``dist_neg`` the one between the features of ``x``
        and ``z``.
        """
        # The shared network is evaluated on x, y, z in that order.
        (pred_x, feat_x), (pred_y, feat_y), (pred_z, feat_z) = (
            self.net(sample) for sample in (x, y, z)
        )
        dist_pos = F.pairwise_distance(feat_x, feat_y, 2)
        dist_neg = F.pairwise_distance(feat_x, feat_z, 2)
        return dist_pos, dist_neg, pred_x, pred_y, pred_z
int(module_def["batch_normalize"]) filters = int(module_def["filters"]) kernel_size = int(module_def["size"]) pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0 modules.add_module( "conv_%d" % i, nn.Conv2d( in_channels=output_filters[-1], out_channels=filters, kernel_size=kernel_size, stride=int(module_def["stride"]), padding=pad, bias=not bn, ), ) if bn: modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters)) if module_def["activation"] == "leaky": modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1)) elif module_def["type"] == "maxpool": kernel_size = int(module_def["size"]) stride = int(module_def["stride"]) if kernel_size == 2 and stride == 1: padding = nn.ZeroPad2d((0, 1, 0, 1)) modules.add_module("_debug_padding_%d" % i, padding) maxpool = nn.MaxPool2d( kernel_size=int(module_def["size"]), stride=int(module_def["stride"]), padding=int((kernel_size - 1) // 2), ) modules.add_module("maxpool_%d" % i, maxpool) elif module_def["type"] == "upsample": upsample = Interpolate(scale_factor=int(module_def["stride"]), mode="nearest") modules.add_module("upsample_%d" % i, upsample) elif module_def["type"] == "route": layers = [int(x) for x in module_def["layers"].split(",")] filters = sum([output_filters[layer_i] for layer_i in layers]) modules.add_module("route_%d" % i, EmptyLayer()) elif module_def["type"] == "shortcut": filters = output_filters[int(module_def["from"])] modules.add_module("shortcut_%d" % i, EmptyLayer()) elif module_def["type"] == "yolo": anchor_idxs = [int(x) for x in module_def["mask"].split(",")] # Extract anchors anchors = [int(x) for x in module_def["anchors"].split(",")] anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] anchors = [anchors[i] for i in anchor_idxs] num_classes = int(module_def["classes"]) img_height = int(hyperparams["height"]) # Define detection layer yolo_layer = YOLOLayer(anchors, num_classes, img_height) modules.add_module("yolo_%d" % i, yolo_layer) # Register module list and number 
class Interpolate(nn.Module):
    """Module wrapper around ``nn.functional.interpolate``.

    Stores the scale factor and mode given at construction and applies them
    to every input passed through ``forward``.
    """

    def __init__(self, scale_factor, mode):
        super().__init__()
        self.scale_factor = scale_factor
        self.mode = mode
        self.interp = nn.functional.interpolate

    def forward(self, x):
        """Return ``x`` resampled with the stored scale factor and mode."""
        return self.interp(x, scale_factor=self.scale_factor, mode=self.mode)
# Calculate offsets for each grid grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor) grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor) scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]) anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1)) anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1)) # Add offset and scale with anchors pred_boxes = FloatTensor(prediction[..., :4].shape) pred_boxes[..., 0] = x.data + grid_x pred_boxes[..., 1] = y.data + grid_y pred_boxes[..., 2] = torch.exp(w.data) * anchor_w pred_boxes[..., 3] = torch.exp(h.data) * anchor_h # Training if targets is not None: if x.is_cuda: self.mse_loss = self.mse_loss.cuda() self.bce_loss = self.bce_loss.cuda() self.ce_loss = self.ce_loss.cuda() nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets( pred_boxes=pred_boxes.cpu().data, pred_conf=pred_conf.cpu().data, pred_cls=pred_cls.cpu().data, target=targets.cpu().data, anchors=scaled_anchors.cpu().data, num_anchors=nA, num_classes=self.num_classes, grid_size=nG, ignore_thres=self.ignore_thres, img_dim=self.image_dim, ) nProposals = int((pred_conf > 0.5).sum().item()) recall = float(nCorrect / nGT) if nGT else 1 precision = float(nCorrect / nProposals) # Handle masks mask = Variable(mask.type(ByteTensor)) conf_mask = Variable(conf_mask.type(ByteTensor)) # Handle target variables tx = Variable(tx.type(FloatTensor), requires_grad=False) ty = Variable(ty.type(FloatTensor), requires_grad=False) tw = Variable(tw.type(FloatTensor), requires_grad=False) th = Variable(th.type(FloatTensor), requires_grad=False) tconf = Variable(tconf.type(FloatTensor), requires_grad=False) tcls = Variable(tcls.type(LongTensor), requires_grad=False) # Get conf mask where gt and where there is no gt conf_mask_true = mask conf_mask_false = conf_mask - mask # Mask outputs to ignore non-existing objects loss_x = self.mse_loss(x[mask], tx[mask]) loss_y = 
class Darknet(nn.Module):
    """YOLOv3 object detection model"""

    def __init__(self, config_path):
        """Parse the model config, build the layers and load the weights.

        The weights file path is read from the ``'path'`` key of the last
        entry of the parsed module definitions.
        """
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.seen = 0
        self.header_info = np.array([0, 0, 0, self.seen, 0])
        self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
        self.load_weights(self.module_defs[-1]['path'])

    def forward(self, x, targets=None):
        """Run the network on ``x``.

        With ``targets`` given (training), each yolo layer returns a loss
        followed by its components; the components are accumulated in
        ``self.losses`` under ``self.loss_names`` and the summed loss is
        returned.  Without ``targets`` the detections of all yolo layers are
        concatenated along dimension 1 and returned.
        """
        is_training = targets is not None
        output = []
        self.losses = defaultdict(float)
        layer_outputs = []
        for module_def, module in zip(self.module_defs, self.module_list):
            kind = module_def["type"]
            if kind in ("convolutional", "upsample", "maxpool"):
                x = module(x)
            elif kind == "route":
                sources = [int(idx) for idx in module_def["layers"].split(",")]
                x = torch.cat([layer_outputs[idx] for idx in sources], 1)
            elif kind == "shortcut":
                x = layer_outputs[-1] + layer_outputs[int(module_def["from"])]
            elif kind == "yolo":
                if is_training:
                    # Train phase: get loss
                    x, *losses = module[0](x, targets)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                else:
                    # Test phase: get detections
                    x = module(x)
                output.append(x)
            # Entries of any other type leave x untouched.
            layer_outputs.append(x)

        # Average recall/precision over the yolo layers that actually ran
        # (this was a hard-coded 3, which is only right for three heads).
        num_heads = len(output) or 1
        self.losses["recall"] /= num_heads
        self.losses["precision"] /= num_heads
        return sum(output) if is_training else torch.cat(output, 1)

    def load_weights(self, weights_path):
        """Parses and loads the weights stored in 'weights_path'.

        The file starts with five int32 header values (kept in
        ``self.header_info``; the fourth is stored as ``self.seen``) followed
        by float32 weights.  For every convolutional block the values are
        consumed in this order: batch-norm bias, weight, running mean and
        running variance when the block is batch-normalised, otherwise the
        convolution bias; then the convolution weights.
        """
        with open(weights_path, "rb") as fp:
            header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values
            weights = np.fromfile(fp, dtype=np.float32)  # The rest are weights

        # Needed to write header when saving weights
        self.header_info = header
        self.seen = header[3]

        ptr = 0
        for module_def, module in zip(self.module_defs, self.module_list):
            if module_def["type"] != "convolutional":
                continue
            conv_layer = module[0]
            # int() mirrors create_modules, so a textual "0" means "no BN".
            if int(module_def["batch_normalize"]):
                bn_layer = module[1]
                targets = (bn_layer.bias, bn_layer.weight,
                           bn_layer.running_mean, bn_layer.running_var)
            else:
                targets = (conv_layer.bias,)
            for target in targets + (conv_layer.weight,):
                count = target.numel()
                chunk = torch.from_numpy(weights[ptr:ptr + count]).view_as(target)
                target.data.copy_(chunk)
                ptr += count

    def save_weights(self, path, cutoff=-1):
        """Write the header and the convolutional weights to ``path``.

        @:param path    - path of the new weights file
        @:param cutoff  - entries are taken from the slice ``[:cutoff]`` of
                          the module list, so the default -1 covers every
                          entry except the last one

        The per-block order matches ``load_weights``.
        """
        with open(path, "wb") as fp:
            self.header_info[3] = self.seen
            self.header_info.tofile(fp)

            # Iterate through layers
            for module_def, module in zip(self.module_defs[:cutoff], self.module_list[:cutoff]):
                if module_def["type"] != "convolutional":
                    continue
                conv_layer = module[0]
                # int() mirrors create_modules, so a textual "0" means "no BN".
                if int(module_def["batch_normalize"]):
                    bn_layer = module[1]
                    tensors = (bn_layer.bias, bn_layer.weight,
                               bn_layer.running_mean, bn_layer.running_var)
                else:
                    tensors = (conv_layer.bias,)
                for tensor in tensors + (conv_layer.weight,):
                    tensor.data.cpu().numpy().tofile(fp)
mistune==0.8.4 nbconvert==5.4.1 nbformat==4.4.0 networkx==2.2 notebook==6.4.1 numba==0.43.1 numpy==1.21.0 open3d-python==0.7.0.0 opencv-python==4.2.0.32 pandas==0.24.1 pandocfilters==1.4.2 parso==0.3.3 pexpect==4.6.0 pickleshare==0.7.5 Pillow==9.0.0 pptk==0.1.0 prometheus-client==0.5.0 prompt-toolkit==2.0.8 protobuf==3.6.1 ptyprocess==0.6.0 pycocotools==2.0.0 Pygments==2.7.4 pyparsing==2.3.1 pypcd==0.1.1 python-dateutil==2.8.0 python-lzf==0.2.4 pythreejs==2.0.2 pytz==2018.9 PyWavelets==1.0.1 PyYAML==5.4 pyzmq==17.1.2 requests==2.21.0 rospkg==1.1.9 scikit-image==0.14.2 scikit-learn==0.20.2 scipy==1.2.0 seaborn==0.9.0 Send2Trash==1.5.0 six==1.12.0 sklearn==0.0 tensorboard==1.8.0 tensorboardX==1.6 tensorflow-gpu==2.5.2 termcolor==1.1.0 terminado==0.8.1 testpath==0.4.2 toolz==0.9.0 torch==1.0.1 torchvision==0.2.1 tornado==5.1.1 tqdm==4.30.0 traitlets==4.3.2 traittypes==0.2.1 urllib3==1.26.5 wcwidth==0.1.7 Werkzeug==0.15.3 widgetsnbextension==3.4.2 ================================================ FILE: paper_experiments/track.py ================================================ import open3d as o3d import torch import argparse import os, pdb, sys, copy, pickle import time import random import numpy as np import tensorflow as tf from torch.utils.data import DataLoader from tqdm import tqdm from models.aligned_reid_model import Model as aligned_reid_model from utils.yolo_utils.utils import non_max_suppression, load_classes from models.combination_model import CombiNet, CombiLSTM from utils.dataset import SequenceDataset, STIPDataset, collate_fn from models.deep_sort_model import ImageEncoder as deep_sort_model from utils.tracker import Tracker from utils.tracker_3d import Tracker_3d from utils.deep_sort_utils import non_max_suppression as deepsort_nms from utils.visualise import draw_track from utils.read_detections import read_ground_truth_2d_detections, read_ground_truth_3d_detections from utils.tracking_utils import create_detector, convert_detections, combine_features 
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value (including ``"False"``) becomes ``True``.  This
    parser accepts the usual spellings instead.

    Raises
    ------
    argparse.ArgumentTypeError
        If the token is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def parse_arguments():
    """Parse the tracker's command-line options.

    Besides the parsed flags, the returned namespace carries ``using_cuda``
    (CUDA requested *and* available) and a ``sequence_folder`` with any
    trailing path separator removed.  The output folder is created if it does
    not exist yet.

    NOTE: several switches use ``action='store_false'`` (``-p``, ``-o``,
    ``-r``, ``-c``, ``-f``, ``-j``): they default to ``True`` and passing the
    flag turns the feature *off*.

    Returns
    -------
    argparse.Namespace
        The parsed and post-processed options.

    Raises
    ------
    ValueError
        If 3D tracking is requested while point clouds are disabled.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # --- data, checkpoints and models ---
    add('--sequence_folder', type=str, default='data/KITTI/sequences/0001', help='path to image sequence')
    add('--output_folder', type=str, default='results', help='output folder')
    add('--aligned_reid_ckpt', type=str, default='weights/aligned_reid_market_weights.ckpt', help='path to model config file')
    add('--resnet_reid_ckpt', type=str, default='weights/resnet_reid.ckpt', help='path to model config file')
    add('--depth_model', type=str, default='FPointNet', help='type of depth model to use')
    add('--depth_config_path', type=str, default='config/featurepointnet.cfg', help='path to model config file')
    add('--appearance_model', type=str, default='resnet_reid', help='type of appearance model to use aligned_reid or deepsort or resnet_reid')
    add('--conf_thres', type=float, default=0.8, help='object confidence threshold')
    add('--depth_weight', type=float, default=1, help='weight of depth feature while concatenating')
    add('--nms_thresh', type=float, default=0.56, help='iou thresshold for non-maximum suppression')
    add('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation')
    # type=bool would turn "--use_cuda False" into True; parse the text instead.
    add('--use_cuda', type=_str2bool, default=True, help='whether to use cuda if available')

    # --- detection sources and tracking mode ---
    add('-p', '--point_cloud', action='store_false', help='Use to disable pointcloud')
    add('-o', '--optical_flow_initiation', action='store_false', help='Use to enable optical flow based velocity initiation')
    add('-q', '--perfect', action='store_true', help='whether to use perfect assignments')
    add('-g', '--ground_truth', action='store_true', help='whether to use ground truth detections')
    add('-r', '--reference', action='store_false', help='whether to use reference detections')
    add('-t', '--track_3d', action='store_true', help='whether to do 3d tracking')
    add('--ref_det', type=str, default='new_rrc_subcnn_car', help='lsvm, subcnn, regionlets, maskrcnn')

    # --- association ---
    add("--nn_budget", type=int, default=100, help="Maximum size of the appearance descriptors gallery. If None, no budget is enforced.")
    add("--dummy_node_cost_app", type=float, default=0.99, help="Dummy node appearance cost for JPDA (or maximum distnce when using deepsort)")
    add("--dummy_node_cost_iou", type=float, default=0.97, help="Dummy node iou cost for JPDA (or maximum distnce when using deepsort)")
    add("-c", "--combine_features", action='store_false', help="Whether to use trained MLP to combine features")
    add("-f", "--fpointnet", action='store_false', help="Whether to use F-PointNet for 3d detection")
    # The original default ended with a stray double quote inside the string,
    # so the default checkpoint path could never exist on disk.
    add("--combo_model", default='weights/resnet_reid_fpointnet_combo_car/mlp__1570759353.0113978/best_checkpoint.tar', help="Trained MLP checkpoint to combine features")
    add("-j", "--JPDA", action='store_false', help="Whether to use JPDA for soft assignments")
    add("-l", "--LSTM", action='store_true', help="Whether to use LSTM for feature combination and update")
    add("--lstm_model", default='weights/aligned_reid_fpointnet_combo/lstm/best_checkpoint.tar', help="Trained LSTM checkpoint to combine features")
    add("-m", "--m_best_sol", type=int, default=10, help="Number of solutions for JPDA")
    add("--log_data", action='store_true', help="Turn on full data logging")
    add("--max_age", type=int, default=2, help="Number of misses before termination")
    add("--n_init", type=int, default=2, help="Consecutive frames for tentative->confirmed")
    add("--assn_thresh", type=float, default=0.65, help="min prob for match")
    add("--matching_strategy", type=str, default="hungarian", help="matching strategy for JPDA (max_and_threshold, strict_max_pair, or hungarian)")
    add("--kf_appearance_feature", type=_str2bool, default=False, help="Whether to use kf state for apperance features")
    add('-i', "--use_imm", action='store_true', help='Whether to use IMM')
    add('-v', "--verbose", action='store_true', help='Verbose')

    # --- Kalman filter tuning ---
    add('--kf_process', type=float, default=5.2, help='kf 2d process noise factor')
    add('--kf_2d_meas', type=float, default=3.2, help='kf 2d measurement noise factor')
    add('--kf_3d_meas', type=float, default=0.25, help='kf 3d measurement noise factor')
    add('--pos_weight_3d', type=float, default=1, help='Weight on position covariance process noise in KF')
    add('--pos_weight', type=float, default=0.006, help='Weight on position covariance process noise in KF')
    add('--vel_weight', type=float, default=0.008, help='Weight on velocity covariance process noise in KF')
    add('--theta_weight', type=float, default=0.02, help='Weight on velocity covariance process noise in KF')
    add('--gate_limit', type=float, default=600, help='Maximum covariance value of the gate')
    add('--initial_uncertainty', type=float, default=1, help='Uncertainty scaling for initial covariance of track')
    add('--uncertainty_limit', type=float, default=1.5, help='Uncertainty limit at which to terminate tracks')
    add("--gate_full_state", action='store_true', help="Whether to gate on full kalman state, default is only position")
    add("--near_online", action='store_true', help="Whether to do near online tracking")
    add("--omni", action='store_true', help="Omni directional camera (JRDB)")

    opt = parser.parse_args()
    opt.sequence_folder = opt.sequence_folder.rstrip(os.sep)
    opt.using_cuda = torch.cuda.is_available() and opt.use_cuda

    if opt.track_3d and not opt.point_cloud:
        # `raise "<str>"` is itself a TypeError in Python 3; raise a real exception.
        raise ValueError("Must provide point cloud if doing 3D tracking!")

    if opt.verbose:
        print(opt)

    # exist_ok avoids the check-then-create race of the exists()/makedirs() pair.
    os.makedirs(opt.output_folder, exist_ok=True)
    return opt
assn_thresh=opt.assn_thresh, matching_strategy=opt.matching_strategy, gate_full_state=opt.gate_full_state, kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight, opt.theta_weight, opt.kf_process, opt.kf_2d_meas, opt.kf_3d_meas, opt.initial_uncertainty), calib=dataset.calib, dummy_node_cost_iou=opt.dummy_node_cost_iou, dummy_node_cost_app=opt.dummy_node_cost_app, nn_budget=opt.nn_budget, use_imm=opt.use_imm, uncertainty_limit=opt.uncertainty_limit, gate_limit=opt.gate_limit, omni=opt.omni) else: tracker = Tracker(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol, max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh, matching_strategy=opt.matching_strategy, kf_appearance_feature=opt.kf_appearance_feature, gate_full_state=opt.gate_full_state, kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty), kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty), calib=dataset.calib, dummy_node_cost_iou=opt.dummy_node_cost_iou, dummy_node_cost_app=opt.dummy_node_cost_app, nn_budget=opt.nn_budget, use_imm=opt.use_imm, uncertainty_limit=opt.uncertainty_limit, optical_flow=opt.optical_flow_initiation, gate_limit=opt.gate_limit) results = [] results_3d = [] n_frames = len(dataloader) if opt.log_data: full_log = [{'tracks':[], 'detections':[], 'detections_3d':[]} for _ in range(n_frames)] det_matrix = None seq_name = os.path.split(opt.sequence_folder)[-1] frame_times = [] if opt.verbose: print("------------------------") print("BEGINNING TRACKING OF SEQUENCE %s"%seq_name) print("------------------------") for frame_idx, img_path, input_img, point_cloud in tqdm(dataloader, ncols = 100, disable=not opt.verbose): # if frame_idx > 120: # break # elif frame_idx < 98: # continue if opt.log_data: full_log[frame_idx]['img_path'] = copy.copy(img_path) input_img = input_img.type(Tensor) if opt.reference: detections, 
object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'det',opt.ref_det+'.txt'), frame_idx, det_matrix, threshold = 0, nms_threshold = opt.nms_thresh) elif opt.ground_truth: detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'gt','gt.txt'), frame_idx, det_matrix, nms_threshold = opt.nms_thresh) else: raise("Must specify ground truth or detections") # --- START OF TRACKING --- # start_time = time.time() if detections is None or len(detections)==0: tracker.predict() if opt.log_data: full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks) start_time = time.time() tracker.update(input_img, []) else: total_dets = len(detections) patches = get_image_patches(input_img, detections) appearance_features = generate_features_batched(appearance_model, patches, opt, object_ids) if opt.point_cloud: if not opt.omni: point_cloud = point_cloud[point_cloud[:,2]>=0] if opt.fpointnet: boxes_3d, valid_3d, _, scores_3d, depth_features = generate_detections_3d(depth_model, detections, np.asarray(point_cloud), dataset.calib, input_img.shape, peds='ped' in opt.ref_det or opt.omni) depth_features = convert_depth_features(depth_features, valid_3d) else: boxes_3d, valid_3d = read_ground_truth_3d_detections(os.path.join(opt.sequence_folder,'gt','3d_detections.txt'), frame_idx) features, appearance_features = combine_features(appearance_features, depth_features, valid_3d, combination_model, depth_weight = opt.depth_weight) # boxes_3d = boxes_3d[valid_3d != -1] # Old and buggy way of handling missing box # detections = detections[valid_3d != -1] if np.any(valid_3d == -1): compare_2d = True else: compare_2d = False if len(boxes_3d) > 0: detections_3d = [] for idx, box in enumerate(boxes_3d): if valid_3d[idx] == -1: detections_3d.append(None) else: detections_3d.append(np.array(box).astype(np.float32)) else: detections_3d = None else: appearance_features = [appearance_features[i] for i in 
range(total_dets)] features = [None]*len(appearance_features) compare_2d = True detections_3d = None detections = convert_detections(detections, features, appearance_features, detections_3d) tracker.predict() if opt.log_data: full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks) start_time = time.time() tracker.update(input_img, detections, compare_2d) # --- END OF TRACKING --- end_time = time.time() frame_times.append(end_time - start_time) if opt.log_data: full_tracks = copy.deepcopy(tracker.tracks) temp_tracks = [] for track in full_tracks: bbox = track.to_tlwh(None) if not (bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10): temp_tracks.append(track) full_log[frame_idx]['tracks'] = temp_tracks full_log[frame_idx]['detections'] = copy.deepcopy(detections) for track in tracker.tracks: if opt.track_3d: bbox_3d = track.to_tlwh3d() else: bbox = track.to_tlwh(None) if bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10: continue bbox[0] = max(0,bbox[0]) # Frame adjustments bbox[1] = max(0,bbox[1]) bbox[2] = min(bbox[0]+bbox[2], input_img.shape[2])-bbox[0] bbox[3] = min(bbox[1]+bbox[3], input_img.shape[1])-bbox[1] track_status = 1 if not track.is_confirmed(): # or track.time_since_update > 0: if opt.near_online: if not track.is_confirmed(): track_status = 0 else: track_status = 2 continue else: continue if opt.near_online: if opt.track_3d: results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status]) else: results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track_status]) if track_status == 1: #updates 0s for row_i in range(len(results)): if results[row_i][1] == track.track_id: results[row_i][6] = 1 if opt.point_cloud: if results_3d[row_i][1] == track.track_id: results_3d[row_i][7] = 1 else: if 
# Quantile tables of the chi-square distribution with N degrees of freedom
# (N = 1, ..., 9).  According to the original note the 0.95 table was taken
# from MATLAB/Octave's chi2inv function and is used as Mahalanobis gating
# threshold.
# NOTE(review): the values stored under `chi2inv995` look like the 0.005
# quantile rather than the 0.995 quantile -- confirm before relying on the name.
chi2inv95 = {
    1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070,
    6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919}

chi2inv90 = {
    1: 2.706, 2: 4.605, 3: 6.251, 4: 7.779, 5: 9.236,
    6: 10.645, 7: 12.017, 8: 13.363, 9: 14.684}

chi2inv975 = {
    1: 5.025, 2: 7.378, 3: 9.348, 4: 11.143, 5: 12.833,
    6: 14.449, 7: 16.013, 8: 17.535, 9: 19.023}

chi2inv10 = {
    1: .016, 2: .221, 3: .584, 4: 1.064, 5: 1.610,
    6: 2.204, 7: 2.833, 8: 3.490, 9: 4.168}

chi2inv995 = {
    1: 0.0000393, 2: 0.0100, 3: .0717, 4: .207, 5: .412,
    6: .676, 7: .989, 8: 1.344, 9: 1.735}

chi2inv75 = {
    1: 1.323, 2: 2.773, 3: 4.108, 4: 5.385, 5: 6.626,
    6: 7.841, 7: 9.037, 8: 10.22, 9: 11.39}


def squared_mahalanobis_distance(mean, covariance, measurements):
    """Return the squared Mahalanobis distance of each measurement to `mean`.

    Parameters
    ----------
    mean : ndarray
        Reference vector of length D.
    covariance : ndarray
        D x D covariance matrix; it is inverted explicitly.
    measurements : ndarray
        N x D array with one measurement per row.

    Returns
    -------
    ndarray
        Length-N array with ``d_i^T * inv(covariance) * d_i`` for every row
        ``d_i = measurements[i] - mean``.
    """
    d = measurements - mean
    # The explicit inverse is kept on purpose: the original author found the
    # Cholesky-based variant produced wrong answers.  Only the row-wise
    # quadratic forms are evaluated, instead of building the full N x N
    # product and then extracting its diagonal.
    weighted = np.matmul(d, np.linalg.inv(covariance))
    return np.sum(weighted * d, axis=1)


class EKF(object):
    """Generic extended Kalman filter base class.

    The model-specific hooks (`initiate`, `predict_mean`,
    `predict_covariance`, `project_mean`, `project_cov`, ...) are empty stubs
    here and are expected to be overridden.  `update` additionally reads
    ``self._observation_mat`` and, in JPDA mode, ``self.ndim``; neither is set
    by this base class.
    """

    def __init__(self):
        pass

    def initiate(self, measurement):
        """Create a track from an unassociated measurement (stub).

        Parameters
        ----------
        measurement : ndarray

        Returns
        -------
        (ndarray, ndarray)
            Intended to return the mean vector and covariance matrix of the
            new track; the base implementation returns None.
        """
        pass

    def predict_mean(self, mean):
        """Propagate the state mean through the motion model (stub).

        `predict` uses the return value directly as the new mean.
        """
        pass

    def get_process_noise(self, mean, covariance):
        """Return the motion (process) noise covariance (stub)."""
        pass

    def predict_covariance(self, mean, covariance, last_detection=None,
                           next_to_last_detection=None):
        """Propagate the state covariance through the motion model (stub).

        The two detection arguments are accepted (and optional) because
        `predict` always forwards them; the previous two-argument stub made
        the base-class `predict` fail with a TypeError.
        """
        pass

    def project_mean(self, mean):
        """Map the state mean into measurement space (stub)."""
        pass

    def project_cov(self, mean, covariance):
        """Return the covariance in measurement space (stub).

        `update` treats the result as the innovation covariance.
        """
        pass

    def predict(self, mean, covariance, last_detection, next_to_last_detection):
        """Run the Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous time step.
        covariance : ndarray
            The covariance matrix of the object state at the previous time
            step.
        last_detection, next_to_last_detection
            Forwarded unchanged to `predict_covariance`.

        Returns
        -------
        (ndarray, ndarray)
            Mean vector and covariance matrix of the predicted state.
        """
        # The covariance is predicted first so that it sees the *prior* mean.
        covariance = self.predict_covariance(
            mean, covariance, last_detection, next_to_last_detection)
        mean = self.predict_mean(mean)
        return mean, covariance

    def get_innovation_cov(self, covariance):
        """Return the innovation covariance (stub)."""
        pass

    def project(self, mean, covariance):
        """Project the state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector.
        covariance : ndarray
            The state's covariance matrix.

        Returns
        -------
        (ndarray, ndarray)
            ``(self.project_mean(mean), self.project_cov(mean, covariance))``.
        """
        return self.project_mean(mean), self.project_cov(mean, covariance)

    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):
        """Run the Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector.
        covariance : ndarray
            The predicted state's covariance matrix.
        measurement_t : ndarray or sequence of ndarray
            A single measurement vector, or (when `JPDA` is true) the
            measurements of the current time step.
        marginalization : sequence of float, optional
            Association probabilities, required when `JPDA` is true.  Entry 0
            is the dummy ("no detection") probability and entry ``k + 1``
            belongs to ``measurement_t[k]``.
        JPDA : bool
            Whether to apply the probabilistically weighted update.

        Returns
        -------
        (ndarray, ndarray)
            The measurement-corrected mean and covariance.
        """
        predicted_measurement, innovation_cov = self.project(mean, covariance)

        # K = covariance * H^T * inv(S), i.e. K^T solves S * K^T = (covariance * H^T)^T.
        rhs = np.dot(covariance, self._observation_mat.T).T
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            kalman_gain = scipy.linalg.cho_solve(
                (chol_factor, lower), rhs, check_finite=False).T
        except (np.linalg.LinAlgError, ValueError):
            # Cholesky failed (S not positive definite): use the general
            # solver.  Only numerical failures are caught so that e.g.
            # KeyboardInterrupt is no longer swallowed.
            kalman_gain = np.linalg.solve(innovation_cov, rhs).T

        if JPDA:
            # Marginalise over all association hypotheses.
            innovation = np.zeros((self.ndim))
            cov_soft = np.zeros((self.ndim, self.ndim))
            for measurement_idx, measurement in enumerate(measurement_t):
                p_ij = marginalization[measurement_idx + 1]  # + 1 for dummy
                y_ij = measurement - predicted_measurement
                innovation += y_ij * p_ij
                cov_soft += p_ij * np.outer(y_ij, y_ij)
            # Spread-of-innovations term.
            cov_soft = cov_soft - np.outer(innovation, innovation)

            P_star = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))
            p_0 = marginalization[0]
            # Blend "nothing associated" (prior) with the standard update.
            P_0 = p_0 * covariance + (1 - p_0) * P_star
            new_covariance = P_0 + np.linalg.multi_dot(
                (kalman_gain, cov_soft, kalman_gain.T))
        else:
            innovation = measurement_t - predicted_measurement
            new_covariance = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        return new_mean, new_covariance
def get_unmatched(all_idx, matches, i, marginalization=None):
    """Return the entries of `all_idx` that do not occur in `matches`.

    Parameters
    ----------
    all_idx : iterable
        Candidate indices (track ids or detection indices).
    matches : list of tuple
        ``(track, detection)`` pairs.
    i : int
        Which element of each pair to compare against (0 = track,
        1 = detection).
    marginalization : ndarray, optional
        Accepted for interface compatibility only.  The original computed an
        "in gate" detection list from it but never used the result
        (TODO in the original: filter by gate?), so it is ignored here.

    Returns
    -------
    list
        Unmatched indices, in the order they appear in `all_idx`.
    """
    assigned = {match[i] for match in matches}
    return [idx for idx in all_idx if idx not in assigned]


class Matcher:
    """Turn JPDA marginal association probabilities into hard matches.

    `marginalizations` is indexed ``[row, column]`` where ``row`` is the
    position of a track inside `confirmed_tracks` and column 0 is the dummy
    ("no detection") node, so detection ``d`` lives in column ``d + 1``.
    """

    def __init__(self, detections, marginalizations, confirmed_tracks,
                 matching_strategy,
                 assignment_threshold=None):
        self.detections = detections
        self.marginalizations = marginalizations
        self.confirmed_tracks = confirmed_tracks
        self.assignment_threshold = assignment_threshold
        self.detection_indices = np.arange(len(detections))
        self.matching_strategy = matching_strategy

    def match(self):
        """Run the configured strategy.

        Returns
        -------
        (list, list, list)
            ``(matches, unmatched_tracks, unmatched_detections)`` where
            `matches` holds ``(track_idx, detection_idx)`` tuples.
        """
        self.get_matches()
        self.get_unmatched_tracks()
        self.get_unmatched_detections()
        return self.matches, self.unmatched_tracks, self.unmatched_detections

    def get_matches(self):
        """Populate ``self.matches`` according to ``self.matching_strategy``.

        Raises
        ------
        Exception
            If the strategy name is not recognised.
        """
        if self.matching_strategy == "max_and_threshold":
            self.max_and_threshold_matching()
        elif self.matching_strategy == "hungarian":
            self.hungarian()
        elif self.matching_strategy == "max_match":
            self.max_match()
        elif self.matching_strategy == "none":
            self.matches = []
        else:
            raise Exception('Unrecognized matching strategy: {}'.
                            format(self.matching_strategy))

    def get_unmatched_tracks(self):
        """Store the confirmed tracks that received no match."""
        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,
                                              self.matches, 0)

    def get_unmatched_detections(self):
        """Store the detection indices that received no match."""
        self.unmatched_detections = get_unmatched(self.detection_indices,
                                                  self.matches, 1,
                                                  self.marginalizations)

    def _below_threshold(self, probability):
        """True if a threshold is configured and `probability` is below it."""
        return (self.assignment_threshold is not None
                and probability < self.assignment_threshold)

    def max_match(self):
        """Give each detection to the track that claims it with highest probability.

        Every track proposes its most likely column.  Proposals for the dummy
        column or below the assignment threshold are dropped; if several
        tracks propose the same detection, the one with the larger marginal
        wins (the earlier track wins ties).

        The previous implementation compared
        ``marginalizations[track_idx, detection_id]``, i.e. it indexed rows by
        track id instead of row position and omitted the dummy-column offset,
        and it evaluated the threshold only after the assignment had already
        been recorded.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        best = {}  # detection_id -> (probability, track_idx)
        for row, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[row, :]
            column = np.argmax(marginalization)
            detection_id = column - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            probability = marginalization[column]
            if self._below_threshold(probability):
                continue
            if detection_id not in best or probability > best[detection_id][0]:
                best[detection_id] = (probability, track_idx)

        self.matches = [(track_idx, detection_id)
                        for detection_id, (_, track_idx) in best.items()]

    def max_and_threshold_matching(self):
        """Match each track to its most likely detection if it passes the threshold.

        Unlike `max_match`, several tracks may be matched to the same
        detection.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        for row, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[row, :]
            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            if self._below_threshold(marginalization[detection_id + 1]):
                continue
            self.matches.append((track_idx, detection_id))

    def hungarian(self):
        """Delegate to `min_marg_matching` (imported from linear_assignment)."""
        self.matches, _, _ = min_marg_matching(self.marginalizations,
                                               self.confirmed_tracks,
                                               self.assignment_threshold)
def transfer_optim_state(state, device_id=-1):
    """Move the tensors of an optimizer state to the CPU or to a GPU, in place.

    Nested dicts are processed recursively.  Values that cannot be moved
    (plain numbers such as step counters, or tensors when the requested
    device is unavailable) are left untouched.

    Since PyTorch 0.4 ``isinstance(tensor, torch.autograd.Variable)`` is true
    for every tensor, so the former "is a Variable" guard rejected ordinary
    optimizer state.  Only ``torch.nn.Parameter`` entries are rejected now.

    Args:
      state: A ``torch.optim.Optimizer.state`` (a possibly nested dict).
      device_id: gpu id, or -1 which means transferring to cpu.

    Raises:
      RuntimeError: if a state entry is a ``torch.nn.Parameter``.
    """
    for key, val in state.items():
        if isinstance(val, dict):
            transfer_optim_state(val, device_id=device_id)
        elif isinstance(val, torch.nn.Parameter):
            raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
        else:
            try:
                if device_id == -1:
                    state[key] = val.cpu()
                else:
                    state[key] = val.cuda(device=device_id)
            except (AttributeError, TypeError, RuntimeError, AssertionError):
                # Best effort: non-tensor values have no .cpu()/.cuda(), and a
                # missing CUDA device must not abort the transfer.  Narrowed
                # from a bare `except` so interrupts are no longer swallowed.
                pass
class TransferVarTensor(object):
    """Callable that returns a copy of a Variable/Tensor on a fixed device.

    ``device_id == -1`` selects the CPU; any other value is passed to
    ``.cuda()``.
    """

    def __init__(self, device_id=-1):
        self.device_id = device_id

    def __call__(self, var_or_tensor):
        if self.device_id == -1:
            return var_or_tensor.cpu()
        return var_or_tensor.cuda(self.device_id)


class TransferModulesOptims(object):
    """Callable that moves modules/optimizers to a fixed device.

    The work is delegated to `may_transfer_modules_optims`.
    """

    def __init__(self, device_id=-1):
        self.device_id = device_id

    def __call__(self, modules_and_or_optims):
        may_transfer_modules_optims(modules_and_or_optims, self.device_id)


def set_devices(sys_device_ids):
    """Restrict the visible GPUs and build the matching transfer helpers.

    Writes ``CUDA_VISIBLE_DEVICES`` (each id followed by ``", "``) and then
    targets relative device 0 when at least one id was given, otherwise the
    CPU (-1).

    Args:
      sys_device_ids: a tuple; which GPUs to use
        e.g.  sys_device_ids = (), only use cpu
              sys_device_ids = (3,), use the 4th gpu
              sys_device_ids = (0, 1, 2, 3,), use first 4 gpus
              sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
    Returns:
      TVT: a `TransferVarTensor` callable
      TMO: a `TransferModulesOptims` callable
    """
    import os

    # Environment variable: one "<id>, " fragment per requested device.
    fragments = ['{}, '.format(gpu) for gpu in sys_device_ids]
    os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(fragments)

    # Models and user defined Variables/Tensors go to the first visible
    # device, or stay on the CPU when no GPU was requested.
    if len(sys_device_ids) > 0:
        target = 0
    else:
        target = -1
    return TransferVarTensor(target), TransferModulesOptims(target)
It sets some GPUs to be visible and returns some wrappers to transferring Variables/Tensors and Modules/Optimizers. Args: sys_device_ids: a tuple of tuples; which devices to use for each model, len(sys_device_ids) should be equal to number of models. Examples: sys_device_ids = ((-1,), (-1,)) the two models both on CPU sys_device_ids = ((-1,), (2,)) the 1st model on CPU, the 2nd model on GPU 2 sys_device_ids = ((3,),) the only one model on the 4th gpu sys_device_ids = ((0, 1), (2, 3)) the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3 sys_device_ids = ((0,), (0,)) the two models both on GPU 0 sys_device_ids = ((0,), (0,), (1,), (1,)) the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1 Returns: TVTs: a list of `TransferVarTensor` callables, one for one model. TMOs: a list of `TransferModulesOptims` callables, one for one model. relative_device_ids: a list of lists; `sys_device_ids` transformed to relative ids; to be used in `DataParallel` """ import os all_ids = [] for ids in sys_device_ids: all_ids += ids unique_sys_device_ids = list(set(all_ids)) unique_sys_device_ids.sort() if -1 in unique_sys_device_ids: unique_sys_device_ids.remove(-1) # Set the CUDA_VISIBLE_DEVICES environment variable visible_devices = '' for i in unique_sys_device_ids: visible_devices += '{}, '.format(i) os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices # Return wrappers relative_device_ids = [] TVTs, TMOs = [], [] for ids in sys_device_ids: relative_ids = [] for id in ids: if id != -1: id = find_index(unique_sys_device_ids, id) relative_ids.append(id) relative_device_ids.append(relative_ids) # Models and user defined Variables/Tensors would be transferred to the # first device. TVTs.append(TransferVarTensor(relative_ids[0])) TMOs.append(TransferModulesOptims(relative_ids[0])) return TVTs, TMOs, relative_device_ids def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True): """Load state_dict's of modules/optimizers from file. 
def load_state_dict(model, src_state_dict):
    """Copy matching parameters and buffers from `src_state_dict` into `model`.

    Keys of `src_state_dict` need not match ``model.state_dict()`` exactly:
    entries unknown to the model are skipped silently, and an entry whose
    copy fails only produces a printed warning before the loop moves on.

    Arguments:
      model: A torch.nn.Module object.
      src_state_dict (dict): A dict containing parameters and persistent
        buffers.
    Note:
      Adapted from torch.nn.modules.module.load_state_dict() with more
      permissive handling of mismatches (used for fine tuning).
    """
    from torch.nn import Parameter

    target = model.state_dict()
    for key, value in src_state_dict.items():
        # Fine-tuning: ignore source entries the model does not have.
        if key not in target:
            continue
        # Backwards compatibility for serialized parameters.
        tensor = value.data if isinstance(value, Parameter) else value
        try:
            target[key].copy_(tensor)
        except Exception as err:
            print("Warning: Error occurs when copying '{}': {}"
                  .format(key, str(err)))
Computes and stores the average and current value""" def __init__(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = float(self.sum) / (self.count + 1e-20) class RunningAverageMeter(object): """Computes and stores the running average and current value""" def __init__(self, hist=0.99): self.val = None self.avg = None self.hist = hist def reset(self): self.val = None self.avg = None def update(self, val): if self.avg is None: self.avg = val else: self.avg = self.avg * self.hist + val * (1 - self.hist) self.val = val class RecentAverageMeter(object): """Stores and computes the average of recent values.""" def __init__(self, hist_size=100): self.hist_size = hist_size self.fifo = [] self.val = 0 def reset(self): self.fifo = [] self.val = 0 def update(self, val): self.val = val self.fifo.append(val) if len(self.fifo) > self.hist_size: del self.fifo[0] @property def avg(self): assert len(self.fifo) > 0 return float(sum(self.fifo)) / len(self.fifo) def get_model_wrapper(model, multi_gpu): from torch.nn.parallel import DataParallel if multi_gpu: return DataParallel(model) else: return model class ReDirectSTD(object): """Modified from Tong Xiao's `Logger` in open-reid. This class overwrites sys.stdout or sys.stderr, so that console logs can also be written to file. Args: fpath: file path console: one of ['stdout', 'stderr'] immediately_visible: If `False`, the file is opened only once and closed after exiting. In this case, the message written to file may not be immediately visible (Because the file handle is occupied by the program?). If `True`, each writing operation of the console will open, write to, and close the file. If your program has tons of writing operations, the cost of opening and closing file may be obvious. (?) 
Usage example: `ReDirectSTD('stdout.txt', 'stdout', False)` `ReDirectSTD('stderr.txt', 'stderr', False)` NOTE: File will be deleted if already existing. Log dir and file is created lazily -- if no message is written, the dir and file will not be created. """ def __init__(self, fpath=None, console='stdout', immediately_visible=False): import sys import os import os.path as osp assert console in ['stdout', 'stderr'] self.console = sys.stdout if console == 'stdout' else sys.stderr self.file = fpath self.f = None self.immediately_visible = immediately_visible if fpath is not None: # Remove existing log file. if osp.exists(fpath): os.remove(fpath) # Overwrite if console == 'stdout': sys.stdout = self else: sys.stderr = self def __del__(self): self.close() def __enter__(self): pass def __exit__(self, *args): self.close() def write(self, msg): self.console.write(msg) if self.file is not None: may_make_dir(os.path.dirname(osp.abspath(self.file))) if self.immediately_visible: with open(self.file, 'a') as f: f.write(msg) else: if self.f is None: self.f = open(self.file, 'w') self.f.write(msg) def flush(self): self.console.flush() if self.f is not None: self.f.flush() import os os.fsync(self.f.fileno()) def close(self): self.console.close() if self.f is not None: self.f.close() def set_seed(seed): import random random.seed(seed) print('setting random-seed to {}'.format(seed)) import numpy as np np.random.seed(seed) print('setting np-random-seed to {}'.format(seed)) import torch torch.backends.cudnn.enabled = False print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled)) # set seed for CPU torch.manual_seed(seed) print('setting torch-seed to {}'.format(seed)) def print_array(array, fmt='{:.2f}', end=' '): """Print a 1-D tuple, list, or numpy array containing digits.""" s = '' for x in array: s += fmt.format(float(x)) + end s += '\n' print(s) return s # Great idea from https://github.com/amdegroot/ssd.pytorch def str2bool(v): return v.lower() in ("yes", "true", 
"t", "1") def tight_float_str(x, fmt='{:.4f}'): return fmt.format(x).rstrip('0').rstrip('.') def find_index(seq, item): for i, x in enumerate(seq): if item == x: return i return -1 def adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep): """Decay exponentially in the later phase of training. All parameters in the optimizer share the same learning rate. Args: optimizer: a pytorch `Optimizer` object base_lr: starting learning rate ep: current epoch, ep >= 1 total_ep: total number of epochs to train start_decay_at_ep: start decaying at the BEGINNING of this epoch Example: base_lr = 2e-4 total_ep = 300 start_decay_at_ep = 201 It means the learning rate starts at 2e-4 and begins decaying after 200 epochs. And training stops after 300 epochs. NOTE: It is meant to be called at the BEGINNING of an epoch. """ assert ep >= 1, "Current epoch number should be >= 1" if ep < start_decay_at_ep: return for g in optimizer.param_groups: g['lr'] = (base_lr * (0.001 ** (float(ep + 1 - start_decay_at_ep) / (total_ep + 1 - start_decay_at_ep)))) print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0')) def adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor): """Multiplied by a factor at the BEGINNING of specified epochs. All parameters in the optimizer share the same learning rate. Args: optimizer: a pytorch `Optimizer` object base_lr: starting learning rate ep: current epoch, ep >= 1 decay_at_epochs: a list or tuple; learning rate is multiplied by a factor at the BEGINNING of these epochs factor: a number in range (0, 1) Example: base_lr = 1e-3 decay_at_epochs = [51, 101] factor = 0.1 It means the learning rate starts at 1e-3 and is multiplied by 0.1 at the BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at the BEGINNING of the 101'st epoch, then stays unchanged till the end of training. NOTE: It is meant to be called at the BEGINNING of an epoch. 
""" assert ep >= 1, "Current epoch number should be >= 1" if ep not in decay_at_epochs: return ind = find_index(decay_at_epochs, ep) for g in optimizer.param_groups: g['lr'] = base_lr * factor ** (ind + 1) print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0')) @contextmanager def measure_time(enter_msg): st = time.time() print(enter_msg) yield print('Done, {:.2f}s'.format(time.time() - st)) # @profile def generate_features(appearance_model, patches, opt, object_ids = None): Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor features = [] for i, patch in enumerate(patches): if patch is None or patch.nelement()==0: features.append(None) continue patch = patch.unsqueeze(0) if opt.perfect: feature = torch.zeros(1024) feature[object_ids[i]] = 1 feature = feature.type(Tensor) else: if opt.appearance_model == 'aligned_reid': with torch.no_grad(): feature ,_ = appearance_model(patch.cuda()) feature = feature.squeeze(0).type(Tensor) elif opt.appearance_model == 'deepsort': patch = patch.permute(0,2,3,1).cpu().numpy() feature = appearance_model(patch) feature = feature[0] elif opt.appearance_model == 'resnet_reid': patch = patch.permute(0,2,3,1) feature = appearance_model.inference([patch.squeeze(0)]) feature = feature[0][0].type(Tensor) features.append(feature) return features # @profile def generate_features_batched(appearance_model, patches, opt, object_ids = None): if opt.perfect or opt.appearance_model == 'deepsort': # Do old/slow way if perfect features or deepsort features return generate_features(appearance_model, patches, opt, object_ids) if opt.appearance_model == 'resnet_reid': Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor features = [] resnet_patches = [] for i, patch in enumerate(patches): if patch is None or patch.nelement()==0: features.append(None) else: features.append(1) resnet_patches.append(patch.permute(1,2,0)) resnet_features = appearance_model.inference(resnet_patches) ctr = 0 for i in 
range(len(features)): if features[i] is not None: features[i] = resnet_features[ctr].type(Tensor) ctr += 1 return features elif opt.appearance_model == 'aligned_reid': return generate_features(appearance_model, patches, opt, object_ids) #TODO: Fix batched appearance features. This currently gives bad features Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor maxx = -1 maxy = -1 idxs = [] features = [] for i, patch in enumerate(patches): if patch is None or patch.nelement()==0: continue maxx = max(maxx, patch.size()[1]) maxy = max(maxy, patch.size()[2]) idxs.append(i) if(maxx==-1 and maxy==-1): return features batch = torch.zeros(len(idxs),3,maxx,maxy).cuda() for i, idx in enumerate(idxs): patch = patches[idx] patchx = patch.size()[1] patchy = patch.size()[2] batch[i,:,:patchx,:patchy] = patch with torch.no_grad(): features_torch, _ = appearance_model(batch) features_torch = features_torch.type(Tensor) i = 0 ctr = 0 for idx in idxs: while(i < idx): features.append(None) i+=1 features.append(features_torch[ctr,:]) i+=1 ctr+=1 while(i conf_threshold] ######## det_frames = detections[:, 0] det_confidence = detections[:, 6] gt_boxes = np.asarray(list(zip(gt[:, 2], gt[:, 3], gt[:, 4], gt[:, 5]))) det_boxes = np.asarray(list(zip(detections[:, 2], detections[:, 3], detections[:, 4], detections[:, 5]))) out_matrix = [] assigned_ids = [] for frame in np.unique(det_frames): frame_mask_det = det_frames == frame frame_mask_gt = gt_frames == frame gt_ids = gt[frame_mask_gt, 1] frame_gt_boxes = gt_boxes[frame_mask_gt] frame_det_boxes = det_boxes[frame_mask_det] for i, det_box in enumerate(frame_det_boxes): iou_list = np.asarray([iou(gt_box, det_box) for gt_box in frame_gt_boxes]) iou_sorted = np.argsort(iou_list) positive_idx = np.where(iou_list >= iou_threshold)[0] if len(positive_idx)==0: assigned_ids.append(-1) else: assigned_ids.append(gt_ids[iou_sorted[-1]]) assigned_ids = np.expand_dims(np.asarray(assigned_ids), 1) try: out_matrix = 
np.hstack([np.expand_dims(detections[:,0], 1), assigned_ids, detections[:,2:]]) except: pdb.set_trace() np.savetxt(detection_path, out_matrix, delimiter=',', fmt = '%.2f') return if __name__=='__main__': ap = [] KITTI_root = 'data/KITTI/sequences' for sequence in tqdm(range(21)): assign_detection_id(os.path.join(KITTI_root, '%.4d'%sequence, 'det','rrc_subcnn_car_det.txt'), os.path.join(KITTI_root, '%.4d'%sequence, 'gt', 'gt_car.txt')) ================================================ FILE: paper_experiments/utils/calibration.py ================================================ import numpy as np import cv2 import os import yaml import torch import pdb class Calibration(object): ''' Calibration matrices and utils 3d XYZ in