[
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 2.8.3)\nproject(jpda_rospack)\n\n## Compile as C++11, supported in ROS Kinetic and newer\n# add_compile_options(-std=c++11)\n\n## Find catkin macros and libraries\n## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)\n## is used, also find other catkin packages\nfind_package(catkin REQUIRED COMPONENTS\n  roscpp\n  rospy\n  std_msgs\n  vision_msgs\n  message_generation\n)\n\n## System dependencies are found with CMake's conventions\n# find_package(Boost REQUIRED COMPONENTS system)\n\n\n## Uncomment this if the package has a setup.py. This macro ensures\n## modules and global scripts declared therein get installed\n## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html\n# catkin_python_setup()\n\n################################################\n## Declare ROS messages, services and actions ##\n################################################\n\n## To declare and build messages, services or actions from within this\n## package, follow these steps:\n## * Let MSG_DEP_SET be the set of packages whose message types you use in\n##   your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).\n## * In the file package.xml:\n##   * add a build_depend tag for \"message_generation\"\n##   * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET\n##   * If MSG_DEP_SET isn't empty the following dependency has been pulled in\n##     but can be declared for certainty nonetheless:\n##     * add a exec_depend tag for \"message_runtime\"\n## * In this file (CMakeLists.txt):\n##   * add \"message_generation\" and every package in MSG_DEP_SET to\n##     find_package(catkin REQUIRED COMPONENTS ...)\n##   * add \"message_runtime\" and every package in MSG_DEP_SET to\n##     catkin_package(CATKIN_DEPENDS ...)\n##   * uncomment the add_*_files sections below as needed\n##     and list every .msg/.srv/.action file to be processed\n##   * uncomment the generate_messages entry below\n##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)\n\n## Generate messages in the 'msg' folder\nadd_message_files(\n   FILES\n   detection2d_with_feature.msg\n   detection2d_with_feature_array.msg\n   detection3d_with_feature.msg\n   detection3d_with_feature_array.msg\n )\n\n## Generate services in the 'srv' folder\n# add_service_files(\n#   FILES\n#   Service1.srv\n#   Service2.srv\n# )\n\n## Generate actions in the 'action' folder\n# add_action_files(\n#   FILES\n#   Action1.action\n#   Action2.action\n# )\n\n## Generate added messages and services with any dependencies listed here\n generate_messages(\n   DEPENDENCIES\n   std_msgs\n   vision_msgs\n   jpda_rospack\n  )\n\n################################################\n## Declare ROS dynamic reconfigure parameters ##\n################################################\n\n## To declare and build dynamic reconfigure parameters within this\n## package, follow these steps:\n## * In the file package.xml:\n##   * add a build_depend and a exec_depend tag for \"dynamic_reconfigure\"\n## * In this file (CMakeLists.txt):\n##   * add \"dynamic_reconfigure\" to\n##     find_package(catkin REQUIRED COMPONENTS ...)\n##   * uncomment the \"generate_dynamic_reconfigure_options\" section below\n##     and list every .cfg file to be processed\n\n## Generate dynamic reconfigure parameters in the 'cfg' folder\n# generate_dynamic_reconfigure_options(\n#   cfg/DynReconf1.cfg\n#   cfg/DynReconf2.cfg\n# )\n\n###################################\n## catkin specific configuration ##\n###################################\n## The catkin_package macro generates cmake config files for your package\n## Declare things to be passed to dependent projects\n## INCLUDE_DIRS: uncomment this if your package contains header files\n## LIBRARIES: libraries you create in this project that dependent projects also need\n## CATKIN_DEPENDS: catkin_packages dependent projects also need\n## DEPENDS: system dependencies of this project that dependent projects also need\ncatkin_package(\n#  INCLUDE_DIRS include\n#  LIBRARIES jpda_rospack\n  CATKIN_DEPENDS roscpp rospy std_msgs vision_msgs message_runtime\n#  DEPENDS system_lib\n)\n\n###########\n## Build ##\n###########\n\n## Specify additional locations of header files\n## Your package locations should be listed before other locations\ninclude_directories(\n# include\n  ${catkin_INCLUDE_DIRS}\n)\n\n## Declare a C++ library\n# add_library(${PROJECT_NAME}\n#   src/${PROJECT_NAME}/jpda_rospack.cpp\n# )\n\n## Add cmake target dependencies of the library\n## as an example, code may need to be generated before libraries\n## either from message generation or dynamic reconfigure\n# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})\n\n## Declare a C++ executable\n## With catkin_make all packages are built within a single CMake context\n## The recommended prefix ensures that target names across packages don't collide\n# add_executable(${PROJECT_NAME}_node src/jpda_rospack_node.cpp)\n\n## Rename C++ executable without prefix\n## The above recommended prefix causes long target names, the following renames the\n## target back to the shorter version for ease of user use\n## e.g. \"rosrun someones_pkg node\" instead of \"rosrun someones_pkg someones_pkg_node\"\n# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX \"\")\n\n## Add cmake target dependencies of the executable\n## same as for the library above\n# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})\n\n## Specify libraries to link a library or executable target against\n# target_link_libraries(${PROJECT_NAME}_node\n#   ${catkin_LIBRARIES}\n# )\n\n#############\n## Install ##\n#############\n\n# all install targets should use catkin DESTINATION variables\n# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html\n\n## Mark executable scripts (Python etc.) for installation\n## in contrast to setup.py, you can choose the destination\n# install(PROGRAMS\n#   scripts/my_python_script\n#   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}\n# )\n\n## Mark executables and/or libraries for installation\n# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node\n#   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}\n#   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}\n#   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}\n# )\n\n## Mark cpp header files for installation\n# install(DIRECTORY include/${PROJECT_NAME}/\n#   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}\n#   FILES_MATCHING PATTERN \"*.h\"\n#   PATTERN \".svn\" EXCLUDE\n# )\n\n## Mark other files for installation (e.g. launch and bag files, etc.)\n# install(FILES\n#   # myfile1\n#   # myfile2\n#   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}\n# )\n\n#############\n## Testing ##\n#############\n\n## Add gtest based cpp test target and link libraries\n# catkin_add_gtest(${PROJECT_NAME}-test test/test_jpda_rospack.cpp)\n# if(TARGET ${PROJECT_NAME}-test)\n#   target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})\n# endif()\n\n## Add folders to be run by python nosetests\n# catkin_add_nosetests(test)\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2020 Stanford Vision and Learning Group\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# JRMOT ROS package\n\nThe repository contains the code for the work \"JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset\".\n\nNote that due to the global pandemic, this repository is still a work in progress. Updates will be made as soon as possible.\n\n## Introduction\n\nJRMOT is a 3D multi object tracking system that:\n- Is real-time\n- Is online\n- Fuses 2D and 3D information\n- Achieves State of the Art performance on KITTI\n\nWe also release JRDB:\n- A dataset with over 2 million annotated boxes and 3500 time consistent trajectories in 2D and 3D\n- Captured in social, human-centric settings\n- Captured by our social mobile-manipulator JackRabbot\n- Contains 360 degree cylindrical images, stereo camera images, 3D pointclouds and more sensing modalties\n\nAll information, including download links for JRDB can be found [here](https://jrdb.stanford.edu).\n\n## JRMOT\n![system overview](https://github.com/StanfordVL/JRMOT_ROS/blob/master/assets/framework.png)\n\n- Our system is built on top of state of the art 2D and 3D detectors (mask-RCNN and F-PointNet respectively). These detections are associated with predicted track locations at every time step. \n- Association is done via a novel feature fusion, as well as a cost selection procedure, followed by Kalman state gating and JPDA. \n- Given the JPDA output, we use both 2D and 3D detections in a novel multi-modal Kalman filter to update the track locations.\n\n\n## Using the code\n\nThere are 3 nodes forming parts of the ROS package:\n+ 3d_detector.py: Runs F-PointNet, which performs 3D detection and 3D feature extraction\n+ template.py: Runs Aligned-Re-ID, which performs 2D feature extraction\n+ tracker_3d_node.py: Performs tracking while taking both 2D detections + features and 3D detections + features as input\n\nThe launch file in the folder \"launch\" launches all 3 nodes.\n\n## Dependencies\n\nThe following are dependencies of the code:\n\n+ 2D detector: The 2D detector is not included in this package. To interface with your own 2D detector, please modify the file template.py to subscribe to the correct topic, and also to handle the conversion from ROS message to numpy array.\n+ Spencer People Tracking messages: The final tracker output is in a Spencer People Tracking message. Please install this package and include these message types.\n+ Various python packages: These can be found in [requirements.txt](./requirements.txt).. Please install all dependencies prior to running the code (including CUDA and cuDNN. Additionally, this code requires a solver called Gurobi. Instructions to install gurobipy can be found [here](https://www.gurobi.com/documentation/9.0/quickstart_mac/the_grb_python_interface_f.html).\n+ Weight files: The trained weights, (trained on JRDB) for FPointNet and Aligne-ReID can be found [here](https://drive.google.com/open?id=1YQinMPVWEI44KezS9inXe0mvVnm4aL3s).\n\n## Citation\n\nIf you find this work useful, please cite:\n```\n@INPROCEEDINGS{shenoi2020jrmot,\n  author={A. {Shenoi} and M. {Patel} and J. {Gwak} and P. {Goebel} and A. {Sadeghian} and H. {Rezatofighi} and R. {Mart\\'in-Mart\\'in} and S. {Savarese}},\n  booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, \n  title={JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset}, \n  year={2020},\n  volume={},\n  number={},\n  pages={10335-10342},\n  doi={10.1109/IROS45743.2020.9341635}}\n```\n\nIf you utilise our dataset, please also cite:\n\n```\n@article{martin2019jrdb,\n  title={JRDB: A dataset and benchmark of egocentric visual perception for navigation in human environments},\n  author={Mart{\\'i}n-Mart{\\'i}n, Roberto and Patel, Mihir and Rezatofighi, Hamid and Shenoi, Abhijeet and Gwak, JunYoung and Frankel, Eric and Sadeghian, Amir and Savarese, Silvio},\n  journal={arXiv preprint arXiv:1910.11792},\n  year={2019}\n}\n```\n## \n"
  },
  {
    "path": "calib/cameras.yaml",
    "content": "stitching:\n  radius: 3360000\n  rotation: 0\n  scalewidth: 1831\n  crop: 1\ncameras:\n  # camera order matters!\n  sensor_0:\n    width: 752\n    height: 480\n    D: -0.336591 0.159742 0.00012697 -7.22557e-05 -0.0461953\n    # K = fx   0    cx\n    #     0    fy   cy\n    #     0    0    1\n    K: >\n      476.71 0 350.738\n      0 479.505 209.532\n      0 0 1\n    R: >\n      0.999994 0.000654539 0.00340293\n      -0.000654519 1 -6.81963e-06\n      -0.00340293 4.59231e-06 0.999994\n    T: -0.0104242 -3.70974 -56.9177\n  sensor_1:\n    width: 752\n    height: 480\n    D: -0.335073 0.151959 -0.000232061 0.00032014 -0.0396825\n    K: >\n      483.254 0 365.33\n      0 485.78 210.953\n      0 0 1\n    R: >\n      0.305706 -0.00895443 -0.952084\n      0.0110396 0.999922 -0.00585963\n      0.952062 -0.0087193 0.305781\n    T: 0.93957 -4.05131 -52.03\n  sensor_2:\n    width: 752\n    height: 480\n    D: -0.338469 0.156256 -0.000385467 0.000295485 -0.0401965\n    K: >\n      483.911 0 355.144\n      0 486.466 223.026\n      0 0 1\n    R: >\n      -0.806828 0.0136361 -0.590629\n      0.00870468 0.999899 0.011194\n      0.590723 0.00389039 -0.806865\n    T: -0.25753 -6.54978 -47.7311\n  sensor_3:\n    width: 752\n    height: 480\n    D: -0.330848 0.14747 8.59247e-05 0.000262599 -0.0385311\n    K: >\n      475.807 0 339.53\n      0 478.371 188.481\n      0 0 1\n    R: >\n      -0.811334 0.0033829 0.584574\n      0.00046071 0.999987 -0.00514746\n      -0.584583 -0.00390699 -0.811324\n    T: 2.72207 -6.82928 -45.9778\n  sensor_4:\n    width: 752\n    height: 480\n    D: -0.34064 0.168338 0.000147292 0.000229372 -0.0516133\n    K: >\n      485.046 0 368.864\n      0 488.185 208.215\n      0 0 1\n    R: >\n      0.310275 0.00160497 0.950645\n      -0.00648686 0.999979 0.000428942\n      -0.950625 -0.00629979 0.310279\n    T: -0.333857 -5.12974 -56.0573\n  sensor_5:\n    width: 752\n    height: 480\n    D: -0.338422 0.163703 -0.000376267 7.73351e-06 -0.0479871\n    K: >\n      478.406 0 353.499\n      0 481.322 190.225\n      0 0 1\n    R: >\n      0.999995 0.00282205 0.00163291\n      -0.00282345 0.999996 0.000852931\n      -0.00163049 -0.000857537 0.999998\n    T: -0.903588 -126.851 -56.6256\n  sensor_6:\n    width: 752\n    height: 480\n    D: -0.340676 0.165511 -0.00035978 0.000181532 -0.0493721\n    K: >\n      480.459 0 362.503\n      0 482.924 197.949\n      0 0 1\n    R: >\n      0.308288 -0.0110391 -0.951229\n      -0.000933102 0.999929 -0.0119067\n      0.951293 0.00455829 0.308256\n    T: 1.74525 -127.214 -51.7722\n  sensor_7:\n    width: 752\n    height: 480\n    D: -0.344379 0.170343 -0.000137847 0.000141047 -0.0510536\n    K: >\n      486.491 0 361.559\n      0 489.22 210.547\n      0 0 1\n    R: >\n      -0.808201 0.0313998 -0.588068\n      0.026057 0.999506 0.0175574\n      0.588329 -0.00113337 -0.808621\n    T: -2.56535 -129.191 -47.5803\n  sensor_8:\n    width: 752\n    height: 480\n    D: -0.331228 0.144696 0.000117553 0.000566449 -0.0343506\n    K: >\n      476.708 0 354.16\n      0 479.424 209.383\n      0 0 1\n    R: >\n      -0.807384 -0.00296577 0.590019\n      -0.0122001 0.999857 -0.0116688\n      -0.589901 -0.0166195 -0.807305\n    T: 3.39727 -129.381 -45.2409\n  sensor_9:\n    width: 752\n    height: 480\n    D: -0.345189 0.180808 0.000276465 0.000131868 -0.062103\n    K: >\n      484.219 0 345.303\n      0 487.312 192.371\n      0 0 1\n    R: >\n      0.308505 0.00370159 0.951215\n      -0.00403535 0.999988 -0.00258261\n      -0.951214 -0.00304174 0.308517\n    T: 0.354966 -128.218 -54.0617\n\n"
  },
  {
    "path": "calib/defaults.yaml",
    "content": "calibrated:\n  # the lidar_to_rgb parameters allow tweaking of the transformation between lidar and rgb frames\n  # the default transformation is taken from the TF Tree\n  # NOTE: applied to the original (sensor/velodyne) frame [x forward, y left, z up]:\n  lidar_upper_to_rgb:\n    # in meters: [x,y,z]\n    translation: [0, 0, -0.33529]\n    # in radians: [x,y,z]\n    rotation: [0, 0, 0.085]\n \n  lidar_lower_to_rgb:\n\n    translation: [0, 0, 0.13511]\n    \n    rotation: [0, 0, 0]\nimage:\n  # all in pixels\n  width: 3760\n  height: 480\n  # y-axis forward pixel offset (e.g. 3760/2 => 1880, b/c center of the cylindrical image is forward)\n  #  TODO: move into calibrated params, when auto-calibration is possible\n  stitched_image_offset: 1880\n\nframes:\n  # lookup for people transforms\n  global: base_link\n  # name of the rgb360 camera frame to which we wish to transform\n  rgb360: occam\n"
  },
  {
    "path": "config/featurepointnet.cfg",
    "content": "[general]\nnum_point = 1024\nmodel_path = /home/sibot/jr2_catkin_ws/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt\n"
  },
  {
    "path": "launch/jpda_tracker.launch",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\n<launch>\n  <!-- Console launch prefix -->\n  <arg name=\"output\"        default=\"screen\"/>\n\n  <!-- Config and weights folder. -->\n  <arg name=\"aligned_reid_model\"         default=\"$(find jpda_rospack)/src/aligned_reid_JRDB_weights.pth\"/>\n  <arg name=\"fpointnet_config\"           default=\"$(find jpda_rospack)/config/featurepointnet.cfg\"/>\n  <arg name=\"calib_3d\"                   default=\"$(find jpda_rospack)/calib\"/>\n  <arg name=\"combination_depth_weight\"   default=\"1\"/>\n  <arg name=\"combination_model_path\"     default=\"0\"/>\n\n\n  <node pkg=\"jpda_rospack\" type=\"template.py\" name=\"jpda_aligned_reid\" output=\"$(arg output)\" respawn=\"false\">\n    <param name=\"aligned_reid_model\"          value=\"$(arg aligned_reid_model)\" />\n  </node>\n\n  <node pkg=\"jpda_rospack\" type=\"3d_detector.py\" name=\"jpda_3d_detector\" output=\"$(arg output)\" respawn=\"false\">\n    <param name=\"fpointnet_config\"          value=\"$(arg fpointnet_config)\" />\n    <param name=\"calib_3d\"                  value=\"$(arg calib_3d)\" />\n  </node>\n  \n  <node pkg=\"jpda_rospack\" type=\"tracker_3d_node.py\" name=\"jpda_tracker_3d\" output=\"$(arg output)\" respawn=\"false\">\n    <param name=\"combination_depth_weight\"           value=\"$(arg combination_depth_weight)\" />\n    <param name=\"calib_3d\"                           value=\"$(arg calib_3d)\" />\n    <param name=\"combination_model_path\"             value=\"$(arg combination_model_path)\" />\n  </node>\n\n</launch>\n"
  },
  {
    "path": "msg/__init__.py",
    "content": ""
  },
  {
    "path": "msg/detection2d_with_feature.msg",
    "content": "# This message contains a 2D bounding box corresponding to the detection of a person\n# Also contains the feature of this person used for re-ID\n\nHeader header #header timestamp is time of frame acquisition\n\n\nuint64 x1 # x coordinate of the top left of the bounding box\nuint64 y1 # y coordinate of the top left of the bounding box\nuint64 x2 # x coordinate of the bottom right of the bounding box\nuint64 y2 # y coordinate of the bottom right of the bounding box\n\nfloat64[] feature # re-ID feature\n\nuint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)\nbool valid # whether detection is valid (within the boundaries of the image and has minimum required size)"
  },
  {
    "path": "msg/detection2d_with_feature_array.msg",
    "content": "Header header\ndetection2d_with_feature[] detection2d_with_features"
  },
  {
    "path": "msg/detection3d_with_feature.msg",
    "content": "# This message contains a 3D bounding box corresponding to the detection of a person\n# Also contains the feature of this person used for re-ID\n\nHeader header #header timestamp is time of frame acquisition\n\n\nfloat32 x # x coordinate of the center of the bottom face of the bounding box\nfloat32 y # y coordinate of the center of the bottom face of the bounding box\nfloat32 z # x coordinate of the center of the bottom face of the bounding box\nfloat32 l # size of bounding box along x dimension\nfloat32 h # size of bounding box along y dimension\nfloat32 w # size of bounding box along z dimension\nfloat32 theta # rotation of bounding box with respect to the positive x axis\n\n\nfloat64[] feature # re-ID feature\n\nuint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)\nbool valid # whether detection is valid (enough lidar points)"
  },
  {
    "path": "msg/detection3d_with_feature_array.msg",
    "content": "Header header\ndetection3d_with_feature[] detection3d_with_features"
  },
  {
    "path": "package.xml",
    "content": "<?xml version=\"1.0\"?>\n<package format=\"2\">\n  <name>jpda_rospack</name>\n  <version>0.0.1</version>\n  <description>The jpda_rospack package</description>\n\n  <!-- One maintainer tag required, multiple allowed, one person per tag -->\n  <!-- Example:  -->\n  <!-- <maintainer email=\"jane.doe@example.com\">Jane Doe</maintainer> -->\n  <maintainer email=\"ashenoi@cs.stanford.edu\">ashenoi</maintainer>\n\n\n  <!-- One license tag required, multiple allowed, one license per tag -->\n  <!-- Commonly used license strings: -->\n  <!--   BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->\n  <license>TODO</license>\n\n\n  <!-- Url tags are optional, but multiple are allowed, one per tag -->\n  <!-- Optional attribute type can be: website, bugtracker, or repository -->\n  <!-- Example: -->\n  <!-- <url type=\"website\">http://wiki.ros.org/jpda_rospack</url> -->\n\n\n  <!-- Author tags are optional, multiple are allowed, one per tag -->\n  <!-- Authors do not have to be maintainers, but could be -->\n  <!-- Example: -->\n  <!-- <author email=\"jane.doe@example.com\">Jane Doe</author> -->\n\n\n  <!-- The *depend tags are used to specify dependencies -->\n  <!-- Dependencies can be catkin packages or system dependencies -->\n  <!-- Examples: -->\n  <!-- Use depend as a shortcut for packages that are both build and exec dependencies -->\n  <!--   <depend>roscpp</depend> -->\n  <!--   Note that this is equivalent to the following: -->\n  <!--   <build_depend>roscpp</build_depend> -->\n  <!--   <exec_depend>roscpp</exec_depend> -->\n  <!-- Use build_depend for packages you need at compile time: -->\n    <build_depend>message_generation</build_depend>\n  <!-- Use build_export_depend for packages you need in order to build against this package: -->\n  <!--   <build_export_depend>message_generation</build_export_depend> -->\n  <!-- Use buildtool_depend for build tool packages: -->\n  <!--   <buildtool_depend>catkin</buildtool_depend> -->\n  <!-- Use exec_depend for packages you need at runtime: -->\n    <exec_depend>message_runtime</exec_depend>\n  <!-- Use test_depend for packages you need only for testing: -->\n  <!--   <test_depend>gtest</test_depend> -->\n  <!-- Use doc_depend for packages you need only for building documentation: -->\n  <!--   <doc_depend>doxygen</doc_depend> -->\n  <buildtool_depend>catkin</buildtool_depend>\n  <build_depend>roscpp</build_depend>\n  <build_depend>rospy</build_depend>\n  <build_depend>std_msgs</build_depend>\n  <build_depend>vision_msgs</build_depend>\n  <build_export_depend>roscpp</build_export_depend>\n  <build_export_depend>rospy</build_export_depend>\n  <build_export_depend>std_msgs</build_export_depend>\n  <build_export_depend>vision_msgs</build_export_depend>\n  <exec_depend>roscpp</exec_depend>\n  <exec_depend>rospy</exec_depend>\n  <exec_depend>std_msgs</exec_depend>\n  <exec_depend>vision_msgs</exec_depend>\n\n\n  <!-- The export tag contains other, unspecified, tags -->\n  <export>\n    <!-- Other tools can request additional information be placed here -->\n\n  </export>\n</package>\n"
  },
  {
    "path": "paper_experiments/models/__init__.py",
    "content": ""
  },
  {
    "path": "paper_experiments/models/aligned_reid_model.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.init as init\nimport torch.nn.functional as F\nimport torch.utils.model_zoo as model_zoo\nimport os\nimport math\n\n\nclass Model(nn.Module):\n  def __init__(self, local_conv_out_channels=128, num_classes=None):\n    super(Model, self).__init__()\n    self.base = resnet50(pretrained=True)\n    planes = 2048\n    self.local_conv = nn.Conv2d(planes, local_conv_out_channels, 1)\n    self.local_bn = nn.BatchNorm2d(local_conv_out_channels)\n    self.local_relu = nn.ReLU(inplace=True)\n\n    if num_classes is not None:\n      self.fc = nn.Linear(planes, num_classes)\n      init.normal(self.fc.weight, std=0.001)\n      init.constant(self.fc.bias, 0)\n\n  def forward(self, x):\n    \"\"\"\n    Returns:\n      global_feat: shape [N, C]\n      local_feat: shape [N, H, c]\n    \"\"\"\n    # shape [N, C, H, W]\n    feat = self.base(x)\n    global_feat = F.avg_pool2d(feat, feat.size()[2:])\n    # shape [N, C]\n    global_feat = global_feat.view(global_feat.size(0), -1)\n    # shape [N, C, H, 1]\n    local_feat = torch.mean(feat, -1, keepdim=True)\n    local_feat = self.local_relu(self.local_bn(self.local_conv(local_feat)))\n    # shape [N, H, c]\n    local_feat = local_feat.squeeze(-1).permute(0, 2, 1)\n\n    if hasattr(self, 'fc'):\n      logits = self.fc(global_feat)\n      return global_feat, local_feat, logits\n\n    return global_feat, local_feat\n\n\n__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',\n           'resnet152']\n\nmodel_urls = {\n  'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n  'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n  'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n  'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n  'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\nos.environ[\"TORCH_HOME\"] = \"./ResNet_Model\"\n\ndef conv3x3(in_planes, out_planes, stride=1):\n  \"\"\"3x3 convolution with padding\"\"\"\n  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                   padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n  expansion = 1\n\n  def __init__(self, inplanes, planes, stride=1, downsample=None):\n    super(BasicBlock, self).__init__()\n    self.conv1 = conv3x3(inplanes, planes, stride)\n    self.bn1 = nn.BatchNorm2d(planes)\n    self.relu = nn.ReLU(inplace=True)\n    self.conv2 = conv3x3(planes, planes)\n    self.bn2 = nn.BatchNorm2d(planes)\n    self.downsample = downsample\n    self.stride = stride\n\n  def forward(self, x):\n    residual = x\n\n    out = self.conv1(x)\n    out = self.bn1(out)\n    out = self.relu(out)\n\n    out = self.conv2(out)\n    out = self.bn2(out)\n\n    if self.downsample is not None:\n      residual = self.downsample(x)\n\n    out += residual\n    out = self.relu(out)\n\n    return out\n\n\nclass Bottleneck(nn.Module):\n  expansion = 4\n\n  def __init__(self, inplanes, planes, stride=1, downsample=None):\n    super(Bottleneck, self).__init__()\n    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n    self.bn1 = nn.BatchNorm2d(planes)\n    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,\n                           padding=1, bias=False)\n    self.bn2 = nn.BatchNorm2d(planes)\n    self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)\n    self.bn3 = nn.BatchNorm2d(planes * 4)\n    self.relu = nn.ReLU(inplace=True)\n    self.downsample = downsample\n    self.stride = stride\n\n  def forward(self, x):\n    residual = x\n\n    out = self.conv1(x)\n    out = self.bn1(out)\n    out = self.relu(out)\n\n    out = self.conv2(out)\n    out = self.bn2(out)\n    out = self.relu(out)\n\n    out = self.conv3(out)\n    out = self.bn3(out)\n\n    if self.downsample is not None:\n      residual = self.downsample(x)\n\n    out += residual\n    out = self.relu(out)\n\n    return out\n\n\nclass ResNet(nn.Module):\n\n  def __init__(self, block, layers):\n    self.inplanes = 64\n    super(ResNet, self).__init__()\n    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,\n                           bias=False)\n    self.bn1 = nn.BatchNorm2d(64)\n    self.relu = nn.ReLU(inplace=True)\n    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n    self.layer1 = self._make_layer(block, 64, layers[0])\n    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)\n\n    for m in self.modules():\n      if isinstance(m, nn.Conv2d):\n        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n        m.weight.data.normal_(0, math.sqrt(2. / n))\n      elif isinstance(m, nn.BatchNorm2d):\n        m.weight.data.fill_(1)\n        m.bias.data.zero_()\n\n  def _make_layer(self, block, planes, blocks, stride=1):\n    downsample = None\n    if stride != 1 or self.inplanes != planes * block.expansion:\n      downsample = nn.Sequential(\n        nn.Conv2d(self.inplanes, planes * block.expansion,\n                  kernel_size=1, stride=stride, bias=False),\n        nn.BatchNorm2d(planes * block.expansion),\n      )\n\n    layers = []\n    layers.append(block(self.inplanes, planes, stride, downsample))\n    self.inplanes = planes * block.expansion\n    for i in range(1, blocks):\n      layers.append(block(self.inplanes, planes))\n\n    return nn.Sequential(*layers)\n\n  def forward(self, x):\n    x = self.conv1(x)\n    x = self.bn1(x)\n    x = self.relu(x)\n    x = self.maxpool(x)\n\n    x = self.layer1(x)\n    x = self.layer2(x)\n    x = self.layer3(x)\n    x = self.layer4(x)\n\n    return x\n\n\ndef remove_fc(state_dict):\n  \"\"\"Remove the fc layer parameters from state_dict.\"\"\"\n  new_state_dict = state_dict.copy()\n  for key, value in state_dict.items():\n    if key.startswith('fc.'):\n      del new_state_dict[key]\n  return new_state_dict\n\n\ndef resnet18(pretrained=False):\n  \"\"\"Constructs a ResNet-18 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(BasicBlock, [2, 2, 2, 2])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet18'])))\n  return model\n\n\ndef resnet34(pretrained=False):\n  \"\"\"Constructs a ResNet-34 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(BasicBlock, [3, 4, 6, 3])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet34'])))\n  return model\n\n\ndef resnet50(pretrained=False):\n  \"\"\"Constructs a ResNet-50 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 4, 6, 3])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet50'], model_dir=\"./ResNet_Model\")))### ADDED MODEL_DIR\n  return model\n\n\ndef resnet101(pretrained=False):\n  \"\"\"Constructs a ResNet-101 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 4, 23, 3])\n  if pretrained:\n    model.load_state_dict(\n      remove_fc(model_zoo.load_url(model_urls['resnet101'])))\n  return model\n\n\ndef resnet152(pretrained=False):\n  \"\"\"Constructs a ResNet-152 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 8, 36, 3])\n  if pretrained:\n    model.load_state_dict(\n      remove_fc(model_zoo.load_url(model_urls['resnet152'])))\n  return model\n"
  },
  {
    "path": "paper_experiments/models/combination_model.py",
    "content": "import pdb\n\nimport numpy as np\nimport torch.nn as nn\n\nclass CombiNet(nn.Module):\n\tdef __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):\n\t\tsuper().__init__()\n\t\tself.fc1 = nn.Linear(in_dim, 2*hidden_units)\n\t\t# self.bn1 = nn.BatchNorm1d(hidden_units)\n\t\tself.fc2 = nn.Linear(2*hidden_units, 2*hidden_units)\n\t\t# self.bn2 = nn.BatchNorm1d(2*hidden_units)\n\t\tself.fc3 = nn.Linear(2*hidden_units, out_dim)\n\t\tself.relu = nn.ReLU()\n\t\tself.apply(weight_init)\n\tdef forward(self, x):\n\t\t# out = nn.functional.normalize(x)\n\t\tskip = x\n\t\tout = self.fc1(x)\n\t\t# out = self.bn1(out)\n\t\tout = self.relu(out)\n\t\tout = self.fc2(out)\n\t\t# out = self.bn2(out)\n\t\tout = self.relu(out)\n\t\tout = self.fc3(out)\n\t\t# out = nn.functional.normalize(out)\n\t\tout += skip\n\t\treturn out\n\nclass CombiLSTM(nn.Module):\n\tdef __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):\n\t\tsuper().__init__()\n\t\tself.in_linear1 = nn.Linear(in_dim, hidden_units)\n\t\t# self.bn1 = nn.BatchNorm1d(hidden_units)\n\t\tself.in_linear2 = nn.Linear(hidden_units, hidden_units)\n\t\tself.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)\n\t\tself.out_linear1 = nn.Linear(hidden_units, hidden_units)\n\t\t# self.bn2 = nn.BatchNorm1d(hidden_units)\n\t\tself.out_linear2 = nn.Linear(hidden_units, out_dim)\n\t\tself.relu = nn.ReLU()\n\t\tself.apply(weight_init)\n\n\tdef forward(self, x, hidden = None):\n\t\tout = nn.functional.normalize(x)\n\t\tskip = out\n\t\tout = self.in_linear1(out)\n\t\t# out = self.bn1(out)\n\t\tout = self.relu(out)\n\t\tout = self.in_linear2(out)\n\t\tout = out.unsqueeze(1) #Adding batch dimension\n\t\tif hidden is None:\n\t\t\tout, hidden = self.rnn(out)\n\t\telse:\n\t\t\tout, hidden = self.rnn(out, hidden)\n\n\t\tout = out.squeeze(1) #removing batch dimension\n\t\tout = self.out_linear1(out)\n\t\t# out = self.bn2(out)\n\t\tout = self.relu(out)\n\t\tout = self.out_linear2(out)\n\t\tout = nn.functional.normalize(out)\n\t\tout += skip\n\t\treturn out, hidden\n\ndef weight_init(m):\n\tif type(m)==nn.Linear:\n\t\tnn.init.xavier_normal_(m.weight, gain=np.sqrt(2))\n\telif type(m)==nn.LSTM:\n\t\tnn.init.xavier_normal_(m.weight_ih_l0)\n\t\tnn.init.xavier_normal_(m.weight_hh_l0)\n"
  },
  {
    "path": "paper_experiments/models/deep_sort_model.py",
    "content": "import tensorflow as tf\nfrom skimage.transform import resize\nimport numpy as np\n\nclass ImageEncoder(object):\n\n    def __init__(self, checkpoint_filename=\"weights/deep_sort_weights.pb\", input_name=\"images\",\n                 output_name=\"features\"):\n        config = tf.ConfigProto()\n        config.gpu_options.allow_growth = True\n        self.session = tf.Session(config=config)\n        with tf.gfile.GFile(checkpoint_filename, \"rb\") as file_handle:\n            graph_def = tf.GraphDef()\n            graph_def.ParseFromString(file_handle.read())\n        tf.import_graph_def(graph_def, name=\"net\")\n        self.input_var = tf.get_default_graph().get_tensor_by_name(\n            \"net/%s:0\" % input_name)\n        self.output_var = tf.get_default_graph().get_tensor_by_name(\n            \"net/%s:0\" % output_name)\n\n        assert len(self.output_var.get_shape()) == 2\n        assert len(self.input_var.get_shape()) == 4\n        self.feature_dim = self.output_var.get_shape().as_list()[-1]\n        self.image_shape = self.input_var.get_shape().as_list()[1:]\n\n    def __call__(self, data_x):\n        #Resize input to expected size for model\n        data_x = resize(data_x[0], self.image_shape, anti_aliasing=True, mode='reflect')\n        data_x = np.expand_dims(data_x, 0)\n        out = self.session.run(self.output_var, feed_dict={self.input_var: data_x})\n        return out\n\nif __name__ == '__main__':\n    encoder = ImageEncoder()"
  },
  {
    "path": "paper_experiments/models/featurepointnet_model.py",
    "content": "import os, pdb\nimport numpy as np\nimport tensorflow as tf\ntf.logging.set_verbosity(tf.logging.ERROR)\nimport configparser\n\nimport utils.featurepointnet_tf_util as tf_util\nimport utils.featurepointnet_model_util as model_util\nfrom utils.calibration import Calibration, OmniCalibration\n\nbatch_size = 45 #TODO: Update if needed?\n\nclass FPointNet():\n    def __init__(self, config_path):\n        parser = configparser.SafeConfigParser()\n        parser.read(config_path)\n        self.num_point = parser.getint('general', 'num_point')\n        self.model_path = parser.get('general', 'model_path')\n\n        with tf.device('/gpu:'+str('0')):\n            pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \\\n            heading_class_label_pl, heading_residual_label_pl, \\\n            size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point)\n            is_training_pl = tf.placeholder(tf.bool, shape=())\n            end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)\n            self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))\n            #depth_feature = self.get_depth_feature_op(is_training_pl)\n            loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points)\n            self.saver = tf.train.Saver()\n\n        # Create a session\n        config = tf.ConfigProto()\n        config.gpu_options.allow_growth = True\n        config.allow_soft_placement = True\n        self.sess = tf.Session(config=config)\n\n        #Initialize variables\n        self.sess.run(tf.global_variables_initializer())\n        # Restore variables from disk.\n        self.saver.restore(self.sess, self.model_path)\n        self.ops = {'pointclouds_pl': pointclouds_pl,\n               'one_hot_vec_pl': one_hot_vec_pl,\n               'labels_pl': labels_pl,\n               'centers_pl': centers_pl,\n               'heading_class_label_pl': heading_class_label_pl,\n               'heading_residual_label_pl': heading_residual_label_pl,\n               'size_class_label_pl': size_class_label_pl,\n               'size_residual_label_pl': size_residual_label_pl,\n               'is_training_pl': is_training_pl,\n               'logits': end_points['mask_logits'],\n               'center': end_points['center'],\n               'end_points': end_points,\n               'depth_feature':depth_feature,\n               'loss': loss}\n\n    # @profile\n    def __call__(self, input_point_cloud, rot_angle, peds=False):\n        '''\n        one_hot_vec = np.zeros((batch_size, 3))\n        feed_dict = {self.pointclouds_pl: input_point_cloud,\n                     self.one_hot_vec_pl: one_hot_vec,\n                     self.is_training_pl: False}\n        features = self.sess.run(self.feature,feed_dict=feed_dict)\n        return features '''\n\n        ''' Run inference for frustum pointnets in batch mode '''\n        \n        one_hot_vec = np.zeros((batch_size,3))\n        if peds:\n            one_hot_vec[:, 1] = 1\n        num_batches = input_point_cloud.shape[0]//batch_size + 1\n        num_inputs = input_point_cloud.shape[0]\n        if input_point_cloud.shape[0]%batch_size !=0:\n            input_point_cloud = np.vstack([input_point_cloud, np.zeros((batch_size - input_point_cloud.shape[0]%batch_size, self.num_point, 4))])\n        else:\n            num_batches -= 1\n        logits = np.zeros((input_point_cloud.shape[0], input_point_cloud.shape[1], 2))\n        centers = np.zeros((input_point_cloud.shape[0], 3))\n        heading_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))\n        heading_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))\n        size_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER))\n        size_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER, 3))\n        mask_mean_prob = np.zeros((input_point_cloud.shape[0],)) # Step scores\n        heading_prob = np.zeros((input_point_cloud.shape[0],))\n        size_prob = np.zeros((input_point_cloud.shape[0],))\n        scores = np.zeros((input_point_cloud.shape[0],)) # 3D box score \n        features = np.zeros((input_point_cloud.shape[0], 512))\n        \n        for i in range(num_batches):    \n            ep = self.ops['end_points'] \n            feed_dict = {\\\n                self.ops['pointclouds_pl']: input_point_cloud[i*batch_size: (i+1)*batch_size],\n                self.ops['one_hot_vec_pl']: one_hot_vec,\n                self.ops['is_training_pl']: False}\n\n            batch_logits, batch_centers, \\\n            batch_heading_scores, batch_heading_residuals, \\\n            batch_size_scores, batch_size_residuals, batch_features = \\\n                self.sess.run([self.ops['logits'], self.ops['center'],\n                    ep['heading_scores'], ep['heading_residuals'],\n                    ep['size_scores'], ep['size_residuals'], self.ops['depth_feature']],\n                    feed_dict=feed_dict)\n\n            logits[i*batch_size: (i+1)*batch_size] = batch_logits\n            centers[i*batch_size: (i+1)*batch_size] = batch_centers\n            heading_logits[i*batch_size: (i+1)*batch_size] = batch_heading_scores\n            heading_residuals[i*batch_size: (i+1)*batch_size] = batch_heading_residuals\n            size_logits[i*batch_size: (i+1)*batch_size] = batch_size_scores\n            size_residuals[i*batch_size: (i+1)*batch_size] = batch_size_residuals\n            features[i*batch_size: (i+1)*batch_size] = batch_features[:,0,:]\n\n        heading_cls = np.argmax(heading_logits, 1) # B\n        size_cls = np.argmax(size_logits, 1) # B\n        heading_res = np.vstack([heading_residuals[i, heading_cls[i]] for i in range(heading_cls.shape[0])])\n        size_res = np.vstack([size_residuals[i, size_cls[i], :] for i in range(size_cls.shape[0])])\n\n        #TODO: Make this accept batches if wanted\n        boxes = []\n        for i in range(num_inputs):\n            box = np.array(model_util.from_prediction_to_label_format(centers[i], heading_cls[i], heading_res[i], size_cls[i], size_res[i], rot_angle[i]))\n            box[6] = np.squeeze(box[6])\n            swp = box[5]\n            box[5] = box[4]\n            box[4] = swp\n            boxes.append(box)       \n        boxes = np.vstack(boxes)\n        return boxes, mask_mean_prob[:num_inputs], features[:num_inputs]\n\n\n    def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):\n        ''' 3D instance segmentation PointNet v1 network.\n        Input:\n            point_cloud: TF tensor in shape (B,N,4)\n                frustum point clouds with XYZ and intensity in point channels\n                XYZs are in frustum coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n            is_training: TF boolean scalar\n            bn_decay: TF float scalar\n            end_points: dict\n        Output:\n            logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object\n            end_points: dict\n        '''\n        num_point = point_cloud.get_shape()[1].value\n\n        net = tf.expand_dims(point_cloud, 2)\n\n        net = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv1', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv2', bn_decay=bn_decay)\n        point_feat = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv3', bn_decay=bn_decay)\n        net = tf_util.conv2d(point_feat, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv4', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 1024, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv5', bn_decay=bn_decay)\n\n        global_feat = tf_util.max_pool2d(net, [num_point,1],\n                                         padding='VALID', scope='maxpool')\n\n        global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3)\n        global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])\n        concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])\n\n        net = tf_util.conv2d(concat_feat, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv6', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv7', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv8', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv9', bn_decay=bn_decay)\n        net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)\n\n        logits = tf_util.conv2d(net, 2, [1,1],\n                             padding='VALID', stride=[1,1], activation_fn=None,\n                             scope='conv10')\n        logits = tf.squeeze(logits, [2]) # BxNxC\n        return logits, end_points\n     \n    def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):\n        ''' 3D Box Estimation PointNet v1 network.\n        Input:\n            object_point_cloud: TF tensor in shape (B,M,C)\n                point clouds in object coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n        Output:\n            output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)\n                including box centers, heading bin class scores and residuals,\n                and size cluster scores and residuals\n        ''' \n        num_point = object_point_cloud.get_shape()[1].value\n        net = tf.expand_dims(object_point_cloud, 2)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg1', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg2', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg3', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg4', bn_decay=bn_decay)\n\n        features = tf.reduce_max(net, axis = 1)\n\n        net = tf_util.max_pool2d(net, [num_point,1],\n            padding='VALID', scope='maxpool2')\n        net = tf.squeeze(net, axis=[1,2])\n        net = tf.concat([net, one_hot_vec], axis=1)\n        net = tf_util.fully_connected(net, 512, scope='fc1', bn=True,\n            is_training=is_training, bn_decay=bn_decay)\n        net = tf_util.fully_connected(net, 256, scope='fc2', bn=True,\n            is_training=is_training, bn_decay=bn_decay)\n\n        # The first 3 numbers: box center coordinates (cx,cy,cz),\n        # the next NUM_HEADING_BIN*2:  heading bin class scores and bin residuals\n        # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals\n        output = tf_util.fully_connected(net,\n            3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4, activation_fn=None, scope='fc3')\n        return output, end_points, features\n\n    def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):\n        ''' Frustum PointNets model. The model predict 3D object masks and\n        amodel bounding boxes for objects in frustum point clouds.\n        Input:\n            point_cloud: TF tensor in shape (B,N,4)\n                frustum point clouds with XYZ and intensity in point channels\n                XYZs are in frustum coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n            is_training: TF boolean scalar\n            bn_decay: TF float scalar\n        Output:\n            end_points: dict (map from name strings to TF tensors)\n        '''\n        end_points = {}\n        \n        # 3D Instance Segmentation PointNet\n        logits, end_points = self.get_instance_seg_v1_net(\\\n            point_cloud, one_hot_vec,\n            is_training, bn_decay, end_points)\n        end_points['mask_logits'] = logits\n\n        # Masking\n        # select masked points and translate to masked points' centroid\n        object_point_cloud_xyz, mask_xyz_mean, end_points = \\\n            model_util.point_cloud_masking(point_cloud, logits, end_points)\n\n        # T-Net and coordinate translation\n        center_delta, end_points = model_util.get_center_regression_net(\\\n            object_point_cloud_xyz, one_hot_vec,\n            is_training, bn_decay, end_points)\n        stage1_center = center_delta + mask_xyz_mean # Bx3\n        end_points['stage1_center'] = stage1_center\n        # Get object point cloud in object coordinate\n        object_point_cloud_xyz_new = \\\n            object_point_cloud_xyz - tf.expand_dims(center_delta, 1)\n\n        # Amodel Box Estimation PointNet\n        output, end_points, features = self.get_3d_box_estimation_v1_net(\\\n            object_point_cloud_xyz_new, one_hot_vec,\n            is_training, bn_decay, end_points)\n\n        # Parse output to 3D box parameters\n        end_points = model_util.parse_output_to_tensors(output, end_points)\n        end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3\n\n        return end_points, features\n    \n    def get_depth_feature_op(self, is_training):\n\n        net = tf.expand_dims(self.object_pointcloud, 2)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg1', bn_decay=None)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg2', bn_decay=None)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg3', bn_decay=None)\n        net = tf_util.conv2d(net, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg4', bn_decay=None)\n        net = tf.reduce_max(net, axis = 1)\n        \n        return net\n\n    def get_depth_feature(self, object_pointcloud):\n        \n        feed_dict = {self.object_pointcloud:object_pointcloud, self.ops['is_training_pl']:False}\n        depth_feature = self.sess.run([self.ops['depth_feature']], feed_dict = feed_dict)\n        return depth_feature\n\n    def softmax(self, x):\n        ''' Numpy function for softmax'''\n        shape = x.shape\n        probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True))\n        probs /= np.sum(probs, axis=len(shape)-1, keepdims=True)\n        return probs\n\ndef create_depth_model(model, config_path):\n    #Note that folder path must be the folder containing the config.yaml file if omni_camera is True\n    if model == 'FPointNet':\n        return FPointNet(config_path)\n    elif model == 'PointNet':\n        return PointNet(config_path)"
  },
  {
    "path": "paper_experiments/models/pointnet_model.py",
    "content": "import os, pdb\nimport tensorflow as tf\ntf.logging.set_verbosity(tf.logging.ERROR)\nimport configparser\nfrom utils.pointnet_transform_nets import input_transform_net, feature_transform_net\nimport utils.pointnet_tf_util as pointnet_tf_util\n\n\nclass PointNet():\n    def __init__(self, config_path):\n        parser = configparser.SafeConfigParser()\n        parser.read(config_path)\n        num_points = parser.getint('general', 'num_point')\n        depth_model_path = parser.get('general', 'depth_model_path')\n\n        with tf.device('/gpu:'+str(0)):\n            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)\n            self.is_training_pl = tf.placeholder(tf.bool, shape=())\n\n            # simple model\n            feature = self.get_model(self.pointclouds_pl, self.is_training_pl)\n            self.feature = feature\n            # Add ops to save and restore all the variables.\n        \n        self.saver = tf.train.Saver()\n        #Create session\n        config = tf.ConfigProto()\n        config.gpu_options.allow_growth = True\n        config.allow_soft_placement = True\n        config.log_device_placement = False\n        self.sess = tf.Session(config=config)\n        #Initialize variables\n        self.sess.run(tf.global_variables_initializer())\n        #Restore model weights\n        self.saver.restore(self.sess, depth_model_path)\n\n    def __call__(self, input_point_cloud):\n        feed_dict = {self.pointclouds_pl: input_point_cloud,\n                     self.is_training_pl: False}\n        features = self.sess.run(self.feature,feed_dict=feed_dict)\n        return features\n\n    def placeholder_inputs(self, batch_size, num_point):\n        pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, None, 3))\n        labels_pl = tf.placeholder(tf.int32, shape=(batch_size))\n        return pointclouds_pl, labels_pl\n\n\n    def get_model(self, point_cloud, is_training, bn_decay=None):\n        \"\"\" Classification PointNet, input is BxNx3, output Bx40 \"\"\"\n        batch_size = point_cloud.get_shape()[0].value\n        end_points = {}\n\n        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE) as sc:\n            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)\n        point_cloud_transformed = tf.matmul(point_cloud, transform)\n        input_image = tf.expand_dims(point_cloud_transformed, -1)\n\n        net = pointnet_tf_util.conv2d(input_image, 64, [1,3],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv1', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv2', bn_decay=bn_decay)\n\n        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE) as sc:\n            transform = feature_transform_net(net, is_training, bn_decay, K=64)\n        end_points['transform'] = transform\n        net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)\n        net_transformed = tf.expand_dims(net_transformed, [2])\n\n        net = pointnet_tf_util.conv2d(net_transformed, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv3', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv4', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 1024, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv5', bn_decay=bn_decay)\n\n        # Symmetric function: max pooling\n        net = tf.reduce_max(net, axis = 1)\n\n        net = tf.reshape(net, [batch_size, -1])\n        feature = net\n\n        return feature\n\n\n    def get_loss(self, pred, label, end_points, reg_weight=0.001):\n        \"\"\" pred: B*NUM_CLASSES,\n            label: B, \"\"\"\n        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)\n        classify_loss = tf.reduce_mean(loss)\n        tf.summary.scalar('classify loss', classify_loss)\n\n        # Enforce the transformation as orthogonal matrix\n        transform = end_points['transform'] # BxKxK\n        K = transform.get_shape()[1].value\n        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))\n        mat_diff = mat_diff - tf.constant(np.eye(K), dtype=tf.float32)\n        mat_diff_loss = tf.nn.l2_loss(mat_diff) \n        tf.summary.scalar('mat loss', mat_diff_loss)\n\n        return classify_loss + mat_diff_loss * reg_weight\n    "
  },
  {
    "path": "paper_experiments/models/resnet_reid_models.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nimport torchvision.models as models\nfrom torchvision import transforms\nimport torch.nn.functional as F\n\nclass FeatureResNet(nn.Module):\n    def __init__(self,n_layers=50,pretrained=True):\n        super(FeatureResNet,self).__init__()\n        if n_layers == 50:\n            old_model= models.resnet50(pretrained=pretrained)\n        elif n_layers == 34:\n            old_model= models.resnet34(pretrained=pretrained)\n        elif n_layers == 18:\n            old_model= models.resnet18(pretrained=pretrained)\n        else:\n            raise NotImplementedError('resnet%s is not found'%(n_layers))\n\n        for name,modules in old_model._modules.items():\n            if name.find('fc') == -1:\n                self.add_module(name,modules)\n        self.output_dim = old_model.fc.in_features\n        self.pretrained = pretrained\n    def forward(self,x):\n        for name,module in self._modules.items():\n            x = nn.parallel.data_parallel(module, x)\n        return x.view(x.size(0), -1)\n\nclass ResNet(nn.Module):\n    def __init__(self,n_id,n_layers=50,pretrained=True):\n        super(ResNet,self).__init__()\n        if n_layers == 50:\n            old_model= models.resnet50(pretrained=pretrained)\n        elif n_layers == 34:\n            old_model= models.resnet34(pretrained=pretrained)\n        elif n_layers == 18:\n            old_model= models.resnet18(pretrained=pretrained)\n        else:\n            raise NotImplementedError('resnet%s is not found'%(n_layers))\n\n        for name,modules in old_model._modules.items():\n            self.add_module(name,modules)\n        self.fc = nn.Linear(self.fc.in_features,n_id)\n        #########\n        self.pretrained = pretrained\n    def forward(self,x):\n        for name,module in self._modules.items():\n            if name != 'fc':\n                x = module(x)\n        out = self.fc(x.view(x.size(0),-1))\n        return out, x.view(x.size(0), -1)\n\nclass NLayersFC(nn.Module):\n    def __init__(self, in_dim, out_dim, hidden_dim=1, n_layers=0):\n        super(NLayersFC, self).__init__()\n        if n_layers == 0:\n            model = [nn.Linear(in_dim, out_dim)]\n        else:\n            model = []\n            model += [nn.Linear(in_dim, hidden_dim),\n                      nn.ReLU(True)]\n            for i in range(n_layers-1):\n                model += [nn.Linear(hidden_dim, hidden_dim),\n                          nn.ReLU(True)]\n            model += [nn.Linear(hidden_dim, out_dim)]\n        self.model = nn.Sequential(*model)\n\n    def forward(self, x):\n        return self.model(x)\n\nclass ICT_ResNet(nn.Module):\n    def __init__(self,n_id,n_color,n_type,n_layers=50,pretrained=True):\n        super(ICT_ResNet,self).__init__()\n        if n_layers == 50:\n            old_model= models.resnet50(pretrained=pretrained)\n        elif n_layers == 34:\n            old_model= models.resnet34(pretrained=pretrained)\n        elif n_layers == 18:\n            old_model= models.resnet18(pretrained=pretrained)\n        else:\n            raise NotImplementedError('resnet%s is not found'%(n_layers))\n\n        for name,modules in old_model._modules.items():\n            self.add_module(name,modules)\n        self.fc = nn.Linear(self.fc.in_features,n_id)\n        self.fc_c = nn.Linear(self.fc.in_features,n_color)\n        self.fc_t = nn.Linear(self.fc.in_features,n_type)\n        #########\n        self.pretrained = pretrained\n    def forward(self,x):\n        for name,module in self._modules.items():\n            if name.find('fc')==-1:\n                x = module(x)\n        x = x.view(x.size(0),-1)\n        x_i = self.fc(x)\n        x_c = self.fc_c(x)\n        x_t = self.fc_t(x)\n        return x_i,x_c,x_t\n\nclass TripletNet(nn.Module):\n    def __init__(self, net):\n        super(TripletNet, self).__init__()\n        self.net = net\n\n    def forward(self, x, y, z):\n        pred_x, feat_x = self.net(x)\n        pred_y, feat_y = self.net(y)\n        pred_z, feat_z = self.net(z)\n        dist_pos = F.pairwise_distance(feat_x, feat_y, 2)\n        dist_neg = F.pairwise_distance(feat_x, feat_z, 2)\n        return dist_pos, dist_neg, pred_x, pred_y, pred_z\n\nif __name__ == '__main__':\n    netM = ICT_ResNet(n_id=1000,n_color=7,n_type=7,n_layers=18,pretrained=True).cuda()\n\n    print(netM)\n    output = netM(Variable(torch.ones(1,3,224,224).cuda()/2.))\n    print(output[0].size())\n    print(output[1].size())\n    print(output[2].size())\n"
  },
  {
    "path": "paper_experiments/models/yolo_models.py",
    "content": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as np\n\nfrom PIL import Image\n\nfrom utils.yolo_utils.parse_config import *\nfrom utils.yolo_utils.utils import build_targets\nfrom collections import defaultdict\n\nimport matplotlib.pyplot as plt\nimport matplotlib.patches as patches\n\n\ndef create_modules(module_defs):\n    \"\"\"\n    Constructs module list of layer blocks from module configuration in module_defs\n    \"\"\"\n    hyperparams = module_defs.pop(0)\n    output_filters = [int(hyperparams[\"channels\"])]\n    module_list = nn.ModuleList()\n    for i, module_def in enumerate(module_defs):\n        modules = nn.Sequential()\n\n        if module_def[\"type\"] == \"convolutional\":\n            bn = int(module_def[\"batch_normalize\"])\n            filters = int(module_def[\"filters\"])\n            kernel_size = int(module_def[\"size\"])\n            pad = (kernel_size - 1) // 2 if int(module_def[\"pad\"]) else 0\n            modules.add_module(\n                \"conv_%d\" % i,\n                nn.Conv2d(\n                    in_channels=output_filters[-1],\n                    out_channels=filters,\n                    kernel_size=kernel_size,\n                    stride=int(module_def[\"stride\"]),\n                    padding=pad,\n                    bias=not bn,\n                ),\n            )\n            if bn:\n                modules.add_module(\"batch_norm_%d\" % i, nn.BatchNorm2d(filters))\n            if module_def[\"activation\"] == \"leaky\":\n                modules.add_module(\"leaky_%d\" % i, nn.LeakyReLU(0.1))\n\n        elif module_def[\"type\"] == \"maxpool\":\n            kernel_size = int(module_def[\"size\"])\n            stride = int(module_def[\"stride\"])\n            if kernel_size == 2 and stride == 1:\n                padding = nn.ZeroPad2d((0, 1, 0, 1))\n                modules.add_module(\"_debug_padding_%d\" % i, padding)\n            maxpool = nn.MaxPool2d(\n                kernel_size=int(module_def[\"size\"]),\n                stride=int(module_def[\"stride\"]),\n                padding=int((kernel_size - 1) // 2),\n            )\n            modules.add_module(\"maxpool_%d\" % i, maxpool)\n\n        elif module_def[\"type\"] == \"upsample\":\n            upsample = Interpolate(scale_factor=int(module_def[\"stride\"]), mode=\"nearest\")\n            modules.add_module(\"upsample_%d\" % i, upsample)\n\n        elif module_def[\"type\"] == \"route\":\n            layers = [int(x) for x in module_def[\"layers\"].split(\",\")]\n            filters = sum([output_filters[layer_i] for layer_i in layers])\n            modules.add_module(\"route_%d\" % i, EmptyLayer())\n\n        elif module_def[\"type\"] == \"shortcut\":\n            filters = output_filters[int(module_def[\"from\"])]\n            modules.add_module(\"shortcut_%d\" % i, EmptyLayer())\n\n        elif module_def[\"type\"] == \"yolo\":\n            anchor_idxs = [int(x) for x in module_def[\"mask\"].split(\",\")]\n            # Extract anchors\n            anchors = [int(x) for x in module_def[\"anchors\"].split(\",\")]\n            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]\n            anchors = [anchors[i] for i in anchor_idxs]\n            num_classes = int(module_def[\"classes\"])\n            img_height = int(hyperparams[\"height\"])\n            # Define detection layer\n            yolo_layer = YOLOLayer(anchors, num_classes, img_height)\n            modules.add_module(\"yolo_%d\" % i, yolo_layer)\n        # Register module list and number of output filters\n        module_list.append(modules)\n        output_filters.append(filters)\n\n    return hyperparams, module_list\n\n\nclass EmptyLayer(nn.Module):\n    \"\"\"Placeholder for 'route' and 'shortcut' layers\"\"\"\n\n    def __init__(self):\n        super(EmptyLayer, self).__init__()\n\nclass Interpolate(nn.Module):\n    def __init__(self, scale_factor, mode):\n        super(Interpolate, self).__init__()\n        self.interp = nn.functional.interpolate\n        self.scale_factor = scale_factor\n        self.mode = mode\n        \n    def forward(self, x):\n        x = self.interp(x, scale_factor=self.scale_factor, mode=self.mode)\n        return x\n\nclass YOLOLayer(nn.Module):\n    \"\"\"Detection layer\"\"\"\n\n    def __init__(self, anchors, num_classes, img_dim):\n        super(YOLOLayer, self).__init__()\n        self.anchors = anchors\n        self.num_anchors = len(anchors)\n        self.num_classes = num_classes\n        self.bbox_attrs = 5 + num_classes\n        self.image_dim = img_dim\n        self.ignore_thres = 0.5\n        self.lambda_coord = 1\n\n        self.mse_loss = nn.MSELoss(reduction = 'elementwise_mean')  # Coordinate loss\n        self.bce_loss = nn.BCELoss(reduction = 'elementwise_mean')  # Confidence loss\n        self.ce_loss = nn.CrossEntropyLoss()  # Class loss\n\n    def forward(self, x, targets=None):\n        nA = self.num_anchors\n        nB = x.size(0)\n        nG = x.size(2)\n        stride = self.image_dim / nG\n\n        # Tensors for cuda support\n        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor\n        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor\n        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor\n\n        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()\n\n        # Get outputs\n        x = torch.sigmoid(prediction[..., 0])  # Center x\n        y = torch.sigmoid(prediction[..., 1])  # Center y\n        w = prediction[..., 2]  # Width\n        h = prediction[..., 3]  # Height\n        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf\n        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.\n\n        # Calculate offsets for each grid\n        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)\n        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)\n        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])\n        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))\n        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))\n\n        # Add offset and scale with anchors\n        pred_boxes = FloatTensor(prediction[..., :4].shape)\n        pred_boxes[..., 0] = x.data + grid_x\n        pred_boxes[..., 1] = y.data + grid_y\n        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w\n        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h\n\n        # Training\n        if targets is not None:\n\n            if x.is_cuda:\n                self.mse_loss = self.mse_loss.cuda()\n                self.bce_loss = self.bce_loss.cuda()\n                self.ce_loss = self.ce_loss.cuda()\n\n            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(\n                pred_boxes=pred_boxes.cpu().data,\n                pred_conf=pred_conf.cpu().data,\n                pred_cls=pred_cls.cpu().data,\n                target=targets.cpu().data,\n                anchors=scaled_anchors.cpu().data,\n                num_anchors=nA,\n                num_classes=self.num_classes,\n                grid_size=nG,\n                ignore_thres=self.ignore_thres,\n                img_dim=self.image_dim,\n            )\n\n            nProposals = int((pred_conf > 0.5).sum().item())\n            recall = float(nCorrect / nGT) if nGT else 1\n            precision = float(nCorrect / nProposals)\n\n            # Handle masks\n            mask = Variable(mask.type(ByteTensor))\n            conf_mask = Variable(conf_mask.type(ByteTensor))\n\n            # Handle target variables\n            tx = Variable(tx.type(FloatTensor), requires_grad=False)\n            ty = Variable(ty.type(FloatTensor), requires_grad=False)\n            tw = Variable(tw.type(FloatTensor), requires_grad=False)\n            th = Variable(th.type(FloatTensor), requires_grad=False)\n            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)\n            tcls = Variable(tcls.type(LongTensor), requires_grad=False)\n\n            # Get conf mask where gt and where there is no gt\n            conf_mask_true = mask\n            conf_mask_false = conf_mask - mask\n\n            # Mask outputs to ignore non-existing objects\n            loss_x = self.mse_loss(x[mask], tx[mask])\n            loss_y = self.mse_loss(y[mask], ty[mask])\n            loss_w = self.mse_loss(w[mask], tw[mask])\n            loss_h = self.mse_loss(h[mask], th[mask])\n            loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(\n                pred_conf[conf_mask_true], tconf[conf_mask_true]\n            )\n            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))\n            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls\n\n            return (\n                loss,\n                loss_x.item(),\n                loss_y.item(),\n                loss_w.item(),\n                loss_h.item(),\n                loss_conf.item(),\n                loss_cls.item(),\n                recall,\n                precision,\n            )\n\n        else:\n            # If not in training phase return predictions\n            output = torch.cat(\n                (\n                    pred_boxes.view(nB, -1, 4) * stride,\n                    pred_conf.view(nB, -1, 1),\n                    pred_cls.view(nB, -1, self.num_classes),\n                ),\n                -1,\n            )\n            return output\n\n\nclass Darknet(nn.Module):\n    \"\"\"YOLOv3 object detection model\"\"\"\n\n    def __init__(self, config_path):\n        super(Darknet, self).__init__()\n        self.module_defs = parse_model_config(config_path)\n        self.hyperparams, self.module_list = create_modules(self.module_defs)\n        self.seen = 0\n        self.header_info = np.array([0, 0, 0, self.seen, 0])\n        self.loss_names = [\"x\", \"y\", \"w\", \"h\", \"conf\", \"cls\", \"recall\", \"precision\"]\n        self.load_weights(self.module_defs[-1]['path'])\n\n    def forward(self, x, targets=None):\n        is_training = targets is not None\n        output = []\n        self.losses = defaultdict(float)\n        layer_outputs = []\n        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n            if module_def[\"type\"] in [\"convolutional\", \"upsample\", \"maxpool\"]:\n                x = module(x)\n            elif module_def[\"type\"] == \"route\":\n                layer_i = [int(x) for x in module_def[\"layers\"].split(\",\")]\n                x = torch.cat([layer_outputs[i] for i in layer_i], 1)\n            elif module_def[\"type\"] == \"shortcut\":\n                layer_i = int(module_def[\"from\"])\n                x = layer_outputs[-1] + layer_outputs[layer_i]\n            elif module_def[\"type\"] == \"yolo\":\n                # Train phase: get loss\n                if is_training:\n                    x, *losses = module[0](x, targets)\n                    for name, loss in zip(self.loss_names, losses):\n                        self.losses[name] += loss\n                # Test phase: Get detections\n                else:\n                    x = module(x)\n                output.append(x)\n            layer_outputs.append(x)\n\n        self.losses[\"recall\"] /= 3\n        self.losses[\"precision\"] /= 3\n        return sum(output) if is_training else torch.cat(output, 1)\n\n    def load_weights(self, weights_path):\n        \"\"\"Parses and loads the weights stored in 'weights_path'\"\"\"\n\n        # Open the weights file\n        fp = open(weights_path, \"rb\")\n        header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values\n\n        # Needed to write header when saving weights\n        self.header_info = header\n\n        self.seen = header[3]\n        weights = np.fromfile(fp, dtype=np.float32)  # The rest are weights\n        fp.close()\n\n        ptr = 0\n        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n            if module_def[\"type\"] == \"convolutional\":\n                conv_layer = module[0]\n                if module_def[\"batch_normalize\"]:\n                    # Load BN bias, weights, running mean and running variance\n                    bn_layer = module[1]\n                    num_b = bn_layer.bias.numel()  # Number of biases\n                    # Bias\n                    bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)\n                    bn_layer.bias.data.copy_(bn_b)\n                    ptr += num_b\n                    # Weight\n                    bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)\n                    bn_layer.weight.data.copy_(bn_w)\n                    ptr += num_b\n                    # Running Mean\n                    bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)\n                    bn_layer.running_mean.data.copy_(bn_rm)\n                    ptr += num_b\n                    # Running Var\n                    bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)\n                    bn_layer.running_var.data.copy_(bn_rv)\n                    ptr += num_b\n                else:\n                    # Load conv. bias\n                    num_b = conv_layer.bias.numel()\n                    conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)\n                    conv_layer.bias.data.copy_(conv_b)\n                    ptr += num_b\n                # Load conv. weights\n                num_w = conv_layer.weight.numel()\n                conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)\n                conv_layer.weight.data.copy_(conv_w)\n                ptr += num_w\n\n    \"\"\"\n        @:param path    - path of the new weights file\n        @:param cutoff  - save layers between 0 and cutoff (cutoff = -1 -> all are saved)\n    \"\"\"\n\n    def save_weights(self, path, cutoff=-1):\n\n        fp = open(path, \"wb\")\n        self.header_info[3] = self.seen\n        self.header_info.tofile(fp)\n\n        # Iterate through layers\n        for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):\n            if module_def[\"type\"] == \"convolutional\":\n                conv_layer = module[0]\n                # If batch norm, load bn first\n                if module_def[\"batch_normalize\"]:\n                    bn_layer = module[1]\n                    bn_layer.bias.data.cpu().numpy().tofile(fp)\n                    bn_layer.weight.data.cpu().numpy().tofile(fp)\n                    bn_layer.running_mean.data.cpu().numpy().tofile(fp)\n                    bn_layer.running_var.data.cpu().numpy().tofile(fp)\n                # Load conv bias\n                else:\n                    conv_layer.bias.data.cpu().numpy().tofile(fp)\n                # Load conv weights\n                conv_layer.weight.data.cpu().numpy().tofile(fp)\n\n        fp.close()\n"
  },
  {
    "path": "paper_experiments/requirements.txt",
    "content": "absl-py==0.7.0\nastor==0.7.1\nbackcall==0.1.0\nbleach==3.3.0\ncatkin-pkg==0.4.12\ncertifi==2018.11.29\nchardet==3.0.4\ncloudpickle==0.7.0\ncupy==6.0.0\ncycler==0.10.0\nCython==0.29.7\ndask==2021.10.0\ndecorator==4.3.2\ndefusedxml==0.5.0\ndocutils==0.14\nentrypoints==0.3\nfastrlock==0.4\nffmpeg==1.4\ngast==0.2.2\ngrpcio==1.18.0\ngurobipy==8.1.0\nhtml5lib==0.9999999\nidna==2.8\nimageio==2.5.0\nimageio-ffmpeg==0.3.0\nipydatawidgets==4.0.0\nipykernel==5.1.0\nipympl==0.2.1\nipython==7.16.3\nipython-genutils==0.2.0\nipyvolume==0.5.1\nipywebrtc==0.4.3\nipywidgets==7.4.2\njedi==0.13.2\nJinja2==2.11.3\njsonschema==2.6.0\njupyter-client==5.2.4\njupyter-core==4.4.0\njupyterlab==1.2.21\njupyterlab-server==0.2.0\nkiwisolver==1.0.1\nlap==0.4.0\nlapjv==1.3.1\nline-profiler==2.1.1\nllvmlite==0.28.0\nMarkdown==3.0.1\nMarkupSafe==1.1.0\nmatplotlib==3.0.2\nmistune==0.8.4\nnbconvert==5.4.1\nnbformat==4.4.0\nnetworkx==2.2\nnotebook==6.4.1\nnumba==0.43.1\nnumpy==1.21.0\nopen3d-python==0.7.0.0\nopencv-python==4.2.0.32\npandas==0.24.1\npandocfilters==1.4.2\nparso==0.3.3\npexpect==4.6.0\npickleshare==0.7.5\nPillow==9.0.0\npptk==0.1.0\nprometheus-client==0.5.0\nprompt-toolkit==2.0.8\nprotobuf==3.6.1\nptyprocess==0.6.0\npycocotools==2.0.0\nPygments==2.7.4\npyparsing==2.3.1\npypcd==0.1.1\npython-dateutil==2.8.0\npython-lzf==0.2.4\npythreejs==2.0.2\npytz==2018.9\nPyWavelets==1.0.1\nPyYAML==5.4\npyzmq==17.1.2\nrequests==2.21.0\nrospkg==1.1.9\nscikit-image==0.14.2\nscikit-learn==0.20.2\nscipy==1.2.0\nseaborn==0.9.0\nSend2Trash==1.5.0\nsix==1.12.0\nsklearn==0.0\ntensorboard==1.8.0\ntensorboardX==1.6\ntensorflow-gpu==2.5.2\ntermcolor==1.1.0\nterminado==0.8.1\ntestpath==0.4.2\ntoolz==0.9.0\ntorch==1.0.1\ntorchvision==0.2.1\ntornado==5.1.1\ntqdm==4.30.0\ntraitlets==4.3.2\ntraittypes==0.2.1\nurllib3==1.26.5\nwcwidth==0.1.7\nWerkzeug==0.15.3\nwidgetsnbextension==3.4.2\n"
  },
  {
    "path": "paper_experiments/track.py",
    "content": "import open3d as o3d\nimport torch\nimport argparse\nimport os, pdb, sys, copy, pickle\nimport time\nimport random\nimport numpy as np\nimport tensorflow as tf\nfrom torch.utils.data import DataLoader\nfrom tqdm import tqdm\n\nfrom models.aligned_reid_model import Model as aligned_reid_model\nfrom utils.yolo_utils.utils import non_max_suppression, load_classes\nfrom models.combination_model import CombiNet, CombiLSTM\nfrom utils.dataset import SequenceDataset, STIPDataset, collate_fn\nfrom models.deep_sort_model import ImageEncoder as deep_sort_model\nfrom utils.tracker import Tracker\nfrom utils.tracker_3d import Tracker_3d\nfrom utils.deep_sort_utils import non_max_suppression as deepsort_nms\nfrom utils.visualise import draw_track\nfrom utils.read_detections import read_ground_truth_2d_detections, read_ground_truth_3d_detections\nfrom utils.tracking_utils import create_detector, convert_detections, combine_features\nfrom utils.tracking_utils import non_max_suppression_3D, non_max_suppression_3D_prime\nfrom utils.aligned_reid_utils import generate_features, generate_features_batched, get_image_patches, create_appearance_model\nfrom utils.featurepointnet_model_util import generate_detections_3d, convert_depth_features\nfrom models.featurepointnet_model import create_depth_model\n\ndef parse_arguments():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--sequence_folder', type=str, default='data/KITTI/sequences/0001', help='path to image sequence')\n    parser.add_argument('--output_folder', type=str, default='results', help='output folder')\n    parser.add_argument('--aligned_reid_ckpt', type=str, default='weights/aligned_reid_market_weights.ckpt', help='path to model config file')\n    parser.add_argument('--resnet_reid_ckpt', type=str, default='weights/resnet_reid.ckpt', help='path to model config file')\n    parser.add_argument('--depth_model', type=str, default='FPointNet', help='type of depth model to use')\n    parser.add_argument('--depth_config_path', type=str, default='config/featurepointnet.cfg', help='path to model config file')\n    parser.add_argument('--appearance_model', type=str, default='resnet_reid', help='type of appearance model to use aligned_reid or deepsort or resnet_reid')\n    parser.add_argument('--conf_thres', type=float, default=0.8, help='object confidence threshold')\n    parser.add_argument('--depth_weight', type=float, default=1, help='weight of depth feature while concatenating')\n    parser.add_argument('--nms_thresh', type=float, default=0.56, help='iou thresshold for non-maximum suppression')\n    parser.add_argument('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation')\n    parser.add_argument('--use_cuda', type=bool, default=True, help='whether to use cuda if available')\n    parser.add_argument('-p', '--point_cloud', action='store_false', help='Use to disable pointcloud')\n    parser.add_argument('-o', '--optical_flow_initiation', action='store_false', help='Use to enable optical flow based velocity initiation')\n    parser.add_argument('-q', '--perfect', action='store_true', help='whether to use perfect assignments')\n    parser.add_argument('-g', '--ground_truth', action='store_true', help='whether to use ground truth detections')\n    parser.add_argument('-r', '--reference', action='store_false', help='whether to use reference detections')\n    parser.add_argument('-t', '--track_3d', action='store_true', help='whether to do 3d tracking')\n    parser.add_argument('--ref_det', type = str, default = 'new_rrc_subcnn_car', help='lsvm, subcnn, regionlets, maskrcnn')\n    parser.add_argument(\"--nn_budget\", type=int, default=100, help=\"Maximum size of the appearance descriptors gallery. If None, no budget is enforced.\")\n    parser.add_argument(\"--dummy_node_cost_app\", type=float, default=0.99, help=\"Dummy node appearance cost for JPDA (or maximum distnce when using deepsort)\")\n    parser.add_argument(\"--dummy_node_cost_iou\", type=float, default=0.97, help=\"Dummy node iou cost for JPDA (or maximum distnce when using deepsort)\")\n    parser.add_argument(\"-c\", \"--combine_features\", action = 'store_false', help=\"Whether to use trained MLP to combine features\")\n    parser.add_argument(\"-f\", \"--fpointnet\", action = 'store_false', help=\"Whether to use F-PointNet for 3d detection\")\n    parser.add_argument(\"--combo_model\", default = 'weights/resnet_reid_fpointnet_combo_car/mlp__1570759353.0113978/best_checkpoint.tar\"', help=\"Trained MLP checkpoint to combine features\")\n    parser.add_argument(\"-j\", \"--JPDA\", action = 'store_false', help=\"Whether to use JPDA for soft assignments\")\n    parser.add_argument(\"-l\", \"--LSTM\", action = 'store_true', help=\"Whether to use LSTM for feature combination and update\")\n    parser.add_argument(\"--lstm_model\", default = 'weights/aligned_reid_fpointnet_combo/lstm/best_checkpoint.tar', help=\"Trained LSTM checkpoint to combine features\")\n    parser.add_argument(\"-m\",\"--m_best_sol\", type=int, default=10, help=\"Number of solutions for JPDA\")\n    parser.add_argument(\"--log_data\", action='store_true', help=\"Turn on full data logging\")\n    parser.add_argument(\"--max_age\", type=int, default=2, help=\"Number of misses before termination\")\n    parser.add_argument(\"--n_init\", type=int, default=2, help=\"Consecutive frames for tentative->confirmed\")\n    parser.add_argument(\"--assn_thresh\", type=float, default=0.65, help=\"min prob for match\")\n    parser.add_argument(\"--matching_strategy\", type=str, default=\"hungarian\", help=\"matching strategy for JPDA (max_and_threshold, strict_max_pair, or hungarian)\")\n    parser.add_argument(\"--kf_appearance_feature\", type=bool, default=False, help=\"Whether to use kf state for apperance features\")\n    parser.add_argument('-i', \"--use_imm\", action = 'store_true', help='Whether to use IMM')\n    parser.add_argument('-v', \"--verbose\", action = 'store_true', help='Verbose')\n    parser.add_argument('--kf_process', type=float, default=5.2, help='kf 2d process noise factor')\n    parser.add_argument('--kf_2d_meas', type=float, default=3.2, help='kf 2d measurement noise factor')\n    parser.add_argument('--kf_3d_meas', type=float, default=0.25, help='kf 3d measurement noise factor')\n    parser.add_argument('--pos_weight_3d', type=float, default=1, help='Weight on position covariance process noise in KF')\n    parser.add_argument('--pos_weight', type=float, default=0.006, help='Weight on position covariance process noise in KF')\n    parser.add_argument('--vel_weight', type=float, default=0.008, help='Weight on velocity covariance process noise in KF')\n    parser.add_argument('--theta_weight', type=float, default=0.02, help='Weight on velocity covariance process noise in KF')\n    parser.add_argument('--gate_limit', type=float, default=600, help='Maximum covariance value of the gate')\n    parser.add_argument('--initial_uncertainty', type=float, default=1, help='Uncertainty scaling for initial covariance of track')\n    parser.add_argument('--uncertainty_limit', type=float, default=1.5, help='Uncertainty limit at which to terminate tracks')\n    parser.add_argument(\"--gate_full_state\", action='store_true', help=\"Whether to gate on full kalman state, default is only position\")\n    parser.add_argument(\"--near_online\", action = 'store_true', help=\"Whether to do near online tracking\")\n    parser.add_argument(\"--omni\", action = 'store_true', help=\"Omni directional camera (JRDB)\")\n    opt = parser.parse_args()\n    opt.sequence_folder = opt.sequence_folder.rstrip(os.sep)\n    opt.using_cuda = torch.cuda.is_available() and opt.use_cuda\n    if not opt.point_cloud and opt.track_3d:\n        raise(\"Must provide point cloud if doing 3D tracking!\")\n    if opt.verbose:\n        print(opt)\n    if not os.path.exists(opt.output_folder):\n        os.makedirs(opt.output_folder)\n    return opt\n\n# @profile\ndef main(opt):\n\n    if opt.verbose:\n        print(\"------------------------\")\n        print(\"RUNNING SET UP\")\n        print(\"------------------------\")\n    tf.logging.set_verbosity(40)\n    random.seed(0)\n    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor\n    os.makedirs(opt.output_folder, exist_ok=True)\n    if opt.LSTM:\n        opt.max_cosine_distance = 1\n        lstm = CombiLSTM()\n        checkpoint = torch.load(opt.lstm_model)\n        lstm.load_state_dict(checkpoint['state_dict'])\n        if opt.using_cuda:\n            lstm.cuda()\n        lstm.eval()\n    else:\n        lstm = None\n    if opt.combine_features:\n        combination_model = CombiNet()\n        checkpoint = torch.load(opt.combo_model)\n        combination_model.load_state_dict(checkpoint['state_dict'])\n        if opt.using_cuda:\n            combination_model.cuda()\n        combination_model.eval()\n    else:\n        combination_model = None\n    \n    dataset = SequenceDataset(opt.sequence_folder, point_cloud=opt.point_cloud, omni=opt.omni)\n    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn = collate_fn)\n    appearance_model = create_appearance_model(opt.appearance_model, opt.aligned_reid_ckpt, opt.resnet_reid_ckpt, opt.using_cuda)\n    if opt.point_cloud:\n        depth_model = create_depth_model(opt.depth_model, opt.depth_config_path)\n    if opt.track_3d:\n        tracker = Tracker_3d(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,\n                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,\n                        matching_strategy=opt.matching_strategy,\n                        gate_full_state=opt.gate_full_state,\n                        kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight, opt.theta_weight,\n                                       opt.kf_process, opt.kf_2d_meas, opt.kf_3d_meas, opt.initial_uncertainty),\n                        calib=dataset.calib,\n                        dummy_node_cost_iou=opt.dummy_node_cost_iou,\n                        dummy_node_cost_app=opt.dummy_node_cost_app,\n                        nn_budget=opt.nn_budget,\n                        use_imm=opt.use_imm,\n                        uncertainty_limit=opt.uncertainty_limit,\n                        gate_limit=opt.gate_limit,\n                        omni=opt.omni)\n    else:\n        tracker = Tracker(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,\n                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,\n                        matching_strategy=opt.matching_strategy,\n                        kf_appearance_feature=opt.kf_appearance_feature,\n                        gate_full_state=opt.gate_full_state,\n                        kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),\n                        kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),\n                        calib=dataset.calib,\n                        dummy_node_cost_iou=opt.dummy_node_cost_iou,\n                        dummy_node_cost_app=opt.dummy_node_cost_app,\n                        nn_budget=opt.nn_budget,\n                        use_imm=opt.use_imm,\n                        uncertainty_limit=opt.uncertainty_limit,\n                        optical_flow=opt.optical_flow_initiation,\n                        gate_limit=opt.gate_limit)\n\n    results = []\n    results_3d = []\n    n_frames = len(dataloader)\n    if opt.log_data:\n        full_log = [{'tracks':[], 'detections':[], 'detections_3d':[]} for _ in range(n_frames)]\n    det_matrix = None\n    seq_name = os.path.split(opt.sequence_folder)[-1]\n\n    frame_times = []\n    if opt.verbose:\n        print(\"------------------------\")\n        print(\"BEGINNING TRACKING OF SEQUENCE %s\"%seq_name)\n        print(\"------------------------\")\n    for frame_idx, img_path, input_img, point_cloud in tqdm(dataloader, ncols = 100, disable=not opt.verbose):\n        # if frame_idx > 120:\n        #     break\n        # elif frame_idx < 98:\n        #     continue\n\n        if opt.log_data:\n            full_log[frame_idx]['img_path'] = copy.copy(img_path)\n        input_img = input_img.type(Tensor)\n        if opt.reference:\n            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'det',opt.ref_det+'.txt'), frame_idx, det_matrix, threshold = 0, nms_threshold = opt.nms_thresh)\n        elif opt.ground_truth:\n            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'gt','gt.txt'), frame_idx, det_matrix, nms_threshold = opt.nms_thresh)\n        else:\n            raise(\"Must specify ground truth or detections\")\n\n        # --- START OF TRACKING ---\n        # start_time = time.time()\n        if detections is None or len(detections)==0:\n            tracker.predict()\n            if opt.log_data:\n                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)\n            start_time = time.time()\n            tracker.update(input_img, [])\n        else:\n            total_dets = len(detections)\n            patches = get_image_patches(input_img, detections)\n            appearance_features = generate_features_batched(appearance_model, patches, opt, object_ids)\n            if opt.point_cloud:\n                if not opt.omni:\n                    point_cloud = point_cloud[point_cloud[:,2]>=0]\n                if opt.fpointnet:\n                    boxes_3d, valid_3d, _, scores_3d, depth_features = generate_detections_3d(depth_model, \n                                                                        detections, np.asarray(point_cloud), \n                                                                        dataset.calib, input_img.shape,\n                                                                        peds='ped' in opt.ref_det or opt.omni)\n                    depth_features = convert_depth_features(depth_features, valid_3d)\n                else:\n                    boxes_3d, valid_3d = read_ground_truth_3d_detections(os.path.join(opt.sequence_folder,'gt','3d_detections.txt'), frame_idx)        \n                features, appearance_features = combine_features(appearance_features, depth_features, valid_3d, combination_model, depth_weight = opt.depth_weight)\n                # boxes_3d = boxes_3d[valid_3d != -1] # Old and buggy way of handling missing box\n                # detections = detections[valid_3d != -1]\n                if np.any(valid_3d == -1):\n                    compare_2d = True\n                else:\n                    compare_2d = False\n                if len(boxes_3d) > 0:\n                    detections_3d = []\n                    for idx, box in enumerate(boxes_3d):\n                        if valid_3d[idx] == -1:\n                            detections_3d.append(None)\n                        else:\n                            detections_3d.append(np.array(box).astype(np.float32))\n                else:\n                    detections_3d = None\n            else:\n                appearance_features = [appearance_features[i] for i in range(total_dets)]\n                features = [None]*len(appearance_features)\n                compare_2d = True\n                detections_3d = None\n            detections = convert_detections(detections, features, appearance_features, detections_3d)\n            tracker.predict()\n            if opt.log_data:\n                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)\n            start_time = time.time()\n            tracker.update(input_img, detections, compare_2d)\n\n        # --- END OF TRACKING ---\n        end_time = time.time()\n        frame_times.append(end_time - start_time)\n\n\n        if opt.log_data:\n            full_tracks = copy.deepcopy(tracker.tracks)\n            temp_tracks = []\n            for track in full_tracks:\n                bbox = track.to_tlwh(None)\n                if not (bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10):\n                    temp_tracks.append(track)\n            full_log[frame_idx]['tracks'] = temp_tracks\n            full_log[frame_idx]['detections'] = copy.deepcopy(detections)\n\n        for track in tracker.tracks:\n            if opt.track_3d:\n                bbox_3d = track.to_tlwh3d()\n            else:\n                bbox = track.to_tlwh(None)\n            if bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10:\n                continue\n            bbox[0] = max(0,bbox[0]) # Frame adjustments\n            bbox[1] = max(0,bbox[1])\n            bbox[2] = min(bbox[0]+bbox[2], input_img.shape[2])-bbox[0]\n            bbox[3] = min(bbox[1]+bbox[3], input_img.shape[1])-bbox[1]\n\n            track_status = 1\n            if not track.is_confirmed(): # or track.time_since_update > 0:\n                if opt.near_online:\n                    if not track.is_confirmed():\n                         track_status = 0\n                    else:\n                         track_status = 2\n                         continue\n                else:\n                    continue\n            if opt.near_online:\n                if opt.track_3d:\n                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status])\n                else:\n                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track_status])\n\n                if track_status == 1: #updates 0s\n                    for row_i in range(len(results)):\n                        if results[row_i][1] == track.track_id:\n                            results[row_i][6] = 1\n                        if opt.point_cloud:\n                            if results_3d[row_i][1] == track.track_id:\n                                results_3d[row_i][7] = 1\n            else:\n                if opt.track_3d:\n                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6]])\n                else:\n                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])\n                # if opt.point_cloud:\n\n    frame_times = np.asarray(frame_times)\n    if opt.verbose:\n        print(\"------------------------\")\n        print(\"COMPLETED TRACKING, SAVING RESULTS\")\n        print(\"------------------------\")\n        print('\\n\\n','Total Tracking Time:',np.sum(frame_times),'Average Time Per Frame:',np.mean(frame_times))\n\n    if opt.track_3d:\n        output_file_3d = os.path.join(opt.output_folder, seq_name+\"_3d.txt\")\n        if len(results_3d) > 0:\n            with open(output_file_3d, 'w+') as f:\n                for row in results_3d:\n                    if opt.near_online and row[9] != 1:\n                        continue\n                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.4f,1,1,1,-1' % (\n                        row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]), file=f)\n    else:\n        output_file = os.path.join(opt.output_folder, seq_name+\".txt\")\n        if len(results) > 0:\n            with open(output_file, 'w+') as f:\n                for row in results:\n                    if opt.near_online and row[6] != 1:\n                        continue\n                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,1,1,-1' % (\n                        row[0], row[1], row[2], row[3], row[4], row[5]), file=f)\n\n    if opt.log_data:\n        output_file = os.path.join(opt.output_folder, seq_name+\".p\")\n        with open(output_file, 'wb') as f:\n            pickle.dump(full_log, f)\n\nif __name__=='__main__':\n    opt = parse_arguments()\n    main(opt)\n"
  },
  {
    "path": "paper_experiments/utils/EKF.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport pdb\n\n\"\"\"\nTable for the 0.95 quantile of the chi-square distribution with N degrees of\nfreedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv\nfunction and used as Mahalanobis gating threshold.\n\"\"\"\nchi2inv95 = {\n    1: 3.8415,\n    2: 5.9915,\n    3: 7.8147,\n    4: 9.4877,\n    5: 11.070,\n    6: 12.592,\n    7: 14.067,\n    8: 15.507,\n    9: 16.919}\n\nchi2inv90 = {\n    1: 2.706,\n    2: 4.605,\n    3: 6.251,\n    4: 7.779,\n    5: 9.236,\n    6: 10.645,\n    7: 12.017,\n    8: 13.363,\n    9: 14.684}\n\nchi2inv975 = {\n    1: 5.025,\n    2: 7.378,\n    3: 9.348,\n    4: 11.143,\n    5: 12.833,\n    6: 14.449,\n    7: 16.013,\n    8: 17.535,\n    9: 19.023}\n\nchi2inv10 = {\n    1: .016,\n    2: .221,\n    3: .584,\n    4: 1.064,\n    5: 1.610,\n    6: 2.204,\n    7: 2.833,\n    8: 3.490,\n    9: 4.168}\n\n\nchi2inv995 = {\n    1: 0.0000393,\n    2: 0.0100,\n    3: .0717,\n    4: .207,\n    5: .412,\n    6: .676,\n    7: .989,\n    8: 1.344,\n    9: 1.735}\n\n\nchi2inv75 = {\n    1: 1.323,\n    2: 2.773,\n    3: 4.108,\n    4: 5.385,\n    5: 6.626,\n    6: 7.841,\n    7: 9.037,\n    8: 10.22,\n    9: 11.39}\n\ndef squared_mahalanobis_distance(mean, covariance, measurements):\n    # cholesky factorization used to solve for \n    # z = d * inv(covariance)\n    # so z is also the solution to \n    # covariance * z = d       \n    d = measurements - mean\n\n    # Note: The cholesky factorization is giving weird answers. This is marginally slower but correct\n    return np.matmul(np.matmul(d, np.linalg.inv(covariance)), d.T).diagonal()\n\n    # print(\"Measurements:\", measurements)\n    # print(\"Mean:\", mean)\n    # print(\"dshape:\", d.shape, \"d:\", d)\n    # print(\"d*inv(cov)\", np.matmul(d, np.linalg.inv(covariance)))\n\n    cholesky_factor = np.linalg.cholesky(covariance)\n    z = scipy.linalg.solve_triangular(\n        cholesky_factor, d.T, lower=True, check_finite=False,\n        overwrite_b=True)\n\n    squared_maha = np.sum(z * (measurements-mean).T, axis=0)\n    # print(\"Squared maha dist:\", squared_maha)\n    # print(\"cov:\", covariance)\n    # print(\"z\", z, '\\n')\n    return squared_maha\n\n\nclass EKF(object):\n    \"\"\"\n    Generic extended kalman filter class\n\n    \"\"\"\n\n    def __init__(self):\n        pass\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the new track. \n            Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        pass\n\n\n    def predict_mean(self, mean):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        pass\n\n    def get_process_noise(self, mean, covariance):\n        # Returns Rt the motion noise covariance\n        pass\n    def predict_covariance(self, mean, covariance):\n        pass\n\n    def project_mean(self, mean):\n        # Measurement prediction from state (function h)\n        # Calculations sensor update Jacobian (Ht)\n        # Returns (h(mean), Ht)\n        pass\n    def project_cov(self, mean, covariance):\n        pass\n\n    def predict(self, mean, covariance, last_detection, next_to_last_detection):\n        \"\"\"Run Kalman filter prediction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The covariance matrix of the object state at the\n            previous time step.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        # Perform prediction\n        covariance = self.predict_covariance(mean, covariance, last_detection, next_to_last_detection)\n        mean = self.predict_mean(mean)\n\n        return mean, covariance\n    def get_innovation_cov(self, covariance):\n        pass\n\n    def project(self, mean, covariance):\n        \"\"\"Project state distribution to measurement space.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The state's mean vector \n        covariance : ndarray\n            The state's covariance matrix\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the projected mean and covariance matrix of the given state\n            estimate.\n\n        \"\"\"\n\n        # Measurement uncertainty scaled by estimated height\n        return self.project_mean(mean), self.project_cov(mean, covariance)\n\n    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, a, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n        predicted_measurement, innovation_cov  = self.project(mean, covariance)\n        # cholesky factorization used to solve for kalman gain since\n        # K = covariance * update_mat.T * inv(innovation_cov)\n        # so K is also the solution to \n        # innovation_cov * K = covariance * update_mat.T\n        try:\n            chol_factor, lower = scipy.linalg.cho_factor(\n                innovation_cov, lower=True, check_finite=False)\n            kalman_gain = scipy.linalg.cho_solve(\n                (chol_factor, lower), np.dot(covariance, self._observation_mat.T).T,\n                check_finite=False).T\n        except:\n            # in case cholesky factorization fails, revert to standard solver\n            kalman_gain = np.linalg.solve(innovation_cov, np.dot(covariance, self._observation_mat.T).T).T\n\n        if JPDA:\n            # marginalization\n            innovation = np.zeros((self.ndim)) \n            cov_soft = np.zeros((self.ndim, self.ndim))\n\n            for measurement_idx, measurement in enumerate(measurement_t):\n\n                p_ij = marginalization[measurement_idx + 1] # + 1 for dummy\n                y_ij = measurement - predicted_measurement\n                innovation += y_ij * p_ij\n                cov_soft += p_ij * np.outer(y_ij, y_ij)\n\n            cov_soft = cov_soft - np.outer(innovation, innovation)\n\n            P_star = covariance - np.linalg.multi_dot((\n                kalman_gain, innovation_cov, kalman_gain.T))\n\n            p_0 = marginalization[0]\n            P_0 = p_0 * covariance + (1 - p_0) * P_star\n\n            new_covariance = P_0 + np.linalg.multi_dot((kalman_gain, cov_soft, kalman_gain.T))\n            \n        else:\n            innovation = measurement_t - predicted_measurement\n\n            new_covariance = covariance - np.linalg.multi_dot((\n                kalman_gain, innovation_cov, kalman_gain.T))\n\n        new_mean = mean + np.dot(innovation, kalman_gain.T)\n        return new_mean, new_covariance\n"
  },
  {
    "path": "paper_experiments/utils/JPDA_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom linear_assignment import min_marg_matching\nimport pdb\n\n\ndef get_unmatched(all_idx, matches, i, marginalization=None):\n    assigned = [match[i] for match in matches]\n    unmatched = set(all_idx) - set(assigned)\n    if marginalization is not None:\n        # from 1 for dummy node\n        in_gate_dets = np.nonzero(np.sum(\n            marginalization[:, 1:], axis=0))[0].tolist()\n        # unmatched = [d for d in unmatched if d not in in_gate_dets] # TODO: Filter by gate?\n    return list(unmatched)\n\n\nclass Matcher:\n\n    def __init__(self, detections, marginalizations, confirmed_tracks,\n                 matching_strategy,\n                 assignment_threshold=None):\n        self.detections = detections\n        self.marginalizations = marginalizations\n        self.confirmed_tracks = confirmed_tracks\n        self.assignment_threshold = assignment_threshold\n        self.detection_indices = np.arange(len(detections))\n        self.matching_strategy = matching_strategy\n\n    def match(self):\n        self.get_matches()\n        self.get_unmatched_tracks()\n        self.get_unmatched_detections()\n        return self.matches, self.unmatched_tracks, self.unmatched_detections\n\n    def get_matches(self):\n\n        if self.matching_strategy == \"max_and_threshold\":\n            self.max_and_threshold_matching()\n        elif self.matching_strategy == \"hungarian\":\n            self.hungarian()\n        elif self.matching_strategy == \"max_match\":\n            self.max_match()\n        elif self.matching_strategy == \"none\":\n            self.matches = []\n        else: \n            raise Exception('Unrecognized matching strategy: {}'.\n                            format(self.matching_strategy))\n\n    def get_unmatched_tracks(self):\n        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,\n                                              self.matches, 0)\n\n    def get_unmatched_detections(self):\n        self.unmatched_detections = get_unmatched(self.detection_indices, self.matches, 1, self.marginalizations)\n\n    def max_match(self):\n        self.matches = []\n        if self.marginalizations.shape[0] == 0:\n            return\n\n        detection_map = {}\n        for i, track_idx in enumerate(self.confirmed_tracks):\n            marginalization = self.marginalizations[i,:]\n            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy\n\n            if detection_id < 0:\n                continue\n\n            if detection_id not in detection_map.keys():\n                detection_map[detection_id] = track_idx\n            else:\n                cur_track = detection_map[detection_id]\n                track_update = track_idx if self.marginalizations[track_idx, detection_id] > self.marginalizations[cur_track, detection_id] else cur_track\n                detection_map[detection_id] = track_update\n            threshold_p = marginalization[detection_id + 1]\n            if threshold_p < self.assignment_threshold:\n                continue\n\n        for detection in detection_map.keys():\n            self.matches.append((detection_map[detection], detection))\n\n    def max_and_threshold_matching(self):\n\n        self.matches = []\n        if self.marginalizations.shape[0] == 0:\n            return\n\n        for i, track_idx in enumerate(self.confirmed_tracks):\n            marginalization = self.marginalizations[i,:]\n            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy\n\n            if detection_id < 0:\n                continue\n\n            threshold_p = marginalization[detection_id + 1]\n            if threshold_p < self.assignment_threshold:\n                continue\n\n            self.matches.append((track_idx, detection_id))\n\n    def hungarian(self):\n        self.matches, _, _ = min_marg_matching(self.marginalizations,\n                                               self.confirmed_tracks,\n                                               self.assignment_threshold)\n                               \n"
  },
  {
    "path": "paper_experiments/utils/aligned_reid_utils.py",
    "content": "from __future__ import print_function\nimport os\nimport os.path as osp\nimport pickle\nfrom scipy import io\nimport datetime\nimport time\nfrom contextlib import contextmanager\nimport numpy as np\nfrom PIL import Image\nimport torch\nfrom torch.autograd import Variable\nfrom models.aligned_reid_model import Model as aligned_reid_model\nfrom models.deep_sort_model import ImageEncoder as deep_sort_model\nfrom utils.resnet_reid_utils import ResNet_Loader\n\ndef time_str(fmt=None):\n  if fmt is None:\n    fmt = '%Y-%m-%d_%H:%M:%S'\n  return datetime.datetime.today().strftime(fmt)\n\n\ndef load_pickle(path):\n  \"\"\"Check and load pickle object.\n  According to this post: https://stackoverflow.com/a/41733927, cPickle and \n  disabling garbage collector helps with loading speed.\"\"\"\n  assert osp.exists(path)\n  # gc.disable()\n  with open(path, 'rb') as f:\n    ret = pickle.load(f)\n  # gc.enable()\n  return ret\n\n\ndef save_pickle(obj, path):\n  \"\"\"Create dir and save file.\"\"\"\n  may_make_dir(osp.dirname(osp.abspath(path)))\n  with open(path, 'wb') as f:\n    pickle.dump(obj, f, protocol=2)\n\n\ndef save_mat(ndarray, path):\n  \"\"\"Save a numpy ndarray as .mat file.\"\"\"\n  io.savemat(path, dict(ndarray=ndarray))\n\n\ndef to_scalar(vt):\n  \"\"\"Transform a length-1 pytorch Variable or Tensor to scalar. \n  Suppose tx is a torch Tensor with shape tx.size() = torch.Size([1]), \n  then npx = tx.cpu().numpy() has shape (1,), not 1.\"\"\"\n  if isinstance(vt, Variable):\n    return vt.data.cpu().numpy().flatten()[0]\n  if torch.is_tensor(vt):\n    return vt.cpu().numpy().flatten()[0]\n  raise TypeError('Input should be a variable or tensor')\n\n\ndef transfer_optim_state(state, device_id=-1):\n  \"\"\"Transfer an optimizer.state to cpu or specified gpu, which means \n  transferring tensors of the optimizer.state to specified device. \n  The modification is in place for the state.\n  Args:\n    state: An torch.optim.Optimizer.state\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for key, val in state.items():\n    if isinstance(val, dict):\n      transfer_optim_state(val, device_id=device_id)\n    elif isinstance(val, Variable):\n      raise RuntimeError(\"Oops, state[{}] is a Variable!\".format(key))\n    elif isinstance(val, torch.nn.Parameter):\n      raise RuntimeError(\"Oops, state[{}] is a Parameter!\".format(key))\n    else:\n      try:\n        if device_id == -1:\n          state[key] = val.cpu()\n        else:\n          state[key] = val.cuda(device=device_id)\n      except:\n        pass\n\n\ndef may_transfer_optims(optims, device_id=-1):\n  \"\"\"Transfer optimizers to cpu or specified gpu, which means transferring \n  tensors of the optimizer to specified device. The modification is in place \n  for the optimizers.\n  Args:\n    optims: A list, which members are either torch.nn.optimizer or None.\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for optim in optims:\n    if isinstance(optim, torch.optim.Optimizer):\n      transfer_optim_state(optim.state, device_id=device_id)\n\n\ndef may_transfer_modules_optims(modules_and_or_optims, device_id=-1):\n  \"\"\"Transfer optimizers/modules to cpu or specified gpu.\n  Args:\n    modules_and_or_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module or None.\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for item in modules_and_or_optims:\n    if isinstance(item, torch.optim.Optimizer):\n      transfer_optim_state(item.state, device_id=device_id)\n    elif isinstance(item, torch.nn.Module):\n      if device_id == -1:\n        item.cpu()\n      else:\n        item.cuda(device=device_id)\n    elif item is not None:\n      print('[Warning] Invalid type {}'.format(item.__class__.__name__))\n\n\nclass TransferVarTensor(object):\n  \"\"\"Return a copy of the input Variable or Tensor on specified device.\"\"\"\n\n  def __init__(self, device_id=-1):\n    self.device_id = device_id\n\n  def __call__(self, var_or_tensor):\n    return var_or_tensor.cpu() if self.device_id == -1 \\\n      else var_or_tensor.cuda(self.device_id)\n\n\nclass TransferModulesOptims(object):\n  \"\"\"Transfer optimizers/modules to cpu or specified gpu.\"\"\"\n\n  def __init__(self, device_id=-1):\n    self.device_id = device_id\n\n  def __call__(self, modules_and_or_optims):\n    may_transfer_modules_optims(modules_and_or_optims, self.device_id)\n\n\ndef set_devices(sys_device_ids):\n  \"\"\"\n  It sets some GPUs to be visible and returns some wrappers to transferring \n  Variables/Tensors and Modules/Optimizers.\n  Args:\n    sys_device_ids: a tuple; which GPUs to use\n      e.g.  sys_device_ids = (), only use cpu\n            sys_device_ids = (3,), use the 4th gpu\n            sys_device_ids = (0, 1, 2, 3,), use first 4 gpus\n            sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus\n  Returns:\n    TVT: a `TransferVarTensor` callable\n    TMO: a `TransferModulesOptims` callable\n  \"\"\"\n  # Set the CUDA_VISIBLE_DEVICES environment variable\n  import os\n  visible_devices = ''\n  for i in sys_device_ids:\n    visible_devices += '{}, '.format(i)\n  os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices\n  # Return wrappers.\n  # Models and user defined Variables/Tensors would be transferred to the\n  # first device.\n  device_id = 0 if len(sys_device_ids) > 0 else -1\n  TVT = TransferVarTensor(device_id)\n  TMO = TransferModulesOptims(device_id)\n  return TVT, TMO\n\n\ndef set_devices_for_ml(sys_device_ids):\n  \"\"\"This version is for mutual learning.\n  \n  It sets some GPUs to be visible and returns some wrappers to transferring \n  Variables/Tensors and Modules/Optimizers.\n  \n  Args:\n    sys_device_ids: a tuple of tuples; which devices to use for each model, \n      len(sys_device_ids) should be equal to number of models. Examples:\n        \n      sys_device_ids = ((-1,), (-1,))\n        the two models both on CPU\n      sys_device_ids = ((-1,), (2,))\n        the 1st model on CPU, the 2nd model on GPU 2\n      sys_device_ids = ((3,),)\n        the only one model on the 4th gpu \n      sys_device_ids = ((0, 1), (2, 3))\n        the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3\n      sys_device_ids = ((0,), (0,))\n        the two models both on GPU 0\n      sys_device_ids = ((0,), (0,), (1,), (1,))\n        the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1\n  \n  Returns:\n    TVTs: a list of `TransferVarTensor` callables, one for one model.\n    TMOs: a list of `TransferModulesOptims` callables, one for one model.\n    relative_device_ids: a list of lists; `sys_device_ids` transformed to \n      relative ids; to be used in `DataParallel`\n  \"\"\"\n  import os\n\n  all_ids = []\n  for ids in sys_device_ids:\n    all_ids += ids\n  unique_sys_device_ids = list(set(all_ids))\n  unique_sys_device_ids.sort()\n  if -1 in unique_sys_device_ids:\n    unique_sys_device_ids.remove(-1)\n\n  # Set the CUDA_VISIBLE_DEVICES environment variable\n\n  visible_devices = ''\n  for i in unique_sys_device_ids:\n    visible_devices += '{}, '.format(i)\n  os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices\n\n  # Return wrappers\n\n  relative_device_ids = []\n  TVTs, TMOs = [], []\n  for ids in sys_device_ids:\n    relative_ids = []\n    for id in ids:\n      if id != -1:\n        id = find_index(unique_sys_device_ids, id)\n      relative_ids.append(id)\n    relative_device_ids.append(relative_ids)\n\n    # Models and user defined Variables/Tensors would be transferred to the\n    # first device.\n    TVTs.append(TransferVarTensor(relative_ids[0]))\n    TMOs.append(TransferModulesOptims(relative_ids[0]))\n  return TVTs, TMOs, relative_device_ids\n\n\ndef load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):\n  \"\"\"Load state_dict's of modules/optimizers from file.\n  Args:\n    modules_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module.\n    ckpt_file: The file path.\n    load_to_cpu: Boolean. Whether to transform tensors in modules/optimizers \n      to cpu type.\n  \"\"\"\n  map_location = (lambda storage, loc: storage) if load_to_cpu else None\n  ckpt = torch.load(ckpt_file, map_location=map_location)\n  for m, sd in zip(modules_optims, ckpt['state_dicts']):\n    if 'fc.weight' in sd:\n      del sd['fc.weight']\n    if 'fc.bias' in sd:  \n      del sd['fc.bias']\n    load_state_dict(m, sd)\n  if verbose:\n    print('Resume from ckpt {}, \\nepoch {}, \\nscores {}'.format(\n      ckpt_file, ckpt['ep'], ckpt['scores']))\n  return ckpt['ep'], ckpt['scores']\n\n\ndef save_ckpt(modules_optims, ep, scores, ckpt_file):\n  \"\"\"Save state_dict's of modules/optimizers to file. \n  Args:\n    modules_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module.\n    ep: the current epoch number\n    scores: the performance of current model\n    ckpt_file: The file path.\n  Note:\n    torch.save() reserves device type and id of tensors to save, so when \n    loading ckpt, you have to inform torch.load() to load these tensors to \n    cpu or your desired gpu, if you change devices.\n  \"\"\"\n  state_dicts = [m.state_dict() for m in modules_optims]\n  ckpt = dict(state_dicts=state_dicts,\n              ep=ep,\n              scores=scores)\n  may_make_dir(osp.dirname(osp.abspath(ckpt_file)))\n  torch.save(ckpt, ckpt_file)\n\n\ndef load_state_dict(model, src_state_dict):\n  \"\"\"Copy parameters and buffers from `src_state_dict` into `model` and its \n  descendants. The `src_state_dict.keys()` NEED NOT exactly match \n  `model.state_dict().keys()`. For dict key mismatch, just\n  skip it; for copying error, just output warnings and proceed.\n\n  Arguments:\n    model: A torch.nn.Module object. \n    src_state_dict (dict): A dict containing parameters and persistent buffers.\n  Note:\n    This is modified from torch.nn.modules.module.load_state_dict(), to make\n    the warnings and errors more detailed.\n  \"\"\"\n  from torch.nn import Parameter\n  dest_state_dict = model.state_dict()\n  for name, param in src_state_dict.items():\n    ### CHANGED HERE FOR FINE TUNING\n    if name not in dest_state_dict:\n      continue\n    if isinstance(param, Parameter):\n      # backwards compatibility for serialized parameters\n      param = param.data\n    try:\n      dest_state_dict[name].copy_(param)\n    except Exception as e:\n      print(\"Warning: Error occurs when copying '{}': {}\"\n            .format(name, str(e)))\n\n  # src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())\n  # if len(src_missing) > 0:\n  #   print(\"Keys not found in source state_dict: \")\n  #   for n in src_missing:\n  #     print('\\t', n)\n\n  # dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())\n  # if len(dest_missing) > 0:\n  #   print(\"Keys not found in destination state_dict: \")\n  #   for n in dest_missing:\n  #     print('\\t', n)\n\n\ndef is_iterable(obj):\n  return hasattr(obj, '__len__')\n\n\ndef may_set_mode(maybe_modules, mode):\n  \"\"\"maybe_modules: an object or a list of objects.\"\"\"\n  assert mode in ['train', 'eval']\n  if not is_iterable(maybe_modules):\n    maybe_modules = [maybe_modules]\n  for m in maybe_modules:\n    if isinstance(m, torch.nn.Module):\n      if mode == 'train':\n        m.train()\n      else:\n        m.eval()\n\n\ndef may_make_dir(path):\n  \"\"\"\n  Args:\n    path: a dir, or result of `osp.dirname(osp.abspath(file_path))`\n  Note:\n    `osp.exists('')` returns `False`, while `osp.exists('.')` returns `True`!\n  \"\"\"\n  # This clause has mistakes:\n  # if path is None or '':\n\n  if path in [None, '']:\n    return\n  if not osp.exists(path):\n    os.makedirs(path)\n\n\nclass AverageMeter(object):\n  \"\"\"Modified from Tong Xiao's open-reid. \n  Computes and stores the average and current value\"\"\"\n\n  def __init__(self):\n    self.val = 0\n    self.avg = 0\n    self.sum = 0\n    self.count = 0\n\n  def reset(self):\n    self.val = 0\n    self.avg = 0\n    self.sum = 0\n    self.count = 0\n\n  def update(self, val, n=1):\n    self.val = val\n    self.sum += val * n\n    self.count += n\n    self.avg = float(self.sum) / (self.count + 1e-20)\n\n\nclass RunningAverageMeter(object):\n  \"\"\"Computes and stores the running average and current value\"\"\"\n\n  def __init__(self, hist=0.99):\n    self.val = None\n    self.avg = None\n    self.hist = hist\n\n  def reset(self):\n    self.val = None\n    self.avg = None\n\n  def update(self, val):\n    if self.avg is None:\n      self.avg = val\n    else:\n      self.avg = self.avg * self.hist + val * (1 - self.hist)\n    self.val = val\n\n\nclass RecentAverageMeter(object):\n  \"\"\"Stores and computes the average of recent values.\"\"\"\n\n  def __init__(self, hist_size=100):\n    self.hist_size = hist_size\n    self.fifo = []\n    self.val = 0\n\n  def reset(self):\n    self.fifo = []\n    self.val = 0\n\n  def update(self, val):\n    self.val = val\n    self.fifo.append(val)\n    if len(self.fifo) > self.hist_size:\n      del self.fifo[0]\n\n  @property\n  def avg(self):\n    assert len(self.fifo) > 0\n    return float(sum(self.fifo)) / len(self.fifo)\n\n\ndef get_model_wrapper(model, multi_gpu):\n  from torch.nn.parallel import DataParallel\n  if multi_gpu:\n    return DataParallel(model)\n  else:\n    return model\n\n\nclass ReDirectSTD(object):\n  \"\"\"Modified from Tong Xiao's `Logger` in open-reid.\n  This class overwrites sys.stdout or sys.stderr, so that console logs can\n  also be written to file.\n  Args:\n    fpath: file path\n    console: one of ['stdout', 'stderr']\n    immediately_visible: If `False`, the file is opened only once and closed\n      after exiting. In this case, the message written to file may not be\n      immediately visible (Because the file handle is occupied by the\n      program?). If `True`, each writing operation of the console will\n      open, write to, and close the file. If your program has tons of writing\n      operations, the cost of opening and closing file may be obvious. (?)\n  Usage example:\n    `ReDirectSTD('stdout.txt', 'stdout', False)`\n    `ReDirectSTD('stderr.txt', 'stderr', False)`\n  NOTE: File will be deleted if already existing. Log dir and file is created\n    lazily -- if no message is written, the dir and file will not be created.\n  \"\"\"\n\n  def __init__(self, fpath=None, console='stdout', immediately_visible=False):\n    import sys\n    import os\n    import os.path as osp\n\n    assert console in ['stdout', 'stderr']\n    self.console = sys.stdout if console == 'stdout' else sys.stderr\n    self.file = fpath\n    self.f = None\n    self.immediately_visible = immediately_visible\n    if fpath is not None:\n      # Remove existing log file.\n      if osp.exists(fpath):\n        os.remove(fpath)\n\n    # Overwrite\n    if console == 'stdout':\n      sys.stdout = self\n    else:\n      sys.stderr = self\n\n  def __del__(self):\n    self.close()\n\n  def __enter__(self):\n    pass\n\n  def __exit__(self, *args):\n    self.close()\n\n  def write(self, msg):\n    self.console.write(msg)\n    if self.file is not None:\n      may_make_dir(os.path.dirname(osp.abspath(self.file)))\n      if self.immediately_visible:\n        with open(self.file, 'a') as f:\n          f.write(msg)\n      else:\n        if self.f is None:\n          self.f = open(self.file, 'w')\n        self.f.write(msg)\n\n  def flush(self):\n    self.console.flush()\n    if self.f is not None:\n      self.f.flush()\n      import os\n      os.fsync(self.f.fileno())\n\n  def close(self):\n    self.console.close()\n    if self.f is not None:\n      self.f.close()\n\n\ndef set_seed(seed):\n  import random\n  random.seed(seed)\n  print('setting random-seed to {}'.format(seed))\n\n  import numpy as np\n  np.random.seed(seed)\n  print('setting np-random-seed to {}'.format(seed))\n\n  import torch\n  torch.backends.cudnn.enabled = False\n  print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))\n  # set seed for CPU\n  torch.manual_seed(seed)\n  print('setting torch-seed to {}'.format(seed))\n\n\ndef print_array(array, fmt='{:.2f}', end=' '):\n  \"\"\"Print a 1-D tuple, list, or numpy array containing digits.\"\"\"\n  s = ''\n  for x in array:\n    s += fmt.format(float(x)) + end\n  s += '\\n'\n  print(s)\n  return s\n\n\n# Great idea from https://github.com/amdegroot/ssd.pytorch\ndef str2bool(v):\n  return v.lower() in (\"yes\", \"true\", \"t\", \"1\")\n\n\ndef tight_float_str(x, fmt='{:.4f}'):\n  return fmt.format(x).rstrip('0').rstrip('.')\n\n\ndef find_index(seq, item):\n  for i, x in enumerate(seq):\n    if item == x:\n      return i\n  return -1\n\n\ndef adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):\n  \"\"\"Decay exponentially in the later phase of training. All parameters in the \n  optimizer share the same learning rate.\n  \n  Args:\n    optimizer: a pytorch `Optimizer` object\n    base_lr: starting learning rate\n    ep: current epoch, ep >= 1\n    total_ep: total number of epochs to train\n    start_decay_at_ep: start decaying at the BEGINNING of this epoch\n  \n  Example:\n    base_lr = 2e-4\n    total_ep = 300\n    start_decay_at_ep = 201\n    It means the learning rate starts at 2e-4 and begins decaying after 200 \n    epochs. And training stops after 300 epochs.\n  \n  NOTE: \n    It is meant to be called at the BEGINNING of an epoch.\n  \"\"\"\n  assert ep >= 1, \"Current epoch number should be >= 1\"\n\n  if ep < start_decay_at_ep:\n    return\n\n  for g in optimizer.param_groups:\n    g['lr'] = (base_lr * (0.001 ** (float(ep + 1 - start_decay_at_ep)\n                                    / (total_ep + 1 - start_decay_at_ep))))\n  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))\n\n\ndef adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):\n  \"\"\"Multiplied by a factor at the BEGINNING of specified epochs. All \n  parameters in the optimizer share the same learning rate.\n  \n  Args:\n    optimizer: a pytorch `Optimizer` object\n    base_lr: starting learning rate\n    ep: current epoch, ep >= 1\n    decay_at_epochs: a list or tuple; learning rate is multiplied by a factor \n      at the BEGINNING of these epochs\n    factor: a number in range (0, 1)\n  \n  Example:\n    base_lr = 1e-3\n    decay_at_epochs = [51, 101]\n    factor = 0.1\n    It means the learning rate starts at 1e-3 and is multiplied by 0.1 at the \n    BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at the \n    BEGINNING of the 101'st epoch, then stays unchanged till the end of \n    training.\n  \n  NOTE: \n    It is meant to be called at the BEGINNING of an epoch.\n  \"\"\"\n  assert ep >= 1, \"Current epoch number should be >= 1\"\n\n  if ep not in decay_at_epochs:\n    return\n\n  ind = find_index(decay_at_epochs, ep)\n  for g in optimizer.param_groups:\n    g['lr'] = base_lr * factor ** (ind + 1)\n  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))\n\n\n@contextmanager\ndef measure_time(enter_msg):\n  st = time.time()\n  print(enter_msg)\n  yield\n  print('Done, {:.2f}s'.format(time.time() - st))\n\n# @profile\ndef generate_features(appearance_model, patches, opt, object_ids = None):\n    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor\n    features = []\n    for i, patch in enumerate(patches):\n        if patch is None or patch.nelement()==0:\n            features.append(None)\n            continue\n        patch = patch.unsqueeze(0)\n        if opt.perfect:\n            feature = torch.zeros(1024)\n            feature[object_ids[i]] = 1\n            feature = feature.type(Tensor)\n        else:\n            if opt.appearance_model == 'aligned_reid':\n                with torch.no_grad():\n                    feature ,_ = appearance_model(patch.cuda())\n                    feature = feature.squeeze(0).type(Tensor)\n            elif opt.appearance_model == 'deepsort':\n                patch = patch.permute(0,2,3,1).cpu().numpy()\n                feature = appearance_model(patch)\n                feature = feature[0]\n            elif opt.appearance_model == 'resnet_reid':\n                patch = patch.permute(0,2,3,1)\n                feature = appearance_model.inference([patch.squeeze(0)])\n                feature = feature[0][0].type(Tensor)\n        features.append(feature)\n    return features\n\n# @profile\ndef generate_features_batched(appearance_model, patches, opt, object_ids = None):\n    if opt.perfect or opt.appearance_model == 'deepsort': # Do old/slow way if perfect features or deepsort features\n        return generate_features(appearance_model, patches, opt, object_ids)\n\n    if opt.appearance_model == 'resnet_reid':\n        Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor\n\n        features = []\n        resnet_patches = []\n        for i, patch in enumerate(patches):\n            if patch is None or patch.nelement()==0:\n                features.append(None)\n            else:\n                features.append(1)\n                resnet_patches.append(patch.permute(1,2,0))\n\n        resnet_features = appearance_model.inference(resnet_patches)\n        ctr = 0\n        for i in range(len(features)):\n            if features[i] is not None:\n                features[i] = resnet_features[ctr].type(Tensor)\n                ctr += 1\n        return features\n\n    elif opt.appearance_model == 'aligned_reid':\n        return generate_features(appearance_model, patches, opt, object_ids)   #TODO: Fix batched appearance features. This currently gives bad features\n        Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor\n        maxx = -1\n        maxy = -1\n        idxs = []\n        features = []\n        for i, patch in enumerate(patches):\n            if patch is None or patch.nelement()==0:\n                continue\n            maxx = max(maxx, patch.size()[1])\n            maxy = max(maxy, patch.size()[2])\n            idxs.append(i)\n\n        if(maxx==-1 and maxy==-1):\n            return features\n        batch = torch.zeros(len(idxs),3,maxx,maxy).cuda()\n        for i, idx in enumerate(idxs):\n            patch = patches[idx]\n            patchx = patch.size()[1]\n            patchy = patch.size()[2]\n            batch[i,:,:patchx,:patchy] = patch\n\n        with torch.no_grad():\n            features_torch, _ = appearance_model(batch)\n            features_torch = features_torch.type(Tensor)\n\n            i = 0\n            ctr = 0\n            for idx in idxs:\n                while(i < idx):\n                    features.append(None)\n                    i+=1\n                features.append(features_torch[ctr,:])\n                i+=1\n                ctr+=1\n            while(i<len(patches)):\n                features.append(None)\n                i+=1\n\n        return features\n    else:\n        print(\"Critical Error! Attempted to batch appearance features but no model was selected\")\n\ndef get_image_patches(input_img, detections):\n    #Generates patches and also converts detections\n    patches = []\n\n    for detection in detections:\n        x1, y1, x2, y2, _, _, _ = detection\n            # Rescale coordinates to original dimensions\n        x1 = x1.item()\n        x2 = x2.item()\n        y1 = y1.item()\n        y2 = y2.item()\n\n        box_h = round(y2-y1)\n        box_w = round(x2-x1)\n        x1=round(x1)\n        y1=round(y1)\n\n        patch = input_img[:, y1:y1+box_h, x1:x1+box_w]\n        patches.append(patch)\n\n    return patches\n\ndef create_appearance_model(model_type, alignreid_checkpoint, resnet_reid_checkpoint=None, cuda=True):\n    if model_type == 'aligned_reid':\n        appearance_model = aligned_reid_model()\n        map_location = (lambda storage, loc: storage)\n        sd = torch.load(alignreid_checkpoint, map_location=map_location)\n        load_state_dict(appearance_model, sd['state_dicts'][0])\n        if cuda:\n            appearance_model.cuda()\n        appearance_model.eval()\n    elif model_type == 'deepsort':\n        appearance_model = deep_sort_model()\n    elif model_type == 'resnet_reid':\n        appearance_model = ResNet_Loader(resnet_reid_checkpoint)\n\n    return appearance_model\n"
  },
  {
    "path": "paper_experiments/utils/assign_ids_detections.py",
    "content": "import numpy as np\nimport os\nimport pdb\nfrom tqdm import tqdm\nfrom deep_sort_utils import non_max_suppression as deepsort_nms\nfrom visualise import draw_track\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nfrom evaluate_detections import iou\n\ndef assign_detection_id(detection_path, gt_path, conf_threshold = 0, iou_threshold = 0.5):\n\t#expecting detections and gt in file with format as in read_detections.py\n\t# applies confidence thresholding\n\ttry:\n\t\tdetections = np.loadtxt(detection_path, delimiter=',')\n\t\tgt = np.loadtxt(gt_path, delimiter=',')\n\texcept:\n\t\treturn\n\tgt_frames = gt[:, 0]\n\tdet_confidence = detections[:, 6]\n\n\t###CONFIDENCE THRESHOLD\n\tdetections = detections[det_confidence > conf_threshold]\n\t########\n\n\tdet_frames = detections[:, 0]\n\tdet_confidence = detections[:, 6]\n\tgt_boxes = np.asarray(list(zip(gt[:, 2], gt[:, 3], gt[:, 4], gt[:, 5])))\n\tdet_boxes = np.asarray(list(zip(detections[:, 2], detections[:, 3], detections[:, 4], detections[:, 5])))\n\tout_matrix = []\n\tassigned_ids = []\n\n\tfor frame in np.unique(det_frames):\n\t\tframe_mask_det = det_frames == frame\n\t\tframe_mask_gt = gt_frames == frame\n\t\tgt_ids = gt[frame_mask_gt, 1]\n\t\tframe_gt_boxes = gt_boxes[frame_mask_gt]\n\t\tframe_det_boxes = det_boxes[frame_mask_det]\n\n\t\tfor i, det_box in enumerate(frame_det_boxes):\n\t\t\tiou_list = np.asarray([iou(gt_box, det_box) for gt_box in frame_gt_boxes])\n\t\t\tiou_sorted = np.argsort(iou_list)\n\t\t\tpositive_idx = np.where(iou_list >= iou_threshold)[0]\n\t\t\tif len(positive_idx)==0:\n\t\t\t\tassigned_ids.append(-1)\n\t\t\telse:\n\t\t\t\tassigned_ids.append(gt_ids[iou_sorted[-1]])\n\tassigned_ids = np.expand_dims(np.asarray(assigned_ids), 1)\n\t\n\ttry:\n\t\tout_matrix = np.hstack([np.expand_dims(detections[:,0], 1), assigned_ids, detections[:,2:]])\n\texcept:\n\t\tpdb.set_trace()\n\tnp.savetxt(detection_path, out_matrix, delimiter=',', fmt = '%.2f')\n\n\n\treturn\n\nif __name__=='__main__':\n\tap = []\n\tKITTI_root = 'data/KITTI/sequences'\n\tfor sequence in tqdm(range(21)):\n\t\tassign_detection_id(os.path.join(KITTI_root, '%.4d'%sequence, 'det','rrc_subcnn_car_det.txt'), \n\t\t\t\t\t\t\t\t\t\tos.path.join(KITTI_root, '%.4d'%sequence, 'gt', 'gt_car.txt'))\n"
  },
  {
    "path": "paper_experiments/utils/calibration.py",
    "content": "import numpy as np\nimport cv2\nimport os\nimport yaml\nimport torch\nimport pdb\n\nclass Calibration(object):\n    ''' Calibration matrices and utils\n        3d XYZ in <label>.txt are in rect camera coord.\n        2d box xy are in image2 coord\n        Points in <lidar>.bin are in Velodyne coord.\n        y_image2 = P^2_rect * x_rect\n        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo\n        x_ref = Tr_velo_to_cam * x_velo\n        x_rect = R0_rect * x_ref\n        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;\n                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;\n                    0,      0,      1,      0]\n                 = K * [1|t]\n        image2 coord:\n         ----> x-axis (u)\n        |\n        |\n        v y-axis (v)\n        velodyne coord:\n        front x, left y, up z\n        rect/ref camera coord:\n        right x, down y, front z\n        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf\n        TODO(rqi): do matrix multiplication only once for each projection.\n    '''\n    def __init__(self, calib_filepath):\n\n        calibs = self.read_calib_file(calib_filepath)\n        # Projection matrix from rect camera coord to image2 coord\n        self.P = calibs['P2'] \n        self.P = np.reshape(self.P, [3,4])\n        self.P_torch = torch.from_numpy(self.P).float().cuda()\n\n        # Rigid transform from Velodyne coord to reference camera coord\n        try:\n            self.V2C = calibs['Tr_velo_to_cam']\n        except:\n            self.V2C = calibs['Tr_velo_cam']\n\n        self.V2C = np.reshape(self.V2C, [3,4])\n        self.C2V = inverse_rigid_trans(self.V2C)\n        # Rotation from reference camera coord to rect camera coord\n        try:\n            self.R0 = calibs['R0_rect']\n        except:\n            self.R0 = calibs['R_rect']\n        self.R0 = np.reshape(self.R0,[3,3])\n        self.R0_torch = torch.from_numpy(self.R0).float().cuda()\n\n        RA = np.zeros((4,4))\n        RA[:3,:3] = self.R0\n        RA[3,3] = 1\n        self.D = np.matmul(self.P,RA).T\n        self.D_torch = torch.from_numpy(self.D).float().cuda()\n\n        # Camera intrinsics and extrinsics\n        self.c_u = self.P[0,2]\n        self.c_v = self.P[1,2]\n        self.f_u = self.P[0,0]\n        self.f_v = self.P[1,1]\n        self.b_x = self.P[0,3]/(-self.f_u) # relative \n        self.b_y = self.P[1,3]/(-self.f_v)\n\n    def read_calib_file(self, filepath):\n        ''' Read in a calibration file and parse into a dictionary.\n        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py\n        '''\n        data = {}\n        with open(filepath, 'r') as f:\n            for line in f.readlines():\n                line = line.rstrip()\n                if len(line)==0: continue\n                key, value = line.split(' ', 1)\n                if key.endswith(':'):\n                    key = key[:-1]\n                # The only non-float values in these files are dates, which\n                # we don't care about anyway\n                try:\n                    data[key] = np.array([float(x) for x in value.split()])\n                except ValueError:\n                    pass\n\n        return data\n    \n    def read_calib_from_video(self, calib_root_dir):\n        ''' Read calibration for camera 2 from video calib files.\n            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir\n        '''\n        data = {}\n        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))\n        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))\n        Tr_velo_to_cam = np.zeros((3,4))\n        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])\n        Tr_velo_to_cam[:,3] = velo2cam['T']\n        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])\n        data['R0_rect'] = cam2cam['R_rect_00']\n        data['P2'] = cam2cam['P_rect_02']\n        return data\n\n    def cart2hom(self, pts_3d):\n        ''' Input: nx3 points in Cartesian\n            Oupput: nx4 points in Homogeneous by appending 1\n        '''\n        n = pts_3d.shape[0]\n        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))\n        return pts_3d_hom\n \n    def cart2hom_torch(self, pts_3d):\n        n = pts_3d.size()[0]\n        pts_3d_hom = torch.cat((pts_3d, torch.ones(n,1).to(\"cuda:0\")), 1)\n        return pts_3d_hom\n\n    # =========================== \n    # ------- 3d to 3d ---------- \n    # =========================== \n    def project_velo_to_ref(self, pts_3d_velo):\n        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4\n        return np.dot(pts_3d_velo, np.transpose(self.V2C))\n\n    def project_ref_to_velo(self, pts_3d_ref):\n        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4\n        return np.dot(pts_3d_ref, np.transpose(self.C2V))\n\n    def project_rect_to_ref(self, pts_3d_rect):\n        ''' Input and Output are nx3 points '''\n        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))\n    \n    def project_ref_to_rect(self, pts_3d_ref):\n        ''' Input and Output are nx3 points '''\n        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))\n\n    def project_ref_to_rect_torch(self, pts_3d_ref):\n        ''' Input and Output are nx3 points '''\n        return torch.transpose(torch.matmul(self.R0_torch, torch.transpose(pts_3d_ref,0,1)),0,1)\n \n    def project_rect_to_velo(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx3 points in velodyne coord.\n        ''' \n        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)\n        return self.project_ref_to_velo(pts_3d_ref)\n\n    def project_velo_to_rect(self, pts_3d_velo):\n        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)\n        return self.project_ref_to_rect(pts_3d_ref)\n\n    # =========================== \n    # ------- 3d to 2d ---------- \n    # =========================== \n    def project_rect_to_image(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.cart2hom(pts_3d_rect)\n        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_rect_to_image_torch(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.cart2hom_torch(pts_3d_rect)\n        pts_2d = torch.matmul(pts_3d_rect, torch.transpose(self.P_torch,0,1)) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_ref_to_image_torch(self, pts_3d_ref):\n        ''' Input: nx3 points in ref camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_ref = self.cart2hom_torch(pts_3d_ref)\n        pts_2d = torch.matmul(pts_3d_ref, self.D_torch) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_velo_to_image(self, pts_3d_velo):\n        ''' Input: nx3 points in velodyne coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)\n        return self.project_rect_to_image(pts_3d_rect)\n\n    # =========================== \n    # ------- 2d to 3d ---------- \n    # =========================== \n    def project_image_to_rect(self, uv_depth):\n        ''' Input: nx3 first two channels are uv, 3rd channel\n                   is depth in rect camera coord.\n            Output: nx3 points in rect camera coord.\n        '''\n        n = uv_depth.shape[0]\n        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x\n        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y\n        pts_3d_rect = np.zeros((n,3))\n        pts_3d_rect[:,0] = x\n        pts_3d_rect[:,1] = y\n        pts_3d_rect[:,2] = uv_depth[:,2]\n        return pts_3d_rect\n\n    def project_image_to_velo(self, uv_depth):\n        pts_3d_rect = self.project_image_to_rect(uv_depth)\n        return self.project_rect_to_velo(pts_3d_rect)\n\ndef rotx(t):\n    ''' 3D Rotation about the x-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[1,  0,  0],\n                     [0,  c, -s],\n                     [0,  s,  c]])\n\n\ndef roty(t):\n    ''' Rotation about the y-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[c,  0,  s],\n                     [0,  1,  0],\n                     [-s, 0,  c]])\n\n\ndef rotz(t):\n    ''' Rotation about the z-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[c, -s,  0],\n                     [s,  c,  0],\n                     [0,  0,  1]])\n\n\ndef transform_from_rot_trans(R, t):\n    ''' Transforation matrix from rotation matrix and translation vector. '''\n    R = R.reshape(3, 3)\n    t = t.reshape(3, 1)\n    return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))\n\n\ndef inverse_rigid_trans(Tr):\n    ''' Inverse a rigid body transform matrix (3x4 as [R|t])\n        [R'|-R't; 0|1]\n    '''\n    inv_Tr = np.zeros_like(Tr) # 3x4\n    inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3])\n    inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3])\n    return inv_Tr\n\ndef read_label(label_filename):\n    lines = [line.rstrip() for line in open(label_filename)]\n    objects = [Object3d(line) for line in lines]\n    return objects\n\ndef load_image(img_filename):\n    return cv2.imread(img_filename)\n\ndef load_velo_scan(velo_filename):\n    scan = np.fromfile(velo_filename, dtype=np.float32)\n    scan = scan.reshape((-1, 4))\n    return scan\n\ndef project_to_image(pts_3d, P):\n    ''' Project 3d points to image plane.\n    Usage: pts_2d = projectToImage(pts_3d, P)\n      input: pts_3d: nx3 matrix\n             P:      3x4 projection matrix\n      output: pts_2d: nx2 matrix\n      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)\n      => normalize projected_pts_2d(2xn)\n      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)\n          => normalize projected_pts_2d(nx2)\n    '''\n    n = pts_3d.shape[0]\n    pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))\n    print(('pts_3d_extend shape: ', pts_3d_extend.shape))\n    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3\n    pts_2d[:,0] /= pts_2d[:,2]\n    pts_2d[:,1] /= pts_2d[:,2]\n    return pts_2d[:,0:2]\n\n\ndef compute_box_3d(obj, P):\n    ''' Takes an object and a projection matrix (P) and projects the 3d\n        bounding box into the image plane.\n        Returns:\n            corners_2d: (8,2) array in left image coord.\n            corners_3d: (8,3) array in in rect camera coord.\n    '''\n    # compute rotational matrix around yaw axis\n    R = roty(obj.ry)    \n\n    # 3d bounding box dimensions\n    l = obj.l;\n    w = obj.w;\n    h = obj.h;\n    \n    # 3d bounding box corners\n    x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2];\n    y_corners = [0,0,0,0,-h,-h,-h,-h];\n    z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2];\n    \n    # rotate and translate 3d bounding box\n    corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))\n    #print corners_3d.shape\n    corners_3d[0,:] = corners_3d[0,:] + obj.t[0];\n    corners_3d[1,:] = corners_3d[1,:] + obj.t[1];\n    corners_3d[2,:] = corners_3d[2,:] + obj.t[2];\n    #print 'cornsers_3d: ', corners_3d \n    # only draw 3d bounding box for objs in front of the camera\n    if np.any(corners_3d[2,:]<0.1):\n        corners_2d = None\n        return corners_2d, np.transpose(corners_3d)\n    \n    # project the 3d bounding box into the image plane\n    corners_2d = project_to_image(np.transpose(corners_3d), P);\n    #print 'corners_2d: ', corners_2d\n    return corners_2d, np.transpose(corners_3d)\n\n\ndef compute_orientation_3d(obj, P):\n    ''' Takes an object and a projection matrix (P) and projects the 3d\n        object orientation vector into the image plane.\n        Returns:\n            orientation_2d: (2,2) array in left image coord.\n            orientation_3d: (2,3) array in in rect camera coord.\n    '''\n    \n    # compute rotational matrix around yaw axis\n    R = roty(obj.ry)\n   \n    # orientation in object coordinate system\n    orientation_3d = np.array([[0.0, obj.l],[0,0],[0,0]])\n    \n    # rotate and translate in camera coordinate system, project in image\n    orientation_3d = np.dot(R, orientation_3d)\n    orientation_3d[0,:] = orientation_3d[0,:] + obj.t[0]\n    orientation_3d[1,:] = orientation_3d[1,:] + obj.t[1]\n    orientation_3d[2,:] = orientation_3d[2,:] + obj.t[2]\n    \n    # vector behind image plane?\n    if np.any(orientation_3d[2,:]<0.1):\n      orientation_2d = None\n      return orientation_2d, np.transpose(orientation_3d)\n    \n    # project orientation into the image plane\n    orientation_2d = project_to_image(np.transpose(orientation_3d), P);\n    return orientation_2d, np.transpose(orientation_3d)\n\ndef draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):\n    ''' Draw 3d bounding box in image\n        qs: (8,3) array of vertices for the 3d box in following order:\n            1 -------- 0\n           /|         /|\n          2 -------- 3 .\n          | |        | |\n          . 5 -------- 4\n          |/         |/\n          6 -------- 7\n    '''\n    qs = qs.astype(np.int32)\n    for k in range(0,4):\n       # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html\n       i,j=k,(k+1)%4\n       # use LINE_AA for opencv3\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n\n       i,j=k+4,(k+1)%4 + 4\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n\n       i,j=k,k+4\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n    return image\n\n\n\nclass OmniCalibration(Calibration):\n    def __init__(self, calib_folder):\n\n        global_config = os.path.join(calib_folder, 'defaults.yaml')\n        camera_config = os.path.join(calib_folder, 'cameras.yaml')\n\n        with open(global_config) as f:\n            self.global_config_dict = yaml.safe_load(f)\n        \n        with open(camera_config) as f:\n            self.camera_config_dict = yaml.safe_load(f)\n        \n        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')\n        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')\n        # image shape is (color channels, height, width)\n        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']\n    \n    def project_ref_to_image_torch(self, pointcloud):\n\n        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)\n        horizontal_fraction = theta/ (2*np.pi)\n        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]\n        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y\n        pts_2d = torch.stack([x, y], dim=1)\n        \n        return pts_2d\n\n\n    def project_image_to_rect(self, uvdepth):\n\n        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi\n        z = uvdepth[:, 2]*np.cos(theta)\n        x = uvdepth[:, 2]*np.sin(theta)\n        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))\n\n        return np.stack([x,y,z], axis=1)\n\n    def project_velo_to_ref(self, pointcloud):\n\n        pointcloud = pointcloud[:, [1, 2, 0]]\n        pointcloud[:, 0] *= -1\n        pointcloud[:, 1] *= -1\n\n        return pointcloud\n\n    def move_lidar_to_camera_frame(self, pointcloud, upper = True):\n        # assumed only rotation about z axis\n        \n        if upper:\n            pointcloud -= self.global_config_dict['lidar_upper_to_rgb']['translation']\n            theta = self.global_config_dict['lidar_upper_to_rgb']['rotation'][-1]\n        else:\n            pointcloud -= self.global_config_dict['lidar_lower_to_rgb']['translation']\n            theta = self.global_config_dict['lidar_lower_to_rgb']['rotation'][-1]\n\n        rotation_matrix = torch.Tensor([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]).type(pointcloud.type())\n        pointcloud[:, :2] = torch.matmul(rotation_matrix.unsqueeze(0), pointcloud[:, :2].transpose(0,1)).transpose(0,1)\n        return pointcloud\n        \n    \n    def calculate_median_param_value(self, param):\n        if param=='f_y':\n            idx = 4\n        elif param == 'f_x':\n            idx = 0\n        elif param == 't_y':\n            idx = 5\n        elif param == 't_x':\n            idx = 2\n        elif param == 's':\n            idx = 1\n        else:\n            raise 'Wrong parameter!'\n\n        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']\n        parameter_list = []\n        for sensor, camera_params in self.camera_config_dict['cameras'].items():\n            if sensor not in omni_camera:\n                continue\n            K_matrix = camera_params['K'].split(' ')\n            parameter_list.append(float(K_matrix[idx]))\n        return np.median(parameter_list)\n"
  },
  {
    "path": "paper_experiments/utils/combine_and_process_detections.py",
    "content": "import os\nfrom os import listdir\nfrom os.path import isfile, join\n\n#root = \"/cvgl2/u/mihirp/depth_tracking/data/JRDB/sequences/\"\n#root = \"/cvgl2/u/mihirp/depth_tracking/data/JRDB/test_sequences/\"\n#root = \"/cvgl2/u/mihirp/depth_tracking/data/KITTI/sequences/\"\nroot = \"/cvgl2/u/mihirp/depth_tracking/data/KITTI/test_sequences/\"\n\nfile_name = \"new_subcnn_faster_rcnn\"\n# file_name = \"detectron2_x101\"\n\ndef threshold(filename, thresh, min, max):\n  detections = []\n  with open(filename, 'r') as readfile:\n    dets = readfile.read().split('\\n')\n    dets = dets[:len(dets)-1] #filter out last line which is just \\n\n    for det in dets:\n      parsedet = det.split(' ')\n      score = float(parsedet[len(parsedet)-1])\n      parsedet[len(parsedet)-1] = str((float(parsedet[len(parsedet)-1]) - thresh) / (max - thresh))\n      if(score > thresh):\n        detections.append(parsedet)\n  return detections\n\nfor seq in sorted(os.listdir(root)): #21 for normal, 29 for testing\n  path = os.path.join(root,seq,'det')\n\n  with open(os.path.join(path,file_name+'_raw.txt'), 'w') as f:\n    pred_dets = []\n    #pred_dets.append(threshold(os.path.join(path,'rrc.txt'), .05, 0, 1))\n    pred_dets.append(threshold(os.path.join(path,'subcnn.txt'), .8, 0, 1))\n    #pred_dets.append(threshold(os.path.join(path,'faster_rcnn.txt'), .99, 0, 1))\n    #pred_dets.append(threshold(os.path.join(path,'detectron2_x101.txt'), .9, 0, 1))\n    #pred_dets.append(threshold(path+'regionlets.txt', 5, -5, 25))\n    if len(pred_dets[0]) == 0:\n      continue\n    max_frames = int((pred_dets[0])[len(pred_dets[0])-1][0])\n\n    det_ctrs = [0,0,0,0]\n    for frame in range(max_frames+1):\n      frame_num = 0\n      for j in range(1): #TODO: Update to number of detectors used\n        while det_ctrs[j] < len(pred_dets[j]) and int( (pred_dets[j])[det_ctrs[j]][0]) == frame:\n          (pred_dets[j])[det_ctrs[j]][1] = str(frame_num)\n          frame_num+=1\n          f.write( \" \".join( (pred_dets[j])[det_ctrs[j]] )+'\\n')\n          det_ctrs[j]+=1\n\n  # Counts max/min of scores\n  for ctr, pred_det in enumerate(pred_dets):\n    minval = 1000\n    maxval = 0\n    for detection in pred_det:\n      score = detection[len(detection)-1]\n      if float(score)>maxval:\n        maxval = float(score)\n      if float(score)<minval:\n        minval = float(score)\n    # print(\"Detector: \"+str(ctr)+\" Max: \"+str(maxval))\n    # print(\"Detector: \"+str(ctr)+\" Min: \"+str(minval))\n\n  with open(os.path.join(path,file_name+'_raw.txt'), 'r') as f:\n    lines = f.readlines()\n\n  with open(os.path.join(path, file_name+'_car.txt'), 'w') as fcar:\n    with open(os.path.join(path, file_name+'_ped.txt'), 'w') as fped:\n      for line in lines:\n        if len(line) < 5:\n          continue\n        vals = line.split(' ')\n        min_x = float(vals[6])\n        min_y = float(vals[7])\n        max_x = float(vals[8])\n        max_y = float(vals[9])\n        score = vals[-1]\n        out_line = vals[0]+',0,'+str(min_x)+','+str(min_y)+','+str(max_x-min_x)+','+str(max_y-min_y)+','+str(score)\n        if vals[2] == 'Car':\n          fcar.write(out_line)\n        elif vals[2] == 'Pedestrian':\n          fped.write(out_line)\n"
  },
  {
    "path": "paper_experiments/utils/dataset.py",
    "content": "import glob\nimport os\nimport pdb\nimport random\nimport sys\nfrom itertools import compress\n\nimport numpy as np\nimport torch\nimport torchvision.transforms as transforms\n\nfrom PIL import Image\nfrom skimage.transform import resize\nfrom torch.utils.data import Dataset\nfrom tqdm import tqdm\n\nfrom models.pointnet_model import PointNet\nfrom .calibration import Calibration, OmniCalibration\nfrom .read_detections import (read_ground_truth_2d_detections,\n                              read_ground_truth_3d_detections)\n\n\nclass SequenceDataset(Dataset):\n    def __init__(self, folder_path, point_cloud=False, cuda=False, omni=False):\n\n        self.files = sorted(glob.glob('%s/imgs/*.*' % folder_path), key = lambda x: int(os.path.splitext(os.path.basename(x))[0]))\n        self.files = [file for file in self.files if is_image_file(file)]\n        self.point_cloud = point_cloud\n        self.seq_name = os.path.split(folder_path)[-1]\n        self.omni = omni\n        if point_cloud:\n            if self.omni:\n                calib_folder = os.path.join(folder_path, 'calib')\n                self.calib = OmniCalibration(calib_folder)\n            else:\n                self.calib_file = os.path.join(folder_path, 'calib', self.seq_name+'.txt')\n                self.calib = Calibration(self.calib_file)\n            self.depth_files = sorted(glob.glob('%s/*.*' % os.path.join(folder_path, 'depth')))\n            self.depth_files = [file for file in self.depth_files if file.split('.')[-1]=='bin']\n        else:\n            self.calib = None\n        self.cuda = cuda\n\n    def __getitem__(self, index):\n\n\n        img_path = self.files[index % len(self.files)]\n        if self.point_cloud:\n            depth_path = self.depth_files[index % len(self.depth_files)]\n        # Extract image\n        img = np.array(Image.open(img_path))\n\n        # Channels-first\n        input_img = np.transpose(img, (2, 0, 1))/255\n        # As pytorch tensor\n        input_img = torch.from_numpy(input_img).float()\n        if self.cuda:\n            input_img = input_img.cuda()\n        frame_idx = int(os.path.basename(img_path)[:-4])\n        if self.point_cloud:\n            #velodyne coordinates and image coordinates are different.\n            #velo_x = camera_z\n            #velo_y = -camera_x\n            #velo_z = -camera_y\n            if self.omni:\n                scan = np.load(depth_path)\n            else:\n                scan = np.fromfile(depth_path, dtype=np.float32)\n            scan = scan.reshape((-1, 4))\n            scan[:, :3] = self.calib.project_velo_to_ref(scan[:, :3])\n            return frame_idx, img_path, input_img, scan\n        else:\n            return frame_idx, img_path, input_img, -1\n\n    def __len__(self):\n        return len(self.files)\n\ndef is_image_file(file):\n    IMG_FILE_FORMATS = ['jpg', 'png', 'tif', 'bmp', 'jpeg']\n    if file.split('.')[-1] in IMG_FILE_FORMATS:\n        return True\n    else:\n        return False\n\nclass TripletDataset(Dataset):\n    \n    def __init__(self, feature_path, num_negative_samples = 100, cuda = True, sequence = False, test = False):\n        if test:\n            feature_file = os.path.join(feature_path, 'test_features.npy')\n        else:\n            feature_file = os.path.join(feature_path, 'features.npy')\n        \n        feature_array = np.load(feature_file)\n\n        # feature_array = feature_array[:500]\n        # feature_array = np.vstack([feature_array[146], feature_array[148], feature_array[149],feature_array[32], feature_array[10], feature_array[7],feature_array[9],feature_array[31], feature_array[8]])\n        # feature_array = np.vstack([feature_array[10], feature_array[11],feature_array[9],feature_array[8],feature_array[249],feature_array[247]])\n        # self.ids = feature_array[:, 0]\n        # if not test:\n        #     feature_array = feature_array[self.ids < 5]\n        self.ids = feature_array[:, 0].astype(np.float32).astype(np.int32)\n        self.unique_ids = np.unique(self.ids)\n        self.frames = feature_array[:, 2].astype(np.float32).astype(np.int32)\n        self.features = feature_array[:, 3:].astype(np.float32)\n        self.sequences = feature_array[:, 1].astype(np.float32).astype(np.int32)\n        self.sequence = sequence\n        if self.sequence:\n            self.size = self.unique_ids.size\n        else:\n            self.size = self.ids.size\n        self.num_negative_samples = num_negative_samples\n        self.tensor_type = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n\n\n    def __getitem__(self, index):\n        \n        if self.sequence:\n            object_id = self.unique_ids[index]\n            positive_ids = self.ids == object_id\n            object_sequence = self.sequences[positive_ids][0]\n            object_frames = self.frames[positive_ids]\n\n            positive_sequence = self.features[positive_ids]\n            positive_sequence = torch.Tensor(positive_sequence).type(self.tensor_type)\n            negative_sequence = []\n            for frame in object_frames[1:]:\n                idx = np.logical_and(self.sequences==object_sequence, self.frames==frame)\n                idx = np.logical_and(idx, self.ids!=object_id)\n                if np.sum(idx)==0:\n                    negative_sequence.append(None)\n                else:\n                    negative_sequence.append(torch.Tensor(self.features[idx]).type(self.tensor_type))\n            negative_ids = np.random.choice(len(self.ids), size = self.num_negative_samples, replace = False)\n            negative_ids = negative_ids[self.ids[negative_ids] != object_id]\n            negative_features = self.features[negative_ids]\n            negative_features = torch.Tensor(negative_features).type(self.tensor_type)\n            \n            return positive_sequence, negative_sequence, negative_features\n        else:\n            object_id = self.ids[index]\n            anchor_feature = self.features[index]\n            anchor_feature = torch.Tensor(anchor_feature).type(self.tensor_type)\n\n            positive_ids = np.where(self.ids == object_id)[0]\n            positive_feature = self.features[random.choice(positive_ids)]\n            positive_feature = torch.Tensor(positive_feature).type(self.tensor_type)\n\n            negative_ids = np.random.choice(len(self.ids), size = self.num_negative_samples, replace = False)\n            negative_ids = negative_ids[self.ids[negative_ids] != object_id]\n            negative_features = self.features[negative_ids]\n            negative_features = torch.Tensor(negative_features).type(self.tensor_type)\n\n\n            return anchor_feature, positive_feature, negative_features\n\n\n    def __len__(self):\n        return self.size\n\nclass STIPDataset(Dataset):\n    def __init__(self, folder_path, img_size=416, point_cloud = False, pad = False):\n\n        self.files = sorted(glob.glob('%s/imgs/*/*.*' % folder_path), key = lambda x: int(os.path.splitext(os.path.basename(x))[0]))\n        self.files = [file for file in self.files if is_image_file(file)]\n        self.img_shape = (img_size, img_size)\n        self.pad = pad\n        self.seq_name = os.path.split(folder_path)[-1]\n\n    def __getitem__(self, index):\n\n\n        img_path = self.files[index % len(self.files)]\n\n        # Extract image\n        img = np.array(Image.open(img_path))\n        h, w, _ = img.shape\n        dim_diff = np.abs(h - w)\n        # Upper (left) and lower (right) padding\n        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2\n        # Determine padding\n        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))\n        # Add padding\n        if self.pad:\n            img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.\n            # Resize and normalize\n            img = resize(img, (*self.img_shape, 3), mode='reflect', anti_aliasing = True)\n        # Channels-first\n        input_img = np.transpose(img, (2, 0, 1))/255\n        # As pytorch tensor\n        input_img = torch.from_numpy(input_img).float()\n\n        return img_path, input_img, -1\n\n    def __len__(self):\n        return len(self.files)\n\ndef collate_fn(inputs):\n    #ASSUMES BATCH SIZE IS ALWAYS 1\n    return inputs[0]"
  },
  {
    "path": "paper_experiments/utils/deep_sort_utils.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport cv2\n\n\ndef non_max_suppression(boxes, max_bbox_overlap, scores=None):\n    \"\"\"Suppress overlapping detections.\n\n    Original code from [1]_ has been adapted to include confidence score.\n\n    .. [1] http://www.pyimagesearch.com/2015/02/16/\n           faster-non-maximum-suppression-python/\n\n    Examples\n    --------\n\n        >>> boxes = [d.roi for d in detections]\n        >>> scores = [d.confidence for d in detections]\n        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)\n        >>> detections = [detections[i] for i in indices]\n\n    Parameters\n    ----------\n    boxes : ndarray\n        Array of ROIs (x, y, width, height).\n    max_bbox_overlap : float\n        ROIs that overlap more than this values are suppressed.\n    scores : Optional[array_like]\n        Detector confidence score.\n\n    Returns\n    -------\n    List[int]\n        Returns indices of detections that have survived non-maxima suppression.\n\n    \"\"\"\n    if len(boxes) == 0:\n        return []\n\n    boxes = boxes.astype(np.float)\n    pick = []\n\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    x2 = boxes[:, 2] + boxes[:, 0]\n    y2 = boxes[:, 3] + boxes[:, 1]\n\n    area = (x2 - x1 + 1) * (y2 - y1 + 1)\n    if scores is not None:\n        idxs = np.argsort(scores)\n    else:\n        idxs = np.argsort(y2)\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        pick.append(i)\n\n        xx1 = np.maximum(x1[i], x1[idxs[:last]])\n        yy1 = np.maximum(y1[i], y1[idxs[:last]])\n        xx2 = np.minimum(x2[i], x2[idxs[:last]])\n        yy2 = np.minimum(y2[i], y2[idxs[:last]])\n\n        w = np.maximum(0, xx2 - xx1 + 1)\n        h = np.maximum(0, yy2 - yy1 + 1)\n\n        #overlap = (w * h) / (area[idxs[:last]]) # + area[idxs[last:last+1]] - w * h) #changed from deepsort to sum both areas\n        overlap = (w * h) / (area[idxs[:last]] + area[idxs[last:last+1]] - w * h) #changed from deepsort to sum both areas\n        threshold = np.where(y2[i]-y1[i] < 50, max_bbox_overlap-0.1, max_bbox_overlap)\n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > threshold)[0])))\n\n    return pick\n"
  },
  {
    "path": "paper_experiments/utils/detection.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\n\n\nclass Detection(object):\n    \"\"\"\n    This class represents a bounding box detection in a single image.\n\n    Parameters\n    ----------\n    tlwh : array_like\n        Bounding box in format `(x, y, w, h)`.\n    confidence : float\n        Detector confidence score.\n    feature : array_like\n        A feature vector that describes the object contained in this image.\n\n    Attributes\n    ----------\n    tlwh : ndarray\n        Bounding box in format `(top left x, top left y, width, height)`.\n    confidence : ndarray\n        Detector confidence score.\n    feature : ndarray | NoneType\n        A feature vector that describes the object contained in this image.\n\n    \"\"\"\n\n    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):\n        self.tlwh = np.asarray(tlwh, dtype=np.float)\n        # Note that detections format is centre of 3D box and dimensions (not bottom face)\n        self.box_3d = box_3d\n        if box_3d is not None:\n            self.box_3d[1] -= box_3d[4]/2\n            self.box_3d = np.asarray(box_3d, dtype=np.float32)\n        self.confidence = float(confidence)\n        self.appearance_feature = np.asarray(appearance_feature, dtype=np.float32)\n        if feature is not None:\n            self.feature = np.asarray(feature, dtype = np.float32)\n        else:\n            self.feature = None\n\n\n    def to_tlbr(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    def to_xyah(self):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2\n        ret[2] /= ret[3]\n        return ret\n    def to_xywh(self):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2\n        return ret\n    def get_3d_distance(self):\n        if self.box_3d is not None:\n            return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)"
  },
  {
    "path": "paper_experiments/utils/double_measurement_kf.py",
    "content": "import random\nimport numpy as np\nimport scipy.linalg\nimport EKF\nimport pdb\nimport kf_2d\nimport os\nimport pickle\nimport torch\nfrom copy import deepcopy\nimport matplotlib.pyplot as plt\nfrom read_detections import read_ground_truth_3d_detections, read_ground_truth_2d_detections\nnp.set_printoptions(precision=4, suppress=True)\nfrom calibration import Calibration\nimport sys\nsys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\nfrom evaluation.distances import iou_matrix\n\nclass KF_3D(kf_2d.KalmanFilter2D):\n    \"\"\"\n    3D Kalman Filter that tracks objets in 3D space\n\n        The 8-dimensional state space\n\n            x, y, z, l, h, w, theta, vx, vz\n\n        contains the bounding box center position (x, z), the heading angle theta, the\n        box dimensions l, w, h, and the x and z velocities.\n\n        Object motion follows a constant velocity model. The bounding box location\n        (x, y) is taken as direct observation of the state space (linear\n        observation model).\n    \"\"\"\n    def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight, \n                    std_process, std_measurement_2d, std_measurement_3d, \n                    initial_uncertainty, omni = True, debug=True):\n        self.ndim, self.dt = 9, 1.\n\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(self.ndim, self.ndim)\n        self._motion_mat[0, 7] = self.dt\n        self._motion_mat[2, 8] = self.dt\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(self.ndim - 2, self.ndim)\n        if omni:\n            self.x_constant = calib.img_shape[2]/(2*np.pi)\n            self.y_constant = calib.median_focal_length_y\n            self.calib = calib\n        else:\n            self.projection_matrix = calib.P\n\n        self.omni = omni\n        self._std_weight_pos_3d = pos_weight_3d\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._std_weight_theta= theta_weight\n\n        self._std_weight_process = std_process\n        self._initial_uncertainty = initial_uncertainty\n        self._std_weight_measurement_2d = std_measurement_2d\n        self._std_weight_measurement_3d = std_measurement_3d\n        self.debug = debug\n\n    def initiate(self, measurement_3d):\n\n        mean_pos = measurement_3d\n        mean_vel = np.zeros((2,))\n        mean = np.r_[mean_pos, mean_vel]\n        std = [\n                self._std_weight_pos_3d * measurement_3d[0],\n                self._std_weight_pos_3d * measurement_3d[1],\n                self._std_weight_pos_3d * measurement_3d[2],\n                self._std_weight_pos_3d * measurement_3d[3],\n                self._std_weight_pos_3d * measurement_3d[4],\n                self._std_weight_pos_3d * measurement_3d[5],\n                self._std_weight_theta,\n                self._std_weight_vel,\n                self._std_weight_vel]\n        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2\n        \n        return mean, covariance\n        \n    def get_process_noise(self, mean):\n        std_pos = [\n            self._std_weight_pos_3d, # x\n            self._std_weight_pos_3d, # y\n            self._std_weight_pos_3d, # z\n            self._std_weight_pos_3d, # l\n            self._std_weight_pos_3d, # h            \n            self._std_weight_pos_3d, # w\n            self._std_weight_theta # theta\n            ]\n        std_vel = [\n            self._std_weight_vel, # x\n            self._std_weight_vel, # z\n            ]\n        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2\n        return motion_cov\n    \n    def get_2d_measurement_noise(self, measurement_2d):\n        # Returns Qt the sensor noise covariance\n                \n        # Measurement uncertainty scaled by estimated height\n        std = [\n                self._std_weight_pos*measurement_2d[2],\n                self._std_weight_pos*measurement_2d[3],\n                self._std_weight_pos*measurement_2d[2],\n                self._std_weight_pos*measurement_2d[3]]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_2d**2\n        return innovation_cov\n    \n    def get_3d_measurement_noise(self, measurement):\n        # Returns Qt the sensor noise covariance\n                \n        # Measurement uncertainty scaled by estimated height\n        std = [\n            self._std_weight_pos_3d * measurement[0], # x\n            self._std_weight_pos_3d * measurement[1], # y\n            self._std_weight_pos_3d * measurement[2], # z\n            self._std_weight_pos_3d * measurement[3], # l\n            self._std_weight_pos_3d * measurement[4], # h\n            self._std_weight_pos_3d * measurement[5], # w\n            self._std_weight_theta # theta\n            ]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_3d**2\n        return innovation_cov\n    \n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False,\n                        use_3d=True):\n        \"\"\"Compute gating distance between state distribution and measurements.\n\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n\n        \"\"\"\n        if not use_3d:\n            corner_points, corner_points_3d = self.calculate_corners(mean)\n            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)\n            min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n            max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n            cov = self.project_cov_2d(mean, covariance, H_2d)\n            mean = np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n        else:\n            mean, cov = mean[:7], covariance[:7, :7]\n        if only_position:\n            if use_3d:\n                mean, cov = mean[:3], cov[:3, :3]\n                measurements = measurements[:, :3]\n            else:\n                mean, cov = mean[:2], cov[:2, :2]\n                measurements = measurements[:, :2]\n        return EKF.squared_mahalanobis_distance(mean, cov, measurements)\n\n    def project_cov(self, mean, covariance):\n        # Returns S the innovation covariance (projected covariance)\n                \n        measurement_noise = self.get_3d_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,\n                                          self._observation_mat.T))\n                     + measurement_noise)\n        return innovation_cov\n\n    def project_cov_2d(self, mean, covariance, H_2d):\n        # Returns S the innovation covariance (projected covariance)\n                \n        measurement_noise = self.get_2d_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((H_2d, covariance,\n                                          H_2d.T))\n                     + measurement_noise)\n        return innovation_cov\n    # @profile\n    def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (9 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (9x9 dimensional).\n        measurement_2d : ndarray\n            The 4 dimensional measurement vector (x, y, w, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n        measurement_3d : ndarray\n            The 7 dimensional measurement vector (x, y, z, l, h, w, theta), where (x, y, z)\n            is the center bottom of the box, l, q, h are the dimensions of the bounding box\n            theta is the orientation angle w.r.t. the positive x axis.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n\n        if np.any(np.isnan(mean)):\n            return mean, covariance\n        out_cov = covariance\n        H_3d = self._observation_mat\n        do_3d = True\n        covariance_3d = None\n        for meas in measurement_3d:\n            if meas is None:\n                do_3d = False\n                break\n        if do_3d:\n            S_matrix = self.project_cov(mean, out_cov)\n            try:\n                chol_factor, lower = scipy.linalg.cho_factor(\n                    S_matrix, lower=True, check_finite=False)\n                kalman_gain = scipy.linalg.cho_solve(\n                    (chol_factor, lower), np.dot(out_cov, H_3d.T).T,\n                    check_finite=False).T\n            except:\n                # in case cholesky factorization fails, revert to standard solver\n                kalman_gain = np.linalg.multi_dot((out_cov, H_3d.T, np.linalg.inv(S_matrix)))\n            out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))\n            if JPDA:\n                innovation_3d = 0\n                cov_uncertainty_3d = 0\n                for i, detection_3d in enumerate(measurement_3d):\n                    innovation_partial = detection_3d - mean[:7]\n                    innovation_3d += innovation_partial * marginalization[i+1]\n                    cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)\n                partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)\n                out_cov *= 1 - marginalization[0]\n                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))\n                out_cov += marginalization[0]*covariance\n            else:\n                out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))\n                innovation_3d = measurement_3d - mean[:7]\n            mean = mean + np.dot(kalman_gain, innovation_3d)\n            post_3d_mean = mean\n            covariance_3d = out_cov\n\n        if measurement_2d is not None:\n            corner_points, corner_points_3d = self.calculate_corners(mean)\n            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)\n            #update based on 2D\n            min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n            max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n            S_matrix = self.project_cov_2d(np.array([min_x, min_y, max_x - min_x, max_y - min_y]), out_cov, H_2d)\n            try:\n                chol_factor, lower = scipy.linalg.cho_factor(\n                    S_matrix, lower=True, check_finite=False)\n                kalman_gain = scipy.linalg.cho_solve(\n                    (chol_factor, lower), np.dot(out_cov, H_2d.T).T,\n                    check_finite=False).T\n            except:\n                # in case cholesky factorization fails, revert to standard solver\n                kalman_gain = np.linalg.multi_dot((out_cov, H_2d.T, np.linalg.inv(S_matrix)))\n            out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)\n            if JPDA:\n                innovation_2d = 0\n                cov_uncertainty_2d = 0\n                for i, detection_2d in enumerate(measurement_2d):\n                    innovation_partial = detection_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n                    innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node\n                    cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)\n                partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)\n                out_cov *= 1 - marginalization[0]\n                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))\n                if covariance_3d is None:\n                    out_cov += marginalization[0]*covariance\n                else:\n                    out_cov += marginalization[0]*covariance_3d                    \n            else:\n                innovation_2d = measurement_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n            mean = mean + np.dot(kalman_gain, innovation_2d)\n        \n        if self.debug:\n            return mean, out_cov, post_3d_mean\n        return mean, out_cov\n\n    # @profile\n    def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):\n\n        min_x = np.inf\n        min_x_idx = None\n        max_x = -np.inf\n        max_x_idx = None\n        min_y = np.inf\n        min_y_idx = None\n        max_y = -np.inf\n        max_y_idx = None\n        for idx, pt in enumerate(corner_points):\n            if pt[0] < min_x:\n                min_x_idx = idx\n                min_x = pt[0]\n            if pt[0] > max_x:\n                max_x_idx = idx\n                max_x = pt[0]\n            if pt[1] < min_y:\n                min_y_idx = idx\n                min_y = pt[1]\n            if pt[1] > max_y:\n                max_y_idx = idx\n                max_y = pt[1]\n        if self.omni:\n            jac_x = np.dot(self.jacobian_omni(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))\n            jac_y = np.dot(self.jacobian_omni(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))\n            jac_w = np.dot(self.jacobian_omni(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x\n            jac_h = np.dot(self.jacobian_omni(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y\n        else:\n            jac_x = np.dot(self.jacobian(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))\n            jac_y = np.dot(self.jacobian(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))\n            jac_w = np.dot(self.jacobian(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x\n            jac_h = np.dot(self.jacobian(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y\n        jac = np.vstack([jac_x, jac_y, jac_w, jac_h])\n        jac = np.hstack([jac, np.zeros((jac.shape[0], 2))])\n        return jac \n    # Jacobian for projective transformation\n    def jacobian(self, pt_3d):\n        den = np.sum(self.projection_matrix[2] * pt_3d)\n        dxy = (1 - self.projection_matrix[2] * pt_3d/den) * self.projection_matrix[0:2]/den\n\n        return dxy[:, :3]\n    \n    def jacobian_omni(self, pt_3d):\n        jac = np.zeros((2, 3))\n        x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]\n        denominator = (x**2 + z**2)\n        jac[0, 0] = -self.x_constant*(2*x*(z**2)/denominator)\n        jac[0, 0] /= denominator\n        jac[0, 2] = self.x_constant*2*z/denominator\n        jac[0, 2] *= 1 - (z**2)/denominator\n\n        jac[1, 0] = self.y_constant*x*y/denominator\n        jac[1, 1] = -self.y_constant\n        jac[1,2] = self.y_constant*z*y/denominator\n        jac[1, :] /= np.sqrt(denominator)\n\n        return jac\n\n    def calculate_corners(self, box):\n        x,y,z,l,h,w,theta = box[:7]\n        pt_3d = []\n        x_delta_1 = np.cos(theta)*l/2+np.sin(theta)*w/2\n        x_delta_2 = np.cos(theta)*l/2 - np.sin(theta)*w/2\n        z_delta_1 = np.sin(theta)*l/2-np.cos(theta)*w/2\n        z_delta_2 = np.sin(theta)*l/2+np.cos(theta)*w/2\n        pt_3d.append((x+x_delta_1, y + h/2, z+z_delta_1, 1))\n        pt_3d.append((x+x_delta_2, y + h/2, z+z_delta_2, 1))\n        pt_3d.append((x-x_delta_2, y + h/2, z-z_delta_2, 1))\n        pt_3d.append((x-x_delta_1, y + h/2, z-z_delta_1, 1))\n        pt_3d.append((x+x_delta_1, y - h/2, z+z_delta_1, 1))\n        pt_3d.append((x+x_delta_2, y - h/2, z+z_delta_2, 1))\n        pt_3d.append((x-x_delta_2, y - h/2, z-z_delta_2, 1))\n        pt_3d.append((x-x_delta_1, y - h/2, z-z_delta_1, 1))\n        pts_3d = np.vstack(pt_3d)\n        pts_2d = self.project_2d(pts_3d)\n        return pts_2d, pts_3d\n    \n    def corner_jacobian(self, pt_3d, corner_idx):\n        _, _, _, l, _, w, theta = pt_3d[:7]\n        jac = np.eye(3,7)\n        \n        jac[1, 4] = 0.5 if corner_idx < 4 else -0.5\n\n        jac[0, 3] = 0.5*np.sin(theta) if corner_idx % 4 < 2 else -0.5*np.sin(theta)\n        jac[0, 5] = 0.5*np.cos(theta) if corner_idx % 2 == 0 else -0.5*np.cos(theta)\n        \n        jac[2, 3] = 0.5*np.cos(theta) if corner_idx%4 < 2 else -0.5*np.cos(theta)\n        jac[2, 5] = 0.5*np.sin(theta) if corner_idx%2 == 0 else -0.5*np.sin(theta)\n\n        if corner_idx%4 == 0:\n            jac[0, 6] = -np.sin(theta)*l/2 + np.cos(theta)*w/2\n            jac[2, 6] = np.cos(theta)*l/2 + np.sin(theta)*w/2\n        elif corner_idx%4==1:\n            jac[0, 6] = -np.sin(theta)*l/2 - np.cos(theta)*w/2\n            jac[2, 6] = np.cos(theta)*l/2 - np.sin(theta)*w/2\n        elif corner_idx%4==2:\n            jac[0, 6] = +np.sin(theta)*l/2 + np.cos(theta)*w/2\n            jac[2, 6] = -np.cos(theta)*l/2 + np.sin(theta)*w/2\n        else:\n            jac[0, 6] = +np.sin(theta)*l/2 - np.cos(theta)*w/2\n            jac[2, 6] = -np.cos(theta)*l/2 - np.sin(theta)*w/2\n\n        return jac\n\n    def project_2d(self, pts_3d):\n        if self.omni:\n            pts_2d = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))\n        else:\n            pts_2d = np.dot(pts_3d, self.projection_matrix.T)\n            pts_2d /= np.expand_dims(pts_2d[:, 2], 1)\n        return pts_2d[:, :2]\n\n\ndef swap(detections_3d, iou, idx, swap_prob = 0):\n    if random.random() > swap_prob:\n        return detections_3d[idx]\n    else:\n        iou_row = iou[idx]\n        iou_row[idx] = -1\n        max_idx = np.argmax(iou_row)\n        if iou_row[max_idx] > 0.4:\n            # print(\"SWAP\")\n            return detections_3d[max_idx]\n        else:\n            return detections_3d[idx]\n\n\nif __name__ == '__main__':\n    seq = '0001'\n    gt_path = os.path.join('data','KITTI','sequences', seq, 'gt')\n    prob_3d_list = [0.6]\n    prob_2d_list = [0.9]\n    swap_prob = 0\n    std_3d = 0.2\n    std_2d = 5\n    boxes_3d, ids, frame_3d = read_ground_truth_3d_detections(os.path.join(gt_path, '3d_detections.txt'), None)\n    boxes_2d, object_ids, frame_2d = read_ground_truth_2d_detections(os.path.join(gt_path, 'gt.txt'), None, nms_threshold = 1)\n    boxes_2d[:,2] -= boxes_2d[:,0]\n    boxes_2d[:,3] -= boxes_2d[:,1]\n    boxes_3d[:,1] -= boxes_3d[:, 4]/2\n    calib = Calibration(os.path.join(os.path.dirname(gt_path), 'calib', seq+'.txt'))\n    pos_weight = 0.05\n    pos_weight_2d = 0.006\n    velocity_weight = 0.0007\n    theta_weight = 0.000300\n    std_process = 2\n    std_measurement_2d = 2.6\n    std_measurement_3d = 0.01\n    initial_uncertainty = 1\n    \n    kf = KF_3D(calib, pos_weight, pos_weight_2d, velocity_weight, theta_weight, \n                std_process, std_measurement_2d, std_measurement_3d, \n                initial_uncertainty, omni=False, debug=True)\n    final_errors = np.zeros((len(prob_2d_list), len(prob_3d_list)))\n    random.seed(14295)\n    np.random.seed(14295)\n    for idx_3d, prob_3d in enumerate(prob_3d_list):\n        for idx_2d, prob_2d in enumerate(prob_2d_list):\n            id_means = {idx:[] for idx in np.unique(ids)}\n            id_means_2d = {idx:[] for idx in np.unique(ids)}\n            id_preds = {idx:[] for idx in np.unique(ids)}\n            id_meas = {idx:[] for idx in np.unique(ids)}\n            id_errors = {idx:[] for idx in np.unique(ids)}\n            for frame in sorted(np.unique(frame_2d)):\n                frame_mask = frame_2d==frame\n                frame_boxes_2d = boxes_2d[frame_mask]\n                frame_boxes_3d = boxes_3d[frame_mask]\n                frame_ids = ids[frame_mask]\n                iou = 1-iou_matrix(frame_boxes_2d[:,:4], frame_boxes_2d[:,:4], max_iou=10) #output of function is 1 - IoU\n                for idx, object_id in enumerate(frame_ids):\n                    if frame_boxes_3d[idx][2] > 30:\n                        continue\n                    noise_2d = np.random.randn(*frame_boxes_2d[idx].shape)*std_2d\n                    noise_3d = np.random.randn(*frame_boxes_3d[idx].shape)*std_3d\n                    if len(id_means[object_id.item()]) == 0:\n                        mean, cov = kf.initiate(frame_boxes_2d[idx]+noise_2d, frame_boxes_3d[idx]+noise_3d)\n                        id_means[object_id.item()].append((mean, cov, frame))\n                        # id_preds[object_id.item()].append((mean, cov, frame))\n                        # id_meas[object_id.item()].append((frame_boxes_3d[idx], frame_boxes_2d[idx], frame))\n                        # id_errors[object_id.item()].append((np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)), frame))\n                        continue\n                    mean, cov = kf.predict(id_means[object_id.item()][-1][0], id_means[object_id.item()][-1][1])\n                    id_preds[object_id.item()].append((mean, cov, frame))\n                    # if object_id.item()==3:\n                    #     print(\"3D box: \", frame_boxes_3d[idx])\n                    #     print(\"Old mean:\", id_means[object_id.item()][0])\n                    #     print(\"Predicted mean:\", mean)\n                        # pdb.set_trace()\n                    if random.random() < prob_2d:\n                        if random.random() < prob_3d:\n                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, swap(frame_boxes_3d, iou, idx, swap_prob)+noise_3d)\n                        else:\n                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, None)\n                    # if object_id.item()==12:\n                    #     print(\"Updated mean after 2D:\", mean_2d)\n                    #     print(\"Updated mean after 3D:\", mean)\n                    #     print(\"Error:\", np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)))\n                    #     if np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)) > 1:\n                    #         pdb.set_trace()\n                    id_means[object_id.item()].append((mean, cov, frame))\n                    id_means_2d[object_id.item()].append((mean_2d, frame))\n                    id_meas[object_id.item()].append((frame_boxes_3d[idx], frame_boxes_2d[idx], frame))\n                    id_errors[object_id.item()].append((np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)), frame))\n            errors = [np.mean(error[0]) for idx, error in id_errors.items() if len(error) > 0]\n            final_errors[idx_2d, idx_3d] = np.mean(errors)\n            print(\"3D prob: %f %% & 2D prob: %f %% & swap prob: %f %%  RMSE: %f\"%(prob_3d*100, prob_2d*100, swap_prob*100, final_errors[idx_2d, idx_3d]))\n            # if :\n            with open('results/kf_mean_pickle.p', 'wb') as f:\n                pickle.dump([id_means, id_means_2d, id_meas, id_preds], f)\n\n    print(final_errors)"
  },
  {
    "path": "paper_experiments/utils/evaluate_detections.py",
    "content": "import numpy as np\nimport os\nimport pdb\nfrom tqdm import tqdm\nfrom deep_sort_utils import non_max_suppression as deepsort_nms\nfrom visualise import draw_track\nimport matplotlib.pyplot as plt\nfrom PIL import Image\n\n\ndef evaluate_detections(detection_path_1, detection_path_2, detection_path_3, detection_path_4, gt_path):\n\t#expecting detections and gt in file with format as in read_detections.py\n\t# applies confidence thresholding\n\ttry:\n\t\tdetections_1 = np.loadtxt(detection_path_1, delimiter=',')\n\t\t# detections_2 = np.loadtxt(detection_path_2, delimiter=',')\n\t\t# detections_3 = np.loadtxt(detection_path_3, delimiter=',')\n\t\t# detections_4 = np.loadtxt(detection_path_4, delimiter=',')\n\t\t# detections = np.concatenate([detections_1, detections_2, detections_3, detections_4])\n\t\tdetections = detections_1\n\t\tgt = np.loadtxt(gt_path, delimiter=',')\n\texcept:\n\t\treturn\n\tgt_frames = gt[:, 0]\n\tdet_confidence = detections[:, 6]\n\n\t###CONFIDENCE THRESHOLD\n\tdetections = detections[det_confidence > 0.9]\n\t########\n\n\tprint(\"Average number of detections per frame = %f\"%(detections.shape[0]/len(np.unique(gt_frames))))\n\n\tdet_frames = detections[:, 0]\n\tdet_confidence = detections[:, 6]\n\tgt_boxes = np.asarray(list(zip(gt[:, 2], gt[:, 3], gt[:, 4], gt[:, 5])))\n\tdet_boxes = np.asarray(list(zip(detections[:, 2], detections[:, 3], detections[:, 4], detections[:, 5])))\n\tassignments = []\n\tmissed_detections = 0\n\tfor frame in np.unique(gt_frames):\n\t\tframe_mask_det = det_frames == frame\n\t\tframe_mask_gt = gt_frames == frame\n\t\tframe_gt_boxes = gt_boxes[frame_mask_gt]\n\t\tframe_det_boxes = det_boxes[frame_mask_det]\n\t\tframe_confidence = det_confidence[frame_mask_det]\n\t\tx1 = np.expand_dims(detections[frame_mask_det,2].astype(np.float32), 1)\n\t\ty1 = np.expand_dims(detections[frame_mask_det,3].astype(np.float32), 1)\n\t\tw = np.expand_dims(detections[frame_mask_det,4].astype(np.float32), 1)\n\t\th = np.expand_dims(detections[frame_mask_det,5].astype(np.float32), 1)\n\t\tconf = np.expand_dims(detections[frame_mask_det,6].astype(np.float32), 1)\n\t\tboxes = np.hstack([x1, y1, w, h])\n\t\tindices = deepsort_nms(boxes, 0.75, np.squeeze(conf))\n\t\tframe_det_boxes = frame_det_boxes[indices]\n\n\t\t# print(frame_confidence)\n\t\tpositive_arr = np.asarray([False]*len(frame_det_boxes))\n\t\tfor i, gt_box in enumerate(frame_gt_boxes):\n\t\t\tiou_list = np.asarray([iou(gt_box, det_box) for det_box in frame_det_boxes])\n\t\t\tpositive_idx = np.where(iou_list >= 0.5)[0]\n\t\t\tif len(positive_idx) == 0:\n\t\t\t\tmissed_detections += 1\n\t\t\t\tplt.figure(0)\n\n\t\t\t\tplt.imshow(Image.open(os.path.join(os.path.split(detection_path_1)[0], '..','imgs','%.6d.png'%frame)))\n\t\t\t\tdraw_track(None, gt_box, det = False)\n\t\t\t\tfor det_box in frame_det_boxes:\n\t\t\t\t\tdraw_track(None, det_box, det = True)\n\t\t\t\t# \tprint(det_box)\n\t\t\t\t# print('Boxes:')\n\t\t\t\t# print(boxes)\n\t\t\t\t# print('FRAME DONE')\n\t\t\t\tplt.show()\n\n\n\t\t\tpositive_arr[positive_idx] = True\n\t\tassignments.extend(list(zip(positive_arr, frame_confidence)))\n\tassignments = sorted(assignments, key = lambda x: x[1], reverse = True)\n\tpredictions = list(zip(*assignments))[0]\n\ttrue_positives = np.cumsum(predictions)\n\tfalse_negatives = np.cumsum(predictions[::-1])[::-1]+missed_detections\n\tprecision = true_positives/range(1,len(true_positives)+1)\n\trecall = true_positives/(true_positives + false_negatives)\n\tprint(\"Total missed detections = %d\"%missed_detections)\n\tbase = 0\n\tidx = []\n\tfor i,recall_val in enumerate(recall):\n\t\tif recall_val > base:\n\t\t\tbase += 0.1\n\t\t\tidx.append(i)\n\t\tif base >1:\n\t\t\tbreak\n\tprecision_vals = [np.amax(precision[index:]) for index in idx]\n\tif len(precision_vals) < 11:\n\t\tprecision_vals.extend([0]*(11-len(precision_vals)))\n\tprint(precision_vals)\n\n\treturn np.mean(precision_vals)\n\n\ndef iou(bbox_1, bbox_2):\n\n\tx1_1, y1_1, w_1, h_1 = bbox_1\n\tx1_2, y1_2, w_2, h_2 = bbox_2\n\tx2_1 = x1_1 + w_1\n\ty2_1 = y1_1 + h_1\n\n\tx2_2 = x1_2 + w_2\n\ty2_2 = y1_2 + h_2\n\t\n\tarea_1 = abs(x2_1 - x1_1)*abs(y2_1-y1_1)\n\tarea_2 = abs(x2_2 - x1_2)*abs(y2_2-y1_2)\n\n\n\tintersection = max(0, (min(x2_1, x2_2) - max(x1_1, x1_2))) * max(0, (min(y2_1, y2_2) - max(y1_1, y1_2)))\n\tunion = area_1 + area_2 - intersection\n\n\treturn intersection / union\n\nif __name__=='__main__':\n\tap = []\n\tKITTI_root = 'data/KITTI/sequences'\n\tfor sequence in tqdm(range(21)):\n\t\tap.append(evaluate_detections(os.path.join(KITTI_root, '%.4d'%sequence, 'det','subcnn_car_det.txt'), \n\t\t\t\t\t\t\t\t\t\tos.path.join(KITTI_root, '%.4d'%sequence, 'det','rrc_car_det.txt'),\n\t\t\t\t\t\t\t\t\t\tos.path.join(KITTI_root, '%.4d'%sequence, 'det','lsvm_car_det.txt'),\n\t\t\t\t\t\t\t\t\t\tos.path.join(KITTI_root, '%.4d'%sequence, 'det','regionlets_car_det.txt'), \n\t\t\t\t\t\t\t\t\t\tos.path.join(KITTI_root, '%.4d'%sequence, 'gt', 'gt_car.txt')))\n\tap = [ap_val for ap_val in ap if ap_val is not None]\n\tprint(\"FINAL AVERAGE PRECISION OVER ALL SEQUENCES IS: %f\"%np.mean(ap))\n\n"
  },
  {
    "path": "paper_experiments/utils/featurepointnet_model_util.py",
    "content": "import open3d as o3d\nimport numpy as np\nimport tensorflow as tf\nimport os\nimport sys\nimport torch\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(BASE_DIR)\nimport featurepointnet_tf_util as tf_util\n\n# -----------------\n# Global Constants\n# -----------------\n\nNUM_HEADING_BIN = 12\nNUM_SIZE_CLUSTER = 8 # one cluster for each type\nNUM_OBJECT_POINT = 512\ng_type2class={'Car':0, 'Van':1, 'Truck':2, 'Pedestrian':3,\n              'Person_sitting':4, 'Cyclist':5, 'Tram':6, 'Misc':7}\ng_class2type = {g_type2class[t]:t for t in g_type2class}\ng_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}\n#Added 0.5m and 0.2m for car and pedestrian to make boxes slightly bigger\ng_type_mean_size = {'Car': np.array([3.88311640418,1.62856739989,1.52563191462]),\n                    'Van': np.array([5.06763659,1.9007158,2.20532825]),\n                    'Truck': np.array([10.13586957,2.58549199,3.2520595]),\n                    'Pedestrian': np.array([0.84422524,0.66068622,1.76255119]),\n                    'Person_sitting': np.array([0.80057803,0.5983815,1.27450867]),\n                    'Cyclist': np.array([1.76282397,0.59706367,1.73698127]),\n                    'Tram': np.array([16.17150617,2.53246914,3.53079012]),\n                    'Misc': np.array([3.64300781,1.54298177,1.92320313])}\ng_mean_size_arr = np.zeros((NUM_SIZE_CLUSTER, 3)) # size clustrs\nfor i in range(NUM_SIZE_CLUSTER):\n    g_mean_size_arr[i,:] = g_type_mean_size[g_class2type[i]]\n\n# -----------------\n# TF Functions Helpers\n# -----------------\n\ndef tf_gather_object_pc(point_cloud, mask, npoints=512):\n    ''' Gather object point clouds according to predicted masks.\n    Input:\n        point_cloud: TF tensor in shape (B,N,C)\n        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)\n        npoints: int scalar, maximum number of points to keep (default: 512)\n    Output:\n        object_pc: TF tensor in shape (B,npoint,C)\n        indices: TF int tensor in shape (B,npoint,2)\n    '''\n    def mask_to_indices(mask):\n        indices = np.zeros((mask.shape[0], npoints, 2), dtype=np.int32)\n        for i in range(mask.shape[0]):\n            pos_indices = np.where(mask[i,:]>0.5)[0]\n            # skip cases when pos_indices is empty\n            if len(pos_indices) > 0: \n                if len(pos_indices) > npoints:\n                    choice = np.random.choice(len(pos_indices),\n                        npoints, replace=False)\n                else:\n                    choice = np.random.choice(len(pos_indices),\n                        npoints-len(pos_indices), replace=True)\n                    choice = np.concatenate((np.arange(len(pos_indices)), choice))\n                np.random.shuffle(choice)\n                indices[i,:,1] = pos_indices[choice]\n            indices[i,:,0] = i\n        return indices\n\n    indices = tf.py_func(mask_to_indices, [mask], tf.int32)  \n    object_pc = tf.gather_nd(point_cloud, indices)\n    return object_pc, indices\n\n\ndef get_box3d_corners_helper(centers, headings, sizes):\n    \"\"\" TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3) \"\"\"\n    #print '-----', centers\n    N = centers.get_shape()[0].value\n    l = tf.slice(sizes, [0,0], [-1,1]) # (N,1)\n    w = tf.slice(sizes, [0,1], [-1,1]) # (N,1)\n    h = tf.slice(sizes, [0,2], [-1,1]) # (N,1)\n    #print l,w,h\n    x_corners = tf.concat([l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2], axis=1) # (N,8)\n    y_corners = tf.concat([h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2], axis=1) # (N,8)\n    z_corners = tf.concat([w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2], axis=1) # (N,8)\n    corners = tf.concat([tf.expand_dims(x_corners,1), tf.expand_dims(y_corners,1), tf.expand_dims(z_corners,1)], axis=1) # (N,3,8)\n    #print x_corners, y_corners, z_corners\n    c = tf.cos(headings)\n    s = tf.sin(headings)\n    ones = tf.ones([N], dtype=tf.float32)\n    zeros = tf.zeros([N], dtype=tf.float32)\n    row1 = tf.stack([c,zeros,s], axis=1) # (N,3)\n    row2 = tf.stack([zeros,ones,zeros], axis=1)\n    row3 = tf.stack([-s,zeros,c], axis=1)\n    R = tf.concat([tf.expand_dims(row1,1), tf.expand_dims(row2,1), tf.expand_dims(row3,1)], axis=1) # (N,3,3)\n    #print row1, row2, row3, R, N\n    corners_3d = tf.matmul(R, corners) # (N,3,8)\n    corners_3d += tf.tile(tf.expand_dims(centers,2), [1,1,8]) # (N,3,8)\n    corners_3d = tf.transpose(corners_3d, perm=[0,2,1]) # (N,8,3)\n    return corners_3d\n\ndef get_box3d_corners(center, heading_residuals, size_residuals):\n    \"\"\" TF layer.\n    Inputs:\n        center: (B,3)\n        heading_residuals: (B,NH)\n        size_residuals: (B,NS,3)\n    Outputs:\n        box3d_corners: (B,NH,NS,8,3) tensor\n    \"\"\"\n    batch_size = center.get_shape()[0].value\n    heading_bin_centers = tf.constant(np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)\n    headings = heading_residuals + tf.expand_dims(heading_bin_centers, 0) # (B,NH)\n    \n    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0) + size_residuals # (B,NS,1)\n    sizes = mean_sizes + size_residuals # (B,NS,3)\n    sizes = tf.tile(tf.expand_dims(sizes,1), [1,NUM_HEADING_BIN,1,1]) # (B,NH,NS,3)\n    headings = tf.tile(tf.expand_dims(headings,-1), [1,1,NUM_SIZE_CLUSTER]) # (B,NH,NS)\n    centers = tf.tile(tf.expand_dims(tf.expand_dims(center,1),1), [1,NUM_HEADING_BIN, NUM_SIZE_CLUSTER,1]) # (B,NH,NS,3)\n\n    N = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER\n    corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N,3]), tf.reshape(headings, [N]), tf.reshape(sizes, [N,3]))\n\n    return tf.reshape(corners_3d, [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])\n\n\ndef huber_loss(error, delta):\n    abs_error = tf.abs(error)\n    quadratic = tf.minimum(abs_error, delta)\n    linear = (abs_error - quadratic)\n    losses = 0.5 * quadratic**2 + delta * linear\n    return tf.reduce_mean(losses)\n\n\ndef parse_output_to_tensors(output, end_points):\n    ''' Parse batch output to separate tensors (added to end_points)\n    Input:\n        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)\n        end_points: dict\n    Output:\n        end_points: dict (updated)\n    '''\n    batch_size = output.get_shape()[0].value\n    center = tf.slice(output, [0,0], [-1,3])\n    end_points['center_boxnet'] = center\n\n    heading_scores = tf.slice(output, [0,3], [-1,NUM_HEADING_BIN])\n    heading_residuals_normalized = tf.slice(output, [0,3+NUM_HEADING_BIN],\n        [-1,NUM_HEADING_BIN])\n    end_points['heading_scores'] = heading_scores # BxNUM_HEADING_BIN\n    end_points['heading_residuals_normalized'] = \\\n        heading_residuals_normalized # BxNUM_HEADING_BIN (-1 to 1)\n    end_points['heading_residuals'] = \\\n        heading_residuals_normalized * (np.pi/NUM_HEADING_BIN) # BxNUM_HEADING_BIN\n    \n    size_scores = tf.slice(output, [0,3+NUM_HEADING_BIN*2],\n        [-1,NUM_SIZE_CLUSTER]) # BxNUM_SIZE_CLUSTER\n    size_residuals_normalized = tf.slice(output,\n        [0,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER], [-1,NUM_SIZE_CLUSTER*3])\n    size_residuals_normalized = tf.reshape(size_residuals_normalized,\n        [batch_size, NUM_SIZE_CLUSTER, 3]) # BxNUM_SIZE_CLUSTERx3\n    end_points['size_scores'] = size_scores\n    end_points['size_residuals_normalized'] = size_residuals_normalized\n    end_points['size_residuals'] = size_residuals_normalized * \\\n        tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0)\n\n    return end_points\n\n# -----------------\n# Box Parsing Helpers\n# -----------------\n\ndef from_prediction_to_label_format(center, angle_class, angle_res,\\\n                                    size_class, size_res, rot_angle):\n    ''' Convert predicted box parameters to label format. '''\n    l,w,h = class2size(size_class, size_res)\n    ry = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle\n    tx,ty,tz = rotate_pc_along_y(np.expand_dims(center,0),-rot_angle).squeeze()\n    ty += h/2.0\n    return tx,ty,tz,l,w,h,ry\n\ndef size2class(size, type_name):\n    ''' Convert 3D bounding box size to template class and residuals.\n    todo (rqi): support multiple size clusters per type.\n \n    Input:\n        size: numpy array of shape (3,) for (l,w,h)\n        type_name: string\n    Output:\n        size_class: int scalar\n        size_residual: numpy array of shape (3,)\n    '''\n    size_class = g_type2class[type_name]\n    size_residual = size - g_type_mean_size[type_name]\n    return size_class, size_residual\n\ndef class2size(pred_cls, residual):\n    ''' Inverse function to size2class. '''\n    mean_size = g_type_mean_size[g_class2type[pred_cls]]\n    return mean_size + residual\n\ndef angle2class(angle, num_class):\n    ''' Convert continuous angle to discrete class and residual.\n    Input:\n        angle: rad scalar, from 0-2pi (or -pi~pi), class center at\n            0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N)\n        num_class: int scalar, number of classes N\n    Output:\n        class_id, int, among 0,1,...,N-1\n        residual_angle: float, a number such that\n            class*(2pi/N) + residual_angle = angle\n    '''\n    angle = angle%(2*np.pi)\n    assert(angle>=0 and angle<=2*np.pi)\n    angle_per_class = 2*np.pi/float(num_class)\n    shifted_angle = (angle+angle_per_class/2)%(2*np.pi)\n    class_id = int(shifted_angle/angle_per_class)\n    residual_angle = shifted_angle - \\\n        (class_id * angle_per_class + angle_per_class/2)\n    return class_id, residual_angle\n\ndef class2angle(pred_cls, residual, num_class, to_label_format=True):\n    ''' Inverse function to angle2class.\n    If to_label_format, adjust angle to the range as in labels.\n    '''\n    angle_per_class = 2*np.pi/float(num_class)\n    angle_center = pred_cls * angle_per_class\n    angle = angle_center + residual\n    if to_label_format and angle>np.pi:\n        angle = angle - 2*np.pi\n    return angle\n\ndef rotate_pc_along_y(pc, rot_angle):\n    '''\n    Input:\n        pc: numpy array (N,C), first 3 channels are XYZ\n            z is facing forward, x is left ward, y is downward\n        rot_angle: rad scalar\n    Output:\n        pc: updated pc with XYZ rotated\n    '''\n    cosval = np.cos(rot_angle)\n    sinval = np.sin(rot_angle)\n    rotmat = np.array([[cosval, -sinval],[sinval, cosval]])\n    pc[:,[0,2]] = np.dot(pc[:,[0,2]], np.transpose(rotmat))\n    return pc\n\n# --------------------------------------\n# Shared subgraphs for v1 and v2 models\n# --------------------------------------\n\ndef placeholder_inputs(batch_size, num_point):\n    ''' Get useful placeholder tensors.\n    Input:\n        batch_size: scalar int\n        num_point: scalar int\n    Output:\n        TF placeholders for inputs and ground truths\n    '''\n    pointclouds_pl = tf.placeholder(tf.float32,\n        shape=(batch_size, num_point, 4))\n    one_hot_vec_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))\n\n    # labels_pl is for segmentation label\n    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point))\n    centers_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))\n    heading_class_label_pl = tf.placeholder(tf.int32, shape=(batch_size,))\n    heading_residual_label_pl = tf.placeholder(tf.float32, shape=(batch_size,))\n    size_class_label_pl = tf.placeholder(tf.int32, shape=(batch_size,))\n    size_residual_label_pl = tf.placeholder(tf.float32, shape=(batch_size,3))\n\n    return pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \\\n        heading_class_label_pl, heading_residual_label_pl, \\\n        size_class_label_pl, size_residual_label_pl\n\n\ndef point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):\n    ''' Select point cloud with predicted 3D mask,\n    translate coordinates to the masked points centroid.\n    \n    Input:\n        point_cloud: TF tensor in shape (B,N,C)\n        logits: TF tensor in shape (B,N,2)\n        end_points: dict\n        xyz_only: boolean, if True only return XYZ channels\n    Output:\n        object_point_cloud: TF tensor in shape (B,M,3)\n            for simplicity we only keep XYZ here\n            M = NUM_OBJECT_POINT as a hyper-parameter\n        mask_xyz_mean: TF tensor in shape (B,3)\n    '''\n    batch_size = point_cloud.get_shape()[0].value\n    num_point = point_cloud.get_shape()[1].value\n    mask = tf.slice(logits,[0,0,0],[-1,-1,1]) < \\\n        tf.slice(logits,[0,0,1],[-1,-1,1])\n    mask = tf.to_float(mask) # BxNx1\n    mask_count = tf.tile(tf.reduce_sum(mask,axis=1,keep_dims=True),\n        [1,1,3]) # Bx1x3\n    point_cloud_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3]) # BxNx3\n    mask_xyz_mean = tf.reduce_sum(tf.tile(mask, [1,1,3])*point_cloud_xyz,\n        axis=1, keep_dims=True) # Bx1x3\n    mask = tf.squeeze(mask, axis=[2]) # BxN\n    end_points['mask'] = mask\n    mask_xyz_mean = mask_xyz_mean/tf.maximum(mask_count,1) # Bx1x3\n\n    # Translate to masked points' centroid\n    point_cloud_xyz_stage1 = point_cloud_xyz - \\\n        tf.tile(mask_xyz_mean, [1,num_point,1])\n\n    if xyz_only:\n        point_cloud_stage1 = point_cloud_xyz_stage1\n    else:\n        point_cloud_features = tf.slice(point_cloud, [0,0,3], [-1,-1,-1])\n        point_cloud_stage1 = tf.concat(\\\n            [point_cloud_xyz_stage1, point_cloud_features], axis=-1)\n    num_channels = point_cloud_stage1.get_shape()[2].value\n\n    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,\n        mask, NUM_OBJECT_POINT)\n    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])\n\n    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points\n\n\ndef get_center_regression_net(object_point_cloud, one_hot_vec,\n                              is_training, bn_decay, end_points):\n    ''' Regression network for center delta. a.k.a. T-Net.\n    Input:\n        object_point_cloud: TF tensor in shape (B,M,C)\n            point clouds in 3D mask coordinate\n        one_hot_vec: TF tensor in shape (B,3)\n            length-3 vectors indicating predicted object type\n    Output:\n        predicted_center: TF tensor in shape (B,3)\n    ''' \n    num_point = object_point_cloud.get_shape()[1].value\n    net = tf.expand_dims(object_point_cloud, 2)\n    net = tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg1-stage1', bn_decay=bn_decay)\n    net = tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg2-stage1', bn_decay=bn_decay)\n    net = tf_util.conv2d(net, 256, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg3-stage1', bn_decay=bn_decay)\n    net = tf_util.max_pool2d(net, [num_point,1],\n        padding='VALID', scope='maxpool-stage1')\n    net = tf.squeeze(net, axis=[1,2])\n    net = tf.concat([net, one_hot_vec], axis=1)\n    net = tf_util.fully_connected(net, 256, scope='fc1-stage1', bn=True,\n        is_training=is_training, bn_decay=bn_decay)\n    net = tf_util.fully_connected(net, 128, scope='fc2-stage1', bn=True,\n        is_training=is_training, bn_decay=bn_decay)\n    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,\n        scope='fc3-stage1')\n    return predicted_center, end_points\n\n\ndef get_loss(mask_label, center_label, \\\n             heading_class_label, heading_residual_label, \\\n             size_class_label, size_residual_label, \\\n             end_points, \\\n             corner_loss_weight=10.0, \\\n             box_loss_weight=1.0):\n    ''' Loss functions for 3D object detection.\n    Input:\n        mask_label: TF int32 tensor in shape (B,N)\n        center_label: TF tensor in shape (B,3)\n        heading_class_label: TF int32 tensor in shape (B,) \n        heading_residual_label: TF tensor in shape (B,) \n        size_class_label: TF tensor int32 in shape (B,)\n        size_residual_label: TF tensor tensor in shape (B,)\n        end_points: dict, outputs from our model\n        corner_loss_weight: float scalar\n        box_loss_weight: float scalar\n    Output:\n        total_loss: TF scalar tensor\n            the total_loss is also added to the losses collection\n    '''\n    # 3D Segmentation loss\n    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\\\n        logits=end_points['mask_logits'], labels=mask_label))\n    tf.summary.scalar('3d mask loss', mask_loss)\n\n    # Center regression losses\n    center_dist = tf.norm(center_label - end_points['center'], axis=-1)\n    center_loss = huber_loss(center_dist, delta=2.0)\n    tf.summary.scalar('center loss', center_loss)\n    stage1_center_dist = tf.norm(center_label - \\\n        end_points['stage1_center'], axis=-1)\n    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)\n    tf.summary.scalar('stage1 center loss', stage1_center_loss)\n\n    # Heading loss\n    heading_class_loss = tf.reduce_mean( \\\n        tf.nn.sparse_softmax_cross_entropy_with_logits( \\\n        logits=end_points['heading_scores'], labels=heading_class_label))\n    tf.summary.scalar('heading class loss', heading_class_loss)\n\n    hcls_onehot = tf.one_hot(heading_class_label,\n        depth=NUM_HEADING_BIN,\n        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN\n    heading_residual_normalized_label = \\\n        heading_residual_label / (np.pi/NUM_HEADING_BIN)\n    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \\\n        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \\\n        heading_residual_normalized_label, delta=1.0)\n    tf.summary.scalar('heading residual normalized loss',\n        heading_residual_normalized_loss)\n\n    # Size loss\n    size_class_loss = tf.reduce_mean( \\\n        tf.nn.sparse_softmax_cross_entropy_with_logits( \\\n        logits=end_points['size_scores'], labels=size_class_label))\n    tf.summary.scalar('size class loss', size_class_loss)\n\n    scls_onehot = tf.one_hot(size_class_label,\n        depth=NUM_SIZE_CLUSTER,\n        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER\n    scls_onehot_tiled = tf.tile(tf.expand_dims( \\\n        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3\n    predicted_size_residual_normalized = tf.reduce_sum( \\\n        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3\n\n    mean_size_arr_expand = tf.expand_dims( \\\n        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3\n    mean_size_label = tf.reduce_sum( \\\n        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3\n    size_residual_label_normalized = size_residual_label / mean_size_label\n    size_normalized_dist = tf.norm( \\\n        size_residual_label_normalized - predicted_size_residual_normalized,\n        axis=-1)\n    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)\n    tf.summary.scalar('size residual normalized loss',\n        size_residual_normalized_loss)\n\n    # Corner loss\n    # We select the predicted corners corresponding to the \n    # GT heading bin and size cluster.\n    corners_3d = get_box3d_corners(end_points['center'],\n        end_points['heading_residuals'],\n        end_points['size_residuals']) # (B,NH,NS,8,3)\n    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \\\n        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)\n    corners_3d_pred = tf.reduce_sum( \\\n        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,\n        axis=[1,2]) # (B,8,3)\n\n    heading_bin_centers = tf.constant( \\\n        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)\n    heading_label = tf.expand_dims(heading_residual_label,1) + \\\n        tf.expand_dims(heading_bin_centers, 0) # (B,NH)\n    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)\n    mean_sizes = tf.expand_dims( \\\n        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)\n    size_label = mean_sizes + \\\n        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)\n    size_label = tf.reduce_sum( \\\n        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)\n    corners_3d_gt = get_box3d_corners_helper( \\\n        center_label, heading_label, size_label) # (B,8,3)\n    corners_3d_gt_flip = get_box3d_corners_helper( \\\n        center_label, heading_label+np.pi, size_label) # (B,8,3)\n\n    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),\n        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))\n    corners_loss = huber_loss(corners_dist, delta=1.0) \n    tf.summary.scalar('corners loss', corners_loss)\n\n    # Weighted sum of all losses\n    total_loss = mask_loss + box_loss_weight * (center_loss + \\\n        heading_class_loss + size_class_loss + \\\n        heading_residual_normalized_loss*20 + \\\n        size_residual_normalized_loss*20 + \\\n        stage1_center_loss + \\\n        corner_loss_weight*corners_loss)\n    tf.add_to_collection('losses', total_loss)\n\n    return total_loss\n\ndef get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,\n                           clip_distance=40.0):\n    ''' Filter lidar points, keep those in image FOV '''\n    #pts_2d = calib.project_rect_to_image(calib.project_ref_to_rect(pc_velo))\n    #pts_2d = calib.project_rect_to_image_torch(calib.project_ref_to_rect_torch(torch.from_numpy(pc_velo).cuda()))\n    pts_2d = calib.project_ref_to_image_torch(torch.from_numpy(pc_velo).cuda())\n    pts_2d = pts_2d.cpu().numpy()\n\n    fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \\\n        (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)\n    # fov_inds = fov_inds & (pc_velo[:,2]<clip_distance) #filter out far z pts\n    imgfov_pc_velo = pc_velo[fov_inds,:]\n    return imgfov_pc_velo, pts_2d, fov_inds\n\n# @profile\ndef preprocess_pointcloud(detections, point_cloud, pc_image_coord,\n                            calib, num_point = 1024, \n                            lidar_point_threshold=5):\n    ''' Extract point clouds in frustums extruded from 2D detection boxes.\n        Update: Lidar points and 3d boxes are in *rect camera* coord system\n            (as that in 3d box label files)\n        \n    Input:\n        lidar_point_threshold: int, neglect frustum with too few points.\n    Output:\n\n    '''\n    \n    point_clouds = [] # channel number = 4, xyz,intensity in rect camera coord\n    rot_angles = []\n    ids_3d = np.zeros((len(detections)))\n    for i, detection in enumerate(detections):\n\n        xmin,ymin,xmax,ymax,_,_,_ = detection\n        box_fov_inds = (pc_image_coord[:,0]<xmax) & \\\n            (pc_image_coord[:,0]>=xmin) & \\\n            (pc_image_coord[:,1]<ymax) & \\\n            (pc_image_coord[:,1]>=ymin)\n        pc_in_box_fov = point_cloud[box_fov_inds,:]\n        box_center = np.array([xmax+xmin, ymin+ymax])/2\n        uvdepth = np.zeros((1,3))\n        uvdepth[0,0:2] = box_center\n        uvdepth[0,2] = 20 # some random depth\n        box2d_center_rect = calib.project_image_to_rect(uvdepth)\n        frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],\n            box2d_center_rect[0,0])\n        rot_angles.append(frustum_angle)\n        if len(pc_in_box_fov)<lidar_point_threshold:\n            ids_3d[i] = -1\n            point_clouds.append(np.zeros((1, num_point, 4)))\n        else:\n            if pc_in_box_fov.shape[0] > num_point:\n                pc_in_box_fov = np.expand_dims(pc_in_box_fov[np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point), replace=False)], 0)\n            else:\n                pc_in_box_fov = np.expand_dims(np.vstack([pc_in_box_fov, pc_in_box_fov[np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point-pc_in_box_fov.shape[0]), replace=True)]]), 0)\n            pc_in_box_fov[0] = rotate_pc_along_y(pc_in_box_fov[0], frustum_angle)\n            # frustum = o3d.geometry.PointCloud()\n            # out = pc_in_box_fov[0, :, :3]\n            # # out = out[:, [2, 0, 1]]\n            # # out[:, 1] *= -1\n            # # out[:, 2] *= -1\n            # frustum.points = o3d.utility.Vector3dVector(out)\n            # o3d.io.write_point_cloud(\"pc_frustum_sample_rot_1.xyz\", frustum)\n            # # rot_angles.append(0) #no frsturum rotation\n            # import pdb; pdb.set_trace()\n            point_clouds.append(pc_in_box_fov)\n\n    return point_clouds, rot_angles, ids_3d\n# @profile\ndef generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False):\n    _, img_height, img_width = img_shape\n    _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(np.copy(point_cloud[:,:3]), calib, 0, 0, img_width, img_height)\n    pc_image_coord = pc_image_coord[img_fov_inds,:]\n    point_cloud = point_cloud[img_fov_inds,:]\n    point_cloud_frustrums, rot_angles, ids_3d = preprocess_pointcloud(detections_2d, point_cloud, pc_image_coord, calib, num_point = detector.num_point)\n    point_cloud_frustrums = np.vstack(point_cloud_frustrums)\n    boxes_3d, scores_3d, depth_features = detector(point_cloud_frustrums, np.asarray(rot_angles), peds)\n    for i in range(len(ids_3d)):\n        if ids_3d[i] == -1:\n            boxes_3d[i] = None\n    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features\n\ndef convert_depth_features(depth_features_orig, ids_3d, cuda = True):\n    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n    depth_features = []\n    for i, depth_feature_orig in enumerate(depth_features_orig):\n        if depth_feature_orig is None or ids_3d[i] == -1:\n            depth_features.append(None)\n        else:\n            depth_features.append(torch.Tensor(depth_feature_orig).type(Tensor))\n    return depth_features\n"
  },
  {
    "path": "paper_experiments/utils/featurepointnet_tf_util.py",
    "content": "\"\"\" Wrapper functions for TensorFlow layers.\n\nAuthor: Charles R. Qi\nDate: November 2017\n\"\"\"\n\nimport numpy as np\nimport tensorflow as tf\n\ndef _variable_on_cpu(name, shape, initializer, use_fp16=False):\n  \"\"\"Helper to create a Variable stored on CPU memory.\n  Args:\n    name: name of the variable\n    shape: list of ints\n    initializer: initializer for Variable\n  Returns:\n    Variable Tensor\n  \"\"\"\n  with tf.device(\"/cpu:0\"):\n    dtype = tf.float16 if use_fp16 else tf.float32\n    var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)\n  return var\n\ndef _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):\n  \"\"\"Helper to create an initialized Variable with weight decay.\n\n  Note that the Variable is initialized with a truncated normal distribution.\n  A weight decay is added only if one is specified.\n\n  Args:\n    name: name of the variable\n    shape: list of ints\n    stddev: standard deviation of a truncated Gaussian\n    wd: add L2Loss weight decay multiplied by this float. If None, weight\n        decay is not added for this Variable.\n    use_xavier: bool, whether to use xavier initializer\n\n  Returns:\n    Variable Tensor\n  \"\"\"\n  if use_xavier:\n    initializer = tf.contrib.layers.xavier_initializer()\n  else:\n    initializer = tf.truncated_normal_initializer(stddev=stddev)\n  var = _variable_on_cpu(name, shape, initializer)\n  if wd is not None:\n    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')\n    tf.add_to_collection('losses', weight_decay)\n  return var\n\n\ndef conv1d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=1,\n           padding='SAME',\n           data_format='NHWC',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 1D convolution with non-linear operation.\n\n  Args:\n    inputs: 3-D tensor variable BxLxC\n    num_output_channels: int\n    kernel_size: int\n    scope: string\n    stride: int\n    padding: 'SAME' or 'VALID'\n    data_format: 'NHWC' or 'NCHW'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    assert(data_format=='NHWC' or data_format=='NCHW')\n    if data_format == 'NHWC':\n      num_in_channels = inputs.get_shape()[-1].value\n    elif data_format=='NCHW':\n      num_in_channels = inputs.get_shape()[1].value\n    kernel_shape = [kernel_size,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    outputs = tf.nn.conv1d(inputs, kernel,\n                           stride=stride,\n                           padding=padding,\n                           data_format=data_format)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)\n\n    if bn:\n      outputs = batch_norm_for_conv1d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn',\n                                      data_format=data_format)\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\n\n\ndef conv2d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1],\n           padding='SAME',\n           data_format='NHWC',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 2D convolution with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    data_format: 'NHWC' or 'NCHW'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      assert(data_format=='NHWC' or data_format=='NCHW')\n      if data_format == 'NHWC':\n        num_in_channels = inputs.get_shape()[-1].value\n      elif data_format=='NCHW':\n        num_in_channels = inputs.get_shape()[1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_in_channels, num_output_channels]\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      outputs = tf.nn.conv2d(inputs, kernel,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding,\n                             data_format=data_format)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn',\n                                        data_format=data_format)\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n\ndef conv2d_transpose(inputs,\n                     num_output_channels,\n                     kernel_size,\n                     scope,\n                     stride=[1, 1],\n                     padding='SAME',\n                     use_xavier=True,\n                     stddev=1e-3,\n                     weight_decay=None,\n                     activation_fn=tf.nn.relu,\n                     bn=False,\n                     bn_decay=None,\n                     is_training=None):\n  \"\"\" 2D convolution transpose with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n\n  Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      num_in_channels = inputs.get_shape()[-1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_output_channels, num_in_channels] # reversed to conv2d\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      \n      # from slim.convolution2d_transpose\n      def get_deconv_dim(dim_size, stride_size, kernel_size, padding):\n          dim_size *= stride_size\n\n          if padding == 'VALID' and dim_size is not None:\n            dim_size += max(kernel_size - stride_size, 0)\n          return dim_size\n\n      # caculate output shape\n      batch_size = inputs.get_shape()[0].value\n      height = inputs.get_shape()[1].value\n      width = inputs.get_shape()[2].value\n      out_height = get_deconv_dim(height, stride_h, kernel_h, padding)\n      out_width = get_deconv_dim(width, stride_w, kernel_w, padding)\n      output_shape = [batch_size, out_height, out_width, num_output_channels]\n\n      outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn')\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n   \n\ndef conv3d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1, 1],\n           padding='SAME',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 3D convolution with non-linear operation.\n\n  Args:\n    inputs: 5-D tensor variable BxDxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 3 ints\n    scope: string\n    stride: a list of 3 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    num_in_channels = inputs.get_shape()[-1].value\n    kernel_shape = [kernel_d, kernel_h, kernel_w,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.conv3d(inputs, kernel,\n                           [1, stride_d, stride_h, stride_w, 1],\n                           padding=padding)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n    \n    if bn:\n      outputs = batch_norm_for_conv3d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\ndef fully_connected(inputs,\n                    num_outputs,\n                    scope,\n                    use_xavier=True,\n                    stddev=1e-3,\n                    weight_decay=None,\n                    activation_fn=tf.nn.relu,\n                    bn=False,\n                    bn_decay=None,\n                    is_training=None):\n  \"\"\" Fully connected layer with non-linear operation.\n  \n  Args:\n    inputs: 2-D tensor BxN\n    num_outputs: int\n  \n  Returns:\n    Variable tensor of size B x num_outputs.\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_input_units = inputs.get_shape()[-1].value\n    weights = _variable_with_weight_decay('weights',\n                                          shape=[num_input_units, num_outputs],\n                                          use_xavier=use_xavier,\n                                          stddev=stddev,\n                                          wd=weight_decay)\n    outputs = tf.matmul(inputs, weights)\n    biases = _variable_on_cpu('biases', [num_outputs],\n                             tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n     \n    if bn:\n      outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\ndef max_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D max pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.max_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\ndef avg_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D avg pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\n\ndef max_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D max pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.max_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\ndef avg_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D avg pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\n\ndef batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):\n  \"\"\" NOTE: this is older version of the util func. it is deprecated.\n  Batch normalization on convolutional maps and beyond...\n  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow\n  \n  Args:\n      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC\n      is_training:   boolean tf.Varialbe, true indicates training phase\n      scope:         string, variable scope\n      moments_dims:  a list of ints, indicating dimensions for moments calculation\n      bn_decay:      float or float tensor variable, controling moving average weight\n  Return:\n      normed:        batch-normalized maps\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_channels = inputs.get_shape()[-1].value\n    beta = _variable_on_cpu(name='beta',shape=[num_channels],\n                            initializer=tf.constant_initializer(0))\n    gamma = _variable_on_cpu(name='gamma',shape=[num_channels],\n                            initializer=tf.constant_initializer(1.0))\n    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')\n    decay = bn_decay if bn_decay is not None else 0.9\n    ema = tf.train.ExponentialMovingAverage(decay=decay)\n    # Operator that maintains moving averages of variables.\n    # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist\n    # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3\n    with tf.variable_scope(tf.get_variable_scope(), reuse=False):\n        ema_apply_op = tf.cond(is_training,\n                               lambda: ema.apply([batch_mean, batch_var]),\n                               lambda: tf.no_op())\n    \n    # Update moving average and return current batch's avg and var.\n    def mean_var_with_update():\n      with tf.control_dependencies([ema_apply_op]):\n        return tf.identity(batch_mean), tf.identity(batch_var)\n    \n    # ema.average returns the Variable holding the average of var.\n    mean, var = tf.cond(is_training,\n                        mean_var_with_update,\n                        lambda: (ema.average(batch_mean), ema.average(batch_var)))\n    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)\n  return normed\n\n\ndef batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):\n  \"\"\" Batch normalization on convolutional maps and beyond...\n  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow\n  \n  Args:\n      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC\n      is_training:   boolean tf.Varialbe, true indicates training phase\n      scope:         string, variable scope\n      moments_dims:  a list of ints, indicating dimensions for moments calculation\n      bn_decay:      float or float tensor variable, controling moving average weight\n      data_format:   'NHWC' or 'NCHW'\n  Return:\n      normed:        batch-normalized maps\n  \"\"\"\n  bn_decay = bn_decay if bn_decay is not None else 0.9\n  return tf.contrib.layers.batch_norm(inputs, \n                                      center=True, scale=True,\n                                      is_training=is_training, decay=bn_decay,updates_collections=None,\n                                      scope=scope,\n                                      data_format=data_format)\n\n\ndef batch_norm_for_fc(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on FC data.\n  \n  Args:\n      inputs:      Tensor, 2D BxC input\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,], bn_decay)\n\n\ndef batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):\n  \"\"\" Batch normalization on 1D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 3D BLC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n      data_format: 'NHWC' or 'NCHW'\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1], bn_decay, data_format)\n\n\n\n  \ndef batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format):\n  \"\"\" Batch normalization on 2D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 4D BHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n      data_format: 'NHWC' or 'NCHW'\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)\n\n\ndef batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on 3D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 5D BDHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2,3], bn_decay)\n\n\ndef dropout(inputs,\n            is_training,\n            scope,\n            keep_prob=0.5,\n            noise_shape=None):\n  \"\"\" Dropout layer.\n\n  Args:\n    inputs: tensor\n    is_training: boolean tf.Variable\n    scope: string\n    keep_prob: float in [0,1]\n    noise_shape: list of ints\n\n  Returns:\n    tensor variable\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    outputs = tf.cond(is_training,\n                      lambda: tf.nn.dropout(inputs, keep_prob, noise_shape),\n                      lambda: inputs)\n    return outputs\n"
  },
  {
    "path": "paper_experiments/utils/imm.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport utils.EKF as EKF\nimport pdb\nimport utils.kf_2d as kf_2d\nimport matplotlib.pyplot as plt\n\nnp.set_printoptions(precision=4, suppress=True)\n\nclass IMMFilter2D(EKF.EKF):\n    \"\"\"\n    An IMM filter for tracking bounding boxes in image space.\n    Contains 2 Kalman Filters\n    Filter 1: Constant Velocity Model:\n        The 8-dimensional state space\n            x, y, a, h, vx, vy, va, vh\n        contains the bounding box center position (x, y), aspect ratio a, height h,\n        and their respective velocities.\n        Object motion follows a constant velocity model. The bounding box location\n        (x, y, a, h) is taken as direct observation of the state space (linear\n        observation model).\n    Filter 2: Random Walk Model:\n        The 4-dimensional state space\n            x, y, a, h\n        contains the bounding box center position (x, y), aspect ratio a, height h.\n        Object motion follows a random walk model. The bounding box location\n        (x, y, a, h) is taken as direct observation of the state space (linear\n        observation model).\n    \"\"\"\n    def __init__(self, kf_vel_params=(1./20, 1./160, 1, 1, 2), kf_walk_params=(1./20, 1./160, 1, 1, 2), markov=(0.9,0.7)):\n        self.kf1 = kf_2d.KalmanFilter2D(*kf_vel_params)\n        self.kf2 = kf_2d.RandomWalkKalmanFilter2D(*kf_walk_params)\n\n        self.markov_transition = np.asarray([[markov[0], 1-markov[0]],\n                                             [markov[1], 1-markov[1]]])\n\n    def initiate(self, measurement, flow):\n        \"\"\"Create track from unassociated measurement.\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, a, h) with center position (x, y),\n            aspect ratio a, and height h.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (2,8 dimensional) and covariance matrix (2,8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n        \"\"\"\n        mean_pos1, cov1 = self.kf1.initiate(measurement, flow)\n        #Random walk does not need the flow\n        mean_pos2, cov2 = self.kf2.initiate(measurement, None)\n\n        covariance = np.dstack([cov1, cov2])\n        covariance = np.transpose(covariance, axes=(2,0,1))\n        mean = np.vstack([mean_pos1, mean_pos2])\n        model_probs = np.ones((2,1))*0.5\n\n        return mean, covariance, model_probs\n\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False):\n        \"\"\"Compute gating distance between state distribution and measurements.\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n        \"\"\"\n        dist1 = self.kf1.gating_distance(mean[0, :], covariance[0, :, :], measurements, only_position)\n        dist2 = self.kf2.gating_distance(mean[1, :], covariance[1, :, :], measurements, only_position)\n        return np.where(dist1 < dist2, dist1, dist2)\n\n    def update(self, mean, covariance, measurement, model_probabilities, marginalization=None, JPDA=False):\n        \"\"\"Run Kalman filter correction step.\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, a, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n        \"\"\"\n        # cholesky factorization used to solve for kalman gain since\n        # K = covariance * update_mat * inv(projected_cov)\n        # so K is also the solution to \n        # projected_cov * K = covariance * update_mat\n        # model_probabilities = np.dot(self.markov_transition.T, model_probabilities)\n        # combined_H = np.stack([self.kf1._update_mat, self.kf2._update_mat])\n        # S = np.linalg.multi_dot([combined_H, covariance, np.transpose(combined_H, (0,2,1))])\n\n        mean_1, cov_1 = self.kf1.project(mean[0], covariance[0])\n        mean_2, cov_2 = self.kf2.project(mean[1], covariance[1])\n\n        distance_1 = EKF.squared_mahalanobis_distance(mean_1, cov_1, measurement)\n        distance_2 = EKF.squared_mahalanobis_distance(mean_2, cov_2, measurement)\n\n        distance = np.vstack([distance_1, distance_2])\n\n        distance -= np.amin(distance)\n        \n        dets = np.vstack([np.sqrt(np.linalg.det(cov_1)), np.sqrt(np.linalg.det(cov_2))])\n        if distance.ndim > 1:\n            likelihood = np.sum(np.exp(-distance/2)/dets, axis = -1, keepdims = True)\n        else:\n            likelihood = np.exp(-distance/2)/dets\n\n        model_probs = (likelihood*model_probabilities)/\\\n                        np.sum(likelihood*model_probabilities)\n            \n\n        out_mean_1, out_cov_1 = self.kf1.update(mean[0], covariance[0], measurement, marginalization, JPDA)\n        out_mean_2, out_cov_2 = self.kf2.update(mean[1], covariance[1], measurement, marginalization, JPDA)\n        out_mean = np.vstack([out_mean_1, out_mean_2])\n        out_cov = np.dstack([out_cov_1, out_cov_2])\n        out_cov = np.transpose(out_cov, axes=(2,0,1))\n\n        return out_mean, out_cov, model_probs\n    \n    def predict(self, mean, covariance, model_probabilities):\n        \"\"\"Run Kalman filter prediction step.\n        Parameters\n        ----------\n        mean : ndarray\n            The mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The covariance matrix of the object state at the\n            previous time step.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. Unobserved velocities are initialized to 0 mean.\n        \"\"\"\n        # Perform prediction\n\n        model_future_probabilities = np.dot(self.markov_transition.T, model_probabilities)\n        model_transition_probabilities = self.markov_transition*(model_probabilities/model_future_probabilities.T)\n        mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2 = self.mix_models(mean[0], covariance[0], mean[1], covariance[1], model_transition_probabilities)\n        out_mean_1, out_cov_1 = self.kf1.predict(mixed_mean_1, mixed_cov_1)\n        out_mean_2, out_cov_2 = self.kf2.predict(mixed_mean_2, mixed_cov_2)\n\n        out_mean = np.vstack([out_mean_1, out_mean_2])\n        out_cov = np.dstack([out_cov_1, out_cov_2])\n        out_cov = np.transpose(out_cov, axes=(2,0,1))\n        return out_mean, out_cov, model_future_probabilities\n\n    def mix_models(self, mean_1, cov_1, mean_2, cov_2, model_transition_probabilities):\n        \n        mixed_mean_1 = model_transition_probabilities[0, 0]*mean_1 + model_transition_probabilities[1, 0]*mean_2\n        mixed_mean_2 = model_transition_probabilities[0, 1]*mean_1 + model_transition_probabilities[1, 1]*mean_2\n        mean_diff_12 = mean_1 - mixed_mean_2\n        mean_diff_21 = mean_2 - mixed_mean_1\n        mean_diff_11 = mean_1 - mixed_mean_1\n        mean_diff_22 = mean_2 - mixed_mean_2\n\n        mixed_cov_1 = model_transition_probabilities[0, 0]*(cov_1+np.outer(mean_diff_11, mean_diff_11)) + \\\n                        model_transition_probabilities[1, 0]*(cov_2+np.outer(mean_diff_21, mean_diff_21))\n        mixed_cov_2 = model_transition_probabilities[0, 1]*(cov_2+np.outer(mean_diff_12, mean_diff_12)) + \\\n                    model_transition_probabilities[1, 1]*(cov_2+np.outer(mean_diff_22, mean_diff_22))\n\n        return mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2\n    \n    @staticmethod\n    def combine_states(mean, cov, model_probabilities):\n\n        mean = np.sum(model_probabilities*mean, axis = 0)\n        covariance = np.sum(np.expand_dims(model_probabilities,2)*cov, axis = 0)\n\n        return mean, covariance\n        \ndef generate_particle_motion(motion_matrices, initial_state, process_noise, length = 100):\n    state_list = [initial_state]\n    seed_mode = 0 if np.random.random() < 0.5 else 1\n    markov_transition_matrix = np.asarray([[0.9, 0.1],[.7, 0.3]])\n    modes = [seed_mode]\n    for i in range(length):\n        modes.append(seed_mode)\n        motion_matrix = motion_matrices[seed_mode]\n        state_list.append(np.dot(motion_matrix, state_list[-1])+np.random.randn(*initial_state.shape)*process_noise[seed_mode])\n        if np.random.rand() < markov_transition_matrix[seed_mode][0]:\n            seed_mode = 0\n        else:\n            seed_mode = 1\n    return np.array(state_list), modes\n\ndef generate_observations(input_state_list, observation_matrix, observation_noise):\n    observation_shape = np.dot(observation_matrix, input_state_list[0]).shape\n    output = [np.dot(observation_matrix, state)+np.random.randn(*observation_shape)*observation_noise \n                for state in input_state_list]\n    return np.array(output)\n\nif __name__=='__main__':\n    imm_filter = IMMFilter2D()\n    motion_matrix = np.eye(8)\n    motion_matrix[0,4] = 1\n    motion_matrix[1,5] = 1\n    initial_state = np.array([0,0,1,1,1,1,0,0])\n    states, modes = generate_particle_motion([motion_matrix, np.eye(8)], initial_state, [0.1, 2], 50)\n    plt.subplot(211)\n    plt.plot(states[:,0], states[:,1], linestyle = '--', marker='.', label= 'True state')\n    observation_matrix = np.eye(4,8)\n    obs = generate_observations(states, observation_matrix, 0.5)\n    # plt.scatter(obs[:,0], obs[:,1], marker='x', color='green', label = 'observation')\n    rnd_filter = kf_2d.KalmanFilter2D()\n    mean, covariance, probs = imm_filter.initiate(obs[0])\n    mean_rand, cov_rand = rnd_filter.initiate(obs[0])\n    mean_list, covariance_list, probs_list = [], [], []\n    mean_list_rand, covariance_list_rand = [], []\n\n    combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)\n    mean_list.append(combined_mean)\n    covariance_list.append(combined_cov)\n    mean_list_rand.append(mean_rand)\n    covariance_list_rand.append(cov_rand)\n    probs_list.append(probs)\n    for idx, i in enumerate(obs[1:]):\n        mean_rand_new, cov_rand_new = rnd_filter.predict(mean_rand, cov_rand)\n        mean_rand, cov_rand = rnd_filter.update(mean_rand_new, cov_rand_new, i)\n        mean_list_rand.append(mean_rand)\n        covariance_list_rand.append(cov_rand)\n\n\n        mean_new, covariance_new, probs_new = imm_filter.predict(mean, covariance, probs)\n        mean, covariance, probs = imm_filter.update(mean_new, covariance_new, i, probs_new)\n        combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)\n        pdb.set_trace()\n        pdb.set_trace()\n        mean_list.append(combined_mean)\n        covariance_list.append(combined_cov)\n        probs_list.append(probs)\n\n    mean_list = np.array(mean_list)\n    mean_list_rand = np.array(mean_list_rand)\n    plt.plot(mean_list[:, 0], mean_list[:, 1], marker='+', c='k', label = 'IMMestimate', alpha = 0.6)\n    plt.plot(mean_list_rand[:, 0], mean_list_rand[:, 1], marker=',', c='orange', label = 'CV estimate', alpha = 0.6)\n    # plt.scatter(mean_list[:, 0], mean_list[:, 1], marker='+', c=np.vstack([probs, np.zeros((1,1))]).T, label = 'IMMestimate')\n    # plt.scatter(mean_list_rand[:, 0], mean_list_rand[:, 1], marker='x', c='orange', label = 'random walk estimate')\n    MSE_IMM = np.mean((mean_list[:,:2]-states[:,:2])**2)\n    MSE = np.mean((mean_list_rand[:,:2]-states[:,:2])**2)\n    print(\"MSE: %f for 2D filter\"%MSE)\n    print(\"MSE: %f for IMM filter\"%MSE_IMM)\n    plt.legend()\n    plt.subplot(212)\n\n    plt.plot(modes, label='True modes')\n    plt.plot([i[1] for i in probs_list], label='predicted modes')\n    plt.legend()\n    plt.show()\n"
  },
  {
    "path": "paper_experiments/utils/iou_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom . import linear_assignment\nimport pdb\n\n\ndef iou(bbox, candidates):\n    \"\"\"Computer intersection over union.\n\n    Parameters\n    ----------\n    bbox : ndarray\n        A bounding box in format `(top left x, top left y, width, height)`.\n    candidates : ndarray\n        A matrix of candidate bounding boxes (one per row) in the same format\n        as `bbox`.\n\n    Returns\n    -------\n    ndarray\n        The intersection over union in [0, 1] between the `bbox` and each\n        candidate. A higher score means a larger fraction of the `bbox` is\n        occluded by the candidate.\n\n    \"\"\"\n    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]\n    candidates_tl = candidates[:, :2]\n    candidates_br = candidates[:, :2] + candidates[:, 2:]\n\n    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],\n               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]\n    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],\n               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]\n    wh = np.maximum(0., br - tl)\n\n    area_intersection = wh.prod(axis=1)\n    area_bbox = bbox[2:].prod()\n    area_candidates = candidates[:, 2:].prod(axis=1)\n    return area_intersection / (area_bbox + area_candidates - area_intersection)\n\n\ndef iou_cost(tracks, detections, track_indices=None,\n             detection_indices=None, use3d=False, kf=None):\n    \"\"\"An intersection over union distance metric.\n\n    Parameters\n    ----------\n    tracks : List[deep_sort.track.Track]\n        A list of tracks.\n    detections : List[deep_sort.detection.Detection]\n        A list of detections.\n    track_indices : Optional[List[int]]\n        A list of indices to tracks that should be matched. Defaults to\n        all `tracks`.\n    detection_indices : Optional[List[int]]\n        A list of indices to detections that should be matched. Defaults\n        to all `detections`.\n    box_expansion_factor:\n        Multiplier for box size to bias towards higher recall\n\n    Returns\n    -------\n    ndarray\n        Returns a cost matrix of shape\n        len(track_indices), len(detection_indices) where entry (i, j) is\n        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))\n\n    if cost_matrix.shape[0] == 0 or cost_matrix.shape[1] == 0:\n        return cost_matrix\n    if use3d:\n        # Convert 3d detctions to tlwh format\n        # @TODO: Should use a Detection3D class to do this\n        candidates = np.array([detections[i].box_3d for i in detection_indices])\n        candidates[:,:2] -= candidates[:,3:5] / 2\n        candidates = candidates[:, [0,2,3,5]]\n    else:\n        candidates = np.asarray([detections[i].tlwh for i in detection_indices])\n\n    for row, track_idx in enumerate(track_indices):\n        if use3d:\n            bbox = tracks[track_idx].to_tlwh3d()\n            bbox[:2] -= bbox[3:5] / 2\n            bbox = bbox[[0,2,3,5]]\n        else:\n            bbox = tracks[track_idx].to_tlwh(kf)\n        cost_matrix[row, :] = 1. - iou(bbox, candidates)\n    return cost_matrix\n"
  },
  {
    "path": "paper_experiments/utils/kf_2d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport utils.EKF as EKF\nimport pdb\nnp.set_printoptions(precision=4, suppress=True)\n\nclass KalmanFilter2D(EKF.EKF):\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, w, h, vx, vy, vw, vh\n\n    contains the bounding box center position (x, y), width w, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, w, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n\n    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, gate_limit):\n        ndim, dt = 4, 1.\n        self.ndim = ndim\n        self.img_center = 1242\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_process = std_process\n        self._std_weight_measurement = std_measurement\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._initial_uncertainty = initial_uncertainty\n        self.LIMIT = gate_limit\n\n    def initiate(self, measurement, flow):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, a, h) with center position (x, y),\n            aspect ratio a, and height h.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (8 dimensional) and covariance matrix (8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        if flow is not None:\n            vel = np.mean(np.reshape(flow[int(mean_pos[1]):int(mean_pos[1]+mean_pos[3]), \n                    int(mean_pos[0]):int(mean_pos[0]+mean_pos[2]), :], (-1, 2)), axis=0)\n            mean_vel[:2] = vel\n        mean = np.r_[mean_pos, mean_vel]\n\n        # Initialize covariance based on w, h and configured std\n        std = [\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],\n\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3]]\n\n        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2\n        return mean, covariance\n\n    def predict_mean(self, mean):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        return np.dot(self._motion_mat, mean)\n    \n    def predict_covariance(self, mean, covariance, last_detection, next_to_last_detection):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        process_noise = self.get_process_noise(mean, last_detection, next_to_last_detection)\n        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) \n                     + process_noise)\n\n    def get_process_noise(self, mean, last_detection, next_to_last_detection):\n        # Returns Rt the motion noise covariance\n\n        depth_scale = 1\n        if last_detection.box_3d is not None:\n            dist = last_detection.get_3d_distance()\n            depth_scale = max(1,1+(16-dist)/10)\n            if next_to_last_detection is not None and next_to_last_detection.box_3d is not None:\n                b1 = last_detection.box_3d\n                b2 = next_to_last_detection.box_3d\n                vel = ((b1[0]-b2[0])**2 + (b1[2]-b2[2])**2)**(1/2)\n                if vel > 2: # Fast moving (car) nearby, increase uncertainty\n                    depth_scale *= 2\n                    pass\n                # print(vel)\n            # print(dist, depth_scale)\n\n        depth_scale = 1\n        # depth_scale *= max(1, 1+(40-mean[2])/50, 1+(40-mean[3])/50) # Note: Scales up small boxes bc higher uncertainty\n\n        # Motion uncertainty scaled by estimated height\n        std_pos = [\n            depth_scale * self._std_weight_pos * mean[2],\n            depth_scale * self._std_weight_pos * mean[3],\n            depth_scale * self._std_weight_pos * mean[2],\n            depth_scale * self._std_weight_pos * mean[3]]\n        std_vel = [\n            depth_scale * self._std_weight_vel * mean[2],\n            depth_scale * self._std_weight_vel * mean[3],\n            depth_scale * self._std_weight_vel * mean[2],\n            depth_scale * self._std_weight_vel * mean[3]]\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2\n\n        return motion_cov\n\n    def project_mean(self, mean):\n        # Measurement prediction from state (function h)\n        # Calculations sensor update Jacobian (Ht)\n        # Returns (h(mean), Ht)\n        return np.dot(self._observation_mat, mean)\n\n\n\n    def get_measurement_noise(self, measurement):\n        # Returns Qt the sensor noise covariance\n                \n        # Measurement uncertainty scaled by estimated height\n        std = [\n                self._std_weight_pos*measurement[2],\n                self._std_weight_pos*measurement[3],\n                self._std_weight_pos*measurement[2],\n                self._std_weight_pos*measurement[3]]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement**2\n        return innovation_cov\n    \n    def project_cov(self, mean, covariance):\n        # Returns S the innovation covariance (projected covariance)\n                \n        measurement_noise = self.get_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,\n                                          self._observation_mat.T))\n                     + measurement_noise)\n        return innovation_cov\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False, use_3d=False):\n        \"\"\"Compute gating distance between state distribution and measurements.\n\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n\n        \"\"\"\n        projected_mean, projected_covariance = self.project(mean, covariance)\n        if only_position:\n            projected_mean, projected_covariance = projected_mean[:2], projected_covariance[:2, :2]\n            measurements = measurements[:, :2]\n        max_val = np.amax(projected_covariance)\n        # LIMIT = max(mean[2], mean[3]) #*(1 + abs(3*mean[0]/self.img_center - 1))\n        # print(projected_covariance)\n        if max_val > self.LIMIT:\n            projected_covariance *= self.LIMIT / max_val\n        return EKF.squared_mahalanobis_distance(projected_mean, projected_covariance, measurements)\n\nclass RandomWalkKalmanFilter2D(KalmanFilter2D): #TODO UPDATE THIS DOCUMENTATION\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, w, h\n\n    contains the bounding box center position (x, y), aspect ratio a, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, a, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, img_center=1242):\n        ndim, dt = 4, 1.\n        self.ndim = ndim\n        self.img_center = img_center\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(2*ndim, 2*ndim)\n        self._motion_mat[ndim:, ndim:] = 0\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(ndim, 2*ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_process = std_process\n        self._std_weight_measurement = std_measurement\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._initial_uncertainty = initial_uncertainty\n"
  },
  {
    "path": "paper_experiments/utils/kf_3d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport EKF\nimport pdb\n\n\nclass KalmanFilter3D(EKF.EKF):\n    \"\"\"\n    A simple 3D Kalman filter for tracking bounding cuboids in 3d.\n\n    The 12-dimensional state space\n\n        x, y, l, h, w, theta, Vx, Vy, Vl, Vh, Vw, Vtheta\n\n    contains the bounding box center position (x, y), width w, height h,\n    length l, heading theta, and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    is taken as direct observation of the state space (linear observation model).\n\n    \"\"\"\n\n    def __init__(self):\n        ndim, dt = 6, 1.\n        self.ndim = ndim\n\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n        # Sensor model is direct observation, i.e. x = x\n        self._update_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_motion_pos = 0.8\n        self._std_motion_vel = 0.1\n        self._std_motion_theta= 0.017*1 # ~1 degrees\n        self._std_motion_omega = 0.017*0.1 # ~0.1 degrees\n\n        self._std_sensor_pos = 0.8\n        self._std_sensor_vel = 0.1\n        self._std_sensor_theta= 0.017*5 # ~5 degrees\n\n        std_pos = [\n            self._std_motion_pos, # x\n            self._std_motion_pos, # y\n            self._std_motion_pos, # l\n            self._std_motion_pos, # h\n            self._std_motion_pos, # w            \n            self._std_motion_theta # theta\n            ]\n        std_vel = [\n            self._std_motion_vel, # x\n            self._std_motion_vel, # y\n            self._std_motion_vel, # l\n            self._std_motion_vel, # h\n            self._std_motion_vel, # w            \n            self._std_motion_omega # omega\n            ]\n        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n\n        std = [\n            self._std_sensor_pos, # x\n            self._std_sensor_pos, # y\n            self._std_sensor_pos, # l\n            self._std_sensor_pos, # h\n            self._std_sensor_pos, # w\n            self._std_sensor_theta # theta\n            ]\n        self._innovation_cov = np.diag(np.square(std))\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, l, h, w, theta) \n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (12 dimensional) and covariance matrix (12x12\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        mean = np.r_[mean_pos, mean_vel]\n\n        # Initialize covariance \n        std = [ 2,  2,  2,  2,  2,  2,\n               10, 10, 10, 10, 10, 10\n              ]\n        covariance = self._motion_cov * np.diag(np.square(std))\n        return mean, covariance\n\n    def motion_update(self, mean, covariance):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        mean = np.dot(self._motion_mat, mean)\n        return mean, self._motion_mat\n\n    def get_motion_cov(self, mean, covariance):\n        # Returns Rt the motion noise covariance\n\n        return self._motion_cov\n\n    def sensor_update(self, mean, covariance):\n        # Measurement prediction from state (function h)\n        # Calculations sensor update Jacobian (Ht)\n        # Returns (h(mean), Ht)\n        mean = np.dot(self._update_mat, mean)\n        return mean, self._update_mat\n\n    def get_innovation_cov(self, mean, covariance):\n        # Returns Qt the sensor noise covariance\n        return self._innovation_cov\n\n    def adjust_angle(self, measured, target):\n        step = 2*np.pi\n        measured += step*np.round((target - measured)/step)\n        return measured\n\n    def update(self, mean, covariance, meas_in, marginalization=None, JPDA=False):\n        measurement = np.copy(meas_in)\n        if measurement.ndim == 1:\n            measurement[5] = self.adjust_angle(measurement[5], mean[5])\n        else:\n            measurement[:,5] = self.adjust_angle(measurement[:,5], mean[5])\n        return EKF.EKF.update(self, mean, covariance, measurement, marginalization, JPDA)\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False):\n        \"\"\"Compute gating distance between state distribution and measurements.\n\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 6 degrees of\n        freedom, otherwise 2.\n\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n\n        \"\"\"\n        mean, covariance = self.project(mean, covariance)\n        if only_position:\n            mean, covariance = mean[:2], covariance[:2, :2]\n            measurements = measurements[:, :2]\n\n        return EKF.squared_mahalanobis_distance(mean, covariance, measurements)\n"
  },
  {
    "path": "paper_experiments/utils/linear_assignment.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom sklearn.utils.linear_assignment_ import linear_assignment\nimport EKF\nimport pdb\nfrom mbest_ilp import new_m_best_sol\nfrom multiprocessing import Pool\nfrom functools import partial\n#from mbest_ilp import m_best_sol as new_m_best_sol\n\nINFTY_COST = 1e+5\n\ndef min_marg_matching(marginalizations, track_indices=None, max_distance=1):\n    cost_matrix = 1 - marginalizations\n    num_tracks, num_detections = cost_matrix.shape\n    if track_indices is None:\n        track_indices = np.arange(num_tracks)\n\n    detection_indices = np.arange(num_detections-1)\n\n    if num_tracks == 0 or num_detections == 0:\n        return [], track_indices, detection_indices  # Nothing to match.\n\n    extra_dummy_cols = np.tile(cost_matrix[:,0,np.newaxis], (1, num_tracks-1))\n    expanded_cost_matrix = np.hstack((extra_dummy_cols, cost_matrix))\n    indices = linear_assignment(expanded_cost_matrix)\n\n    matches, unmatched_tracks, unmatched_detections = [], [], []\n\n    # gather unmatched detections (new track)\n    for col, detection_idx in enumerate(detection_indices):\n        if col+num_tracks not in indices[:, 1]:\n            unmatched_detections.append(detection_idx)\n\n    # gather unmatched tracks (no detection)\n    for row, track_idx in enumerate(track_indices):\n        if row not in indices[:, 0]:\n            unmatched_tracks.append(track_idx)\n\n    # thresholding and matches\n    for row, col in indices:\n\n        track_idx = track_indices[row]\n        detection_idx = col - num_tracks\n        if detection_idx < 0:\n            unmatched_tracks.append(track_idx)\n            continue\n\n        if expanded_cost_matrix[row, col] > max_distance:\n            # apply thresholding\n            unmatched_tracks.append(track_idx)\n            unmatched_detections.append(detection_idx)\n        else:\n            # associate matches\n            matches.append((track_idx, detection_idx))\n\n    return matches, unmatched_tracks, unmatched_detections\n\ndef min_cost_matching(\n        distance_metric, max_distance, tracks, detections, track_indices=None,\n        detection_indices=None, compare_2d = False, detections_3d=None):\n    \"\"\"Solve linear assignment problem.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection_indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    if len(detection_indices) == 0 or len(track_indices) == 0:\n        return [], track_indices, detection_indices  # Nothing to match.\n\n    cost_matrix = distance_metric(\n        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)\n    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5\n\n    #print(\"\\n\\nCascade Cost Matrix: \", cost_matrix)\n\n    indices = linear_assignment(cost_matrix)\n\n    matches, unmatched_tracks, unmatched_detections = [], [], []\n\n    # gather unmatched detections (new track)\n    for col, detection_idx in enumerate(detection_indices):\n        if col not in indices[:, 1]:\n            unmatched_detections.append(detection_idx)\n\n    # gather unmatched trackes (no detection)\n    for row, track_idx in enumerate(track_indices):\n        if row not in indices[:, 0]:\n            unmatched_tracks.append(track_idx)\n\n    # thresholding and matches\n    for row, col in indices:\n\n        track_idx = track_indices[row]\n        detection_idx = detection_indices[col]\n\n        if cost_matrix[row, col] > max_distance:\n            # apply thresholding\n            unmatched_tracks.append(track_idx)\n            unmatched_detections.append(detection_idx)\n        else:\n            # associate matches\n            matches.append((track_idx, detection_idx))\n\n    return matches, unmatched_tracks, unmatched_detections\n\n# @profile\ndef JPDA(\n        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,\n        detection_indices=None, m=1, compare_2d = False, windowing = False):\n    \"\"\"Solve linear assignment problem.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection_indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    if len(detection_indices) == 0 or len(track_indices) == 0:\n        return np.zeros((0, len(detections) + 1))  # Nothing to match.\n    cost_matrix, gate_mask = distance_metric(\n        tracks, detections, track_indices, detection_indices, compare_2d)\n    \n    num_tracks, num_detections = cost_matrix.shape[0], cost_matrix.shape[1]\n    cost_matrix[gate_mask] = INFTY_COST\n    \n    # print(\"\\nIOU Cost Matrix:\", cost_matrix[:,:,0])\n    # print(\"App:\", cost_matrix[:,:,1])\n\n    clusters = find_clusters(cost_matrix[:,:,0], INFTY_COST - 0.0001)\n    # print('\\n', clusters)\n\n    jpda_output = []\n    for cluster in clusters:\n        jpda_output.append(get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, INFTY_COST - 0.0001, m))\n    if not jpda_output:\n        mc = np.zeros((num_tracks, num_detections + 1))\n        mc[:, 0] = 1\n        return mc\n    assignments, assignment_cost = zip(*jpda_output)\n    assignments = np.vstack([item for sublist in assignments for item in sublist])\n    assignment_cost = np.array([item for sublist in assignment_cost for item in sublist])\n\n    marginalised_cost = np.sum(assignments*np.exp(-np.expand_dims(assignment_cost, 1)), axis = 0)\n    marginalised_cost = np.reshape(marginalised_cost, (num_tracks, num_detections+1))\n    # print('\\n', marginalised_cost)\n    return marginalised_cost\n\ndef calculate_entropy(matrix, idx, idy):\n    mask = np.ones(matrix.shape)\n    mask[idx, idy] = 0\n    entropy = matrix/np.sum(mask*matrix, axis=1, keepdims=True)\n    entropy = (-entropy*np.log(entropy)) * mask\n    entropy = np.mean(np.sum(entropy, axis=1))\n    return entropy\n\ndef get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):\n    if len(cluster[1]) == 0:\n        assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        assignment[cluster[0], 0] = 1\n        assignment = assignment.reshape(1,-1)\n        return [assignment], np.array([0])\n    \n    new_cost_matrix_appearance = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), \n                                        np.tile(cluster[1] - 1, len(cluster[0])), \n                                        [0]*(len(cluster[1])*len(cluster[0]))], \n                                        (len(cluster[0]), len(cluster[1])))\n    new_cost_matrix_iou = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), np.tile(cluster[1] - 1, len(cluster[0])), 1], \n                (len(cluster[0]), len(cluster[1])))\n    idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)\n    appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)\n    iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)\n    if appearance_entropy < iou_entropy:\n        new_cost_matrix = new_cost_matrix_appearance\n        new_cost_matrix = 2*np.ones(new_cost_matrix.shape)/(new_cost_matrix+1) - 1\n        dummy_node_cost = -np.log(2/(dummy_node_cost_app+1) - 1)\n    else:\n        new_cost_matrix = new_cost_matrix_iou\n        new_cost_matrix[new_cost_matrix==1] -= 1e-3\n        new_cost_matrix = 1 - new_cost_matrix\n        dummy_node_cost = -np.log(1-dummy_node_cost_iou)\n    new_cost_matrix = -np.log(new_cost_matrix)\n    new_cost_matrix[idx_x, idx_y] = cutoff\n    if len(cluster[0]) == 1:\n        new_cost_matrix = np.concatenate([np.ones((new_cost_matrix.shape[0], 1))*dummy_node_cost, new_cost_matrix], axis = 1)\n        total_cost = np.sum(np.exp(-new_cost_matrix))\n        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(\n                        np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = np.exp(-new_cost_matrix)/total_cost\n        new_assignment = new_assignment.reshape(1, -1)\n        return  [new_assignment], np.array([0])\n    if new_cost_matrix.ndim <= 1:\n        new_cost_matrix = np.expand_dims(new_cost_matrix, 1)\n\n    # print(new_cost_matrix)\n    assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)\n    offset = np.amin(assignment_cost)\n    assignment_cost -= offset\n    new_assignments = []\n    total_cost = np.sum(np.exp(-assignment_cost))\n    for assignment in assignments:\n        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(\n                    np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = \\\n                                                assignment/total_cost\n        new_assignments.append(new_assignment.reshape(1, -1))\n    return new_assignments, assignment_cost\n\n\ndef matching_cascade(\n        distance_metric, max_distance, cascade_depth, tracks, detections,\n        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):\n    \"\"\"Run matching cascade.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    cascade_depth: int\n        The cascade depth, should be se to the maximum track age.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : Optional[List[int]]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above). Defaults to all tracks.\n    detection_indices : Optional[List[int]]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above). Defaults to all\n        detections.\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = list(range(len(tracks)))\n    if detection_indices is None:\n        detection_indices = list(range(len(detections)))\n\n    unmatched_detections = detection_indices\n    matches = []\n    for level in range(cascade_depth):\n        if len(unmatched_detections) == 0:  # No detections left\n            break\n\n        track_indices_l = [\n            k for k in track_indices\n            if tracks[k].time_since_update == 1 + level\n        ]\n        if len(track_indices_l) == 0:  # Nothing to match at this level\n            continue\n\n        matches_l, _, unmatched_detections = \\\n            min_cost_matching(\n                distance_metric, max_distance, tracks, detections,\n                track_indices_l, unmatched_detections, compare_2d, detections_3d=detections_3d)\n        matches += matches_l\n    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))\n    return matches, unmatched_tracks, unmatched_detections\n\n# @profile\ndef gate_cost_matrix(\n        kf, tracks, detections, track_indices, detection_indices,\n        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):\n    \"\"\"Invalidate infeasible entries in cost matrix based on the state\n    distributions obtained by Kalman filtering.\n\n    Parameters\n    ----------\n    kf : The Kalman filter.\n    cost_matrix : ndarray\n        The NxM dimensional cost matrix, where N is the number of track indices\n        and M is the number of detection indices, such that entry (i, j) is the\n        association cost between `tracks[track_indices[i]]` and\n        `detections[detection_indices[j]]`.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n    gated_cost : Optional[float]\n        Entries in the cost matrix corresponding to infeasible associations are\n        set this value. Defaults to a very large value.\n    only_position : Optional[bool]\n        If True, only the x, y position of the state distribution is considered\n        during gating. Defaults to False.\n\n    Returns\n    -------\n    ndarray\n        Returns the modified cost matrix.\n\n    \"\"\"\n   \n    # assert (len(track_indices) == cost_matrix.shape[0]), \"Cost matrix shape does not match track indices\"\n    # assert (len(detection_indices) == cost_matrix.shape[1]), \"Cost matrix shape does match detection indices\"\n\n    if len(track_indices) == 0 or len(detection_indices) == 0:\n        return None\n\n    if use3d:\n        measurements = np.array([det.box_3d for i, det in enumerate(detections) if i in detection_indices])\n    else:\n        measurements = np.asarray(\n            [detections[i].to_xywh() for i in detection_indices])\n    if use3d and only_position:\n        gating_dim = 3\n    elif use3d:\n        gating_dim =  measurements.shape[1]\n    elif only_position:\n        gating_dim = 2\n    else:\n        gating_dim =  measurements.shape[1]\n    gating_threshold = EKF.chi2inv95[gating_dim]\n    gate_mask = []\n    for track_idx in track_indices:\n        track = tracks[track_idx]\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position, use3d)\n        gated_set = gating_distance > gating_threshold\n        if np.all(gated_set):\n            gated_set = gating_distance > gating_threshold * 3\n        # print(track.track_id, gating_threshold, gating_distance)\n        gate_mask.append(gated_set)\n        # print(gated_set)\n    return np.vstack(gate_mask)\n\ndef find_clusters(cost_matrix, cutoff):\n    num_tracks, _ = cost_matrix.shape\n    clusters = []\n    total_tracks = 0\n    total_detections = 0\n    all_tracks = set(range(num_tracks))\n    all_visited_tracks = set()\n    while total_tracks < num_tracks:\n        visited_detections = set()\n        visited_tracks = set()\n        potential_track = next(iter(all_tracks - all_visited_tracks))\n        potential_tracks = set()\n        potential_tracks.add(potential_track)\n        while potential_tracks:\n            current_track = potential_tracks.pop()\n            visited_detections.update((np.where(cost_matrix[current_track] < cutoff)[0])+1)\n            visited_tracks.add(current_track)\n            for detection in visited_detections:\n                connected_tracks = np.where(cost_matrix[:, detection - 1] < cutoff)[0]\n                for track in connected_tracks:\n                    if track in visited_tracks or track in potential_tracks:\n                        continue\n                    potential_tracks.add(track)\n        total_tracks += len(visited_tracks)\n        total_detections += len(visited_detections)\n        all_visited_tracks.update(visited_tracks)\n        clusters.append((np.array(list(visited_tracks), dtype = np.int32), np.array(list(visited_detections), dtype = np.int32)))\n    return clusters\n"
  },
  {
    "path": "paper_experiments/utils/logger.py",
    "content": "import io, os\nimport numpy as np\nfrom PIL import Image\nimport tensorflow as tf\n#courtesy https://becominghuman.ai/logging-in-tensorboard-with-pytorch-or-any-other-library-c549163dee9e\nclass Logger:\n    def __init__(self, logdir):\n        if not os.path.exists(logdir):\n            os.makedirs(logdir)\n        self.writer = tf.summary.FileWriter(logdir)\n\n    def close(self):\n        self.writer.close()\n\n    def log_scalar(self, tag, value, global_step):\n        summary = tf.Summary()\n        summary.value.add(tag=tag, simple_value=value)\n        self.writer.add_summary(summary, global_step=global_step)\n        self.writer.flush()\n        \n    def log_image(self, tag, img, global_step):\n        s = io.BytesIO()\n        Image.fromarray(img).save(s, format='png')\n\n        img_summary = tf.Summary.Image(encoded_image_string=s.getvalue(),\n                                   height=img.shape[0],\n                                   width=img.shape[1])\n\n        summary = tf.Summary()\n        summary.value.add(tag=tag, image=img_summary)\n        self.writer.add_summary(summary, global_step=global_step)\n        self.writer.flush()\n"
  },
  {
    "path": "paper_experiments/utils/mbest_ilp.py",
    "content": "from gurobipy import Model, quicksum, LinExpr, GRB\nimport numpy as np\nimport copy\nimport time\nfrom sklearn.utils.linear_assignment_ import linear_assignment\nimport pickle\nimport itertools\nimport pdb\nfrom copy import deepcopy\nimport math\n\"\"\"\nFn: ilp_assignment\n------------------\nSolves ILP problem using gurobi\n\"\"\"\ndef ilp_assignment(model):\n    \n    model.optimize()\n    if(model.status == 3):\n        return -1\n    return\n\n\"\"\"\nFn: initialize_model\n--------------------\nInitializes gurobi ILP model by setting the base objective\n\"\"\"\n# @profile\ndef initialize_model(cost_matrix, cutoff, model = None):\n    #Add dummy detection\n    cost_matrix = np.insert(cost_matrix,0, np.ones(cost_matrix.shape[0])*cutoff, axis=1)\n    M,N = cost_matrix.shape\n    if model is None:\n        model = Model()\n    else:\n        model.remove(model.getVars())\n        model.remove(model.getConstrs())\n    model.setParam('OutputFlag', False)\n    # y = []\n    # for i in range(M):\n    #     y.append([])\n    #     for j in range(N):\n    #         y[i].append(m.addVar(vtype=GRB.BINARY, name = 'y_%d%d'%(i,j)))\n    y = model.addVars(M,N, vtype=GRB.BINARY, name = 'y')\n    model.setObjective(quicksum(quicksum([y[i,j]*cost_matrix[i][j] for j in range(N)]) for i in range(M)), GRB.MINIMIZE)\n    # for i in range(M):\n    model.addConstrs((quicksum(y[i,j] for j in range(N))==1 for i in range(M)), name='constraint for track')\n    # for j in range(1,N):\n    model.addConstrs((quicksum(y[i,j] for i in range(M))<=1 for j in range(1, N)), name='constraint for detection')\n    y = list(y.values())\n    return model, M, N, y\n\n\"\"\"\nFn: m_best_sol\n--------------\nFinds m_best solutions for object/track association givent the\ninput cost matrix. Solves constrained ILP problems using gurobi solver.\n\"\"\"\ndef cache(func):\n    cache = {}\n    def cached_function(*args):\n        cost_matrix = args[0]\n        cost_matrix = np.hstack((np.ones((cost_matrix.shape[0], 1))*args[1], cost_matrix))\n        if (cost_matrix.shape[0], cost_matrix.shape[1]) in cache:\n            solution_list = cache[(cost_matrix.shape[0], cost_matrix.shape[1])]\n            solution_vals = np.sum(solution_list*cost_matrix.reshape(1, -1), axis = 1)\n            return solution_list, solution_vals\n        else: \n            solution_list, solution_vals = func(*args)\n            cache[(cost_matrix.shape[0], cost_matrix.shape[1])] = solution_list\n            return solution_list, solution_vals\n    return cached_function\n# @profile\ndef num_solutions(cost_matrix):\n    M,N = cost_matrix.shape\n    N += 1\n    count = 0\n    for i in range(min(M+1, N)):\n        count += np.prod(range(M-i+1, M+1))*np.prod(range(N-i, N))//math.factorial(i)\n        if count > 2000:\n            break\n    return int(count)\n\n@cache\ndef enumerate_solutions(cost_matrix, cutoff, num_solutions):\n    # num_solutions = [[2, 3, 4, 5, 6, 7],[3, 7, 13, 21, 31],[4, 13, 34, 73, 136],[5, 21, 73, 209, 501],[6, 31, 136, 501, 1546], [7]]\n    cost_matrix = np.hstack((np.ones((cost_matrix.shape[0], 1))*cutoff, cost_matrix))\n    M,N = cost_matrix.shape\n    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)\n    solution_list[:, :, 0] = 1\n    count = 0\n    for i in range(min(M+1, N)):\n        for chosen in itertools.combinations(range(M), i):\n            for perm in itertools.permutations(range(1,N), i):\n                if chosen:\n                    solution_list[[count]*len(chosen), chosen, perm] = 1\n                    solution_list[[count]*len(chosen), chosen, [0]*len(chosen)] = 0\n                count += 1\n    solution_vals = np.sum(np.sum(solution_list*np.expand_dims(cost_matrix, 0), axis = 1), axis = 1)\n    solution_list = np.reshape(solution_list, (num_solutions, -1))\n    return solution_list, solution_vals\n\n\ndef new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):\n    sols = num_solutions(cost_matrix)\n    if sols <= 2000:\n        return enumerate_solutions(cost_matrix, cutoff, sols)\n    model, M, N, y = initialize_model(cost_matrix, cutoff, model)\n    X = np.zeros((m_sol, M*N))\n    xv = []\n    if (ilp_assignment(model) == -1):\n        xv.append(0)\n    else:\n        x = model.getAttr(\"X\", y)\n        X[0] = x\n        xv.append(model.objVal)\n    if m_sol > 1:\n        model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')\n        if (ilp_assignment(model) == -1):\n            xv.append(0)\n        else:\n            x = model.getAttr(\"X\", y)\n            X[1] = x\n            xv.append(model.objVal)\n    if m_sol > 2:\n        model.remove(model.getConstrByName('constraint_0'))\n        second_best_solutions = []\n        second_best_solution_vals = []\n        partitions = []\n        j = np.argmax(np.logical_xor(X[0], X[1]))\n        partitions.append([j])\n        partitions.append([j])\n        model.addConstr(y[j]==X[0][j], name = 'partition_constraint')\n        model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')\n        ilp_assignment(model)\n        second_best_solutions.append(model.getAttr(\"X\", y))\n        second_best_solution_vals.append(model.objVal)\n        model.remove(model.getConstrByName('non_equality_constraint'))\n        model.remove(model.getConstrByName('partition_constraint'))\n        model.addConstr(y[j]==X[1][j], name = 'partition_constraint')\n        model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')\n        ilp_assignment(model)\n        second_best_solution_vals.append(model.objVal)\n        second_best_solutions.append(model.getAttr(\"X\", y))\n        model.remove(model.getConstrByName('non_equality_constraint'))\n        model.remove(model.getConstrByName('partition_constraint'))\n        \n        for m in range(2, m_sol):\n            l_k = np.argmin(second_best_solution_vals)\n            X[m] = second_best_solutions[l_k]\n            xv.append(second_best_solution_vals[l_k])\n            if m==m_sol-1:\n                break\n            j = np.argmax(np.logical_xor(X[m], X[l_k]))\n            parent_partition = partitions[l_k]\n            constrs = []\n            for idx in parent_partition:\n                constrs.append(model.addConstr(y[idx]==X[l_k, idx]))\n            model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')\n            model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')\n            if(ilp_assignment(model) == -1):\n                second_best_solutions.append(np.ones((M,N)))\n                second_best_solution_vals.append(np.inf)\n            else:\n                second_best_solutions.append(model.getAttr(\"X\", y))\n                second_best_solution_vals.append(model.objVal)\n            model.remove(model.getConstrByName('partition_constraint_new'))\n            model.remove(model.getConstrByName('non_equality_constraint'))\n            model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')\n            model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')\n            if(ilp_assignment(model) == -1):\n                second_best_solution_vals[l_k] = np.inf\n                second_best_solutions[l_k] = np.ones((M,N))\n            else:\n                second_best_solution_vals[l_k] = model.objVal\n                second_best_solutions[l_k] = model.getAttr(\"X\", y)\n            model.remove(model.getConstrByName('partition_constraint_new'))\n            model.remove(model.getConstrByName('non_equality_constraint'))\n            partitions[l_k].append(j)\n            partitions.append(copy.deepcopy(partitions[l_k]))\n            for constr in constrs:\n                model.remove(constr)\n\n\n\n    # X = np.asarray(X)\n    xv = np.asarray(xv)\n    return X, xv\ndef linear_assignment_wrapper(a):\n    return linear_assignment(a)\n\nif __name__=='__main__':\n    # a = np.random.randn(100,100)\n    # # cProfile.run('m_best_sol(a,1,10)', 'mbest.profile')\n    # # cProfile.run('linear_assignment(a)', 'hungarian.profile')\n    # total = 0\n    # for i in range(10):\n    #     start = time.time()\n    #     _, sol_cost = m_best_sol(a, 1, 10)\n    #     end = time.time()\n    #     total+= end-start\n    # print(\"Time for JPDA m=1, is %f\"%(total/10))\n    # total = 0\n    # for i in range(10):\n    #     start = time.time()\n    #     ass = linear_assignment(a)\n    #     end = time.time()\n    #     total+= end-start\n    # print(\"Time for Hungarian, is %f\"%(total/10))\n    \n    np.random.seed(14295)\n    # Check JPDA matches Hungarian\n    # while True:\n    #     print('*******')\n    #     a = np.random.randn(100,100)\n    #     X, _ = new_m_best_sol(a, 1, 10)\n    #     X = np.reshape(X[0], (100,101))[:,1:]\n    #     ass = linear_assignment(a)\n    #     output_hungarian = np.zeros(a.shape)\n    #     output_hungarian[ass[:,0], ass[:, 1]] = 1\n    #     assert(np.all(output_hungarian==X))\n    #\n    # Output to file to check\n\n    #  np.random.seed(14295)\n    # vals = []\n    # a = np.random.randn(5,5)\n    a = np.array([[0.1,0.6,0.2,0.3],[0.4,0.1,0.9,0.4],[0.3,0.5,0.1,0.7],[0.8,0.2,0.2,0.1]])\n    num_solutions(a)\n    # enumerate_solutions(a.shape[0], a.shape[1]+1)\n    # ass = linear_assignment_wrapper(a)\n    # m = Model()\n    sols, vals = new_m_best_sol(a, 100, 10)\n    for i, val in enumerate(vals):\n        print(np.reshape(sols[i], (4,5)), val)\n    # print(np.reshape(sols[1], (4,5)), vals[1])\n    # print(np.reshape(sols[2], (4,5)), vals[2])\n    # print(np.reshape(sols[3], (4,5)), vals[3])\n\n    # with open('test.pkl', 'wb') as f:\n    #     pickle.dump(vals, f)\n"
  },
  {
    "path": "paper_experiments/utils/nn_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\n\ndef _pdist(a, b):\n    \"\"\"Compute pair-wise squared distance between points in `a` and `b`.\n\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n\n    \"\"\"\n    a, b = np.asarray(a), np.asarray(b)\n    if len(a) == 0 or len(b) == 0:\n        return np.zeros((len(a), len(b)))\n    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)\n    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]\n    r2 = np.clip(r2, 0., float(np.inf))\n    return r2\n\n\ndef _cosine_distance(a, b, data_is_normalized=False):\n    \"\"\"Compute pair-wise cosine distance between points in `a` and `b`.\n\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n    data_is_normalized : Optional[bool]\n        If True, assumes rows in a and b are unit length vectors.\n        Otherwise, a and b are explicitly normalized to lenght 1.\n\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n\n    \"\"\"\n    if not data_is_normalized:\n        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)\n        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)\n    return 1. - np.dot(a, b.T)\n\ndef _cosine_distance_torch(a, b, data_is_normalized=False):\n    '''\n    _cosine_distance but torched\n    '''\n    if not data_is_normalized:\n        a = a / torch.norm(a, dim=1, keepdim=True)\n        b = b / torch.norm(b, dim=1, keepdim=True)\n    return 1. - torch.matmul(a, torch.transpose(b,0,1))\n\ndef _nn_euclidean_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (Euclidean).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest Euclidean distance to a sample in `x`.\n\n    \"\"\"\n    distances = _pdist(x, y)\n    return np.maximum(0.0, distances.min(axis=0))\n\ndef _nn_euclidean_distance_torch(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (Euclidean).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest Euclidean distance to a sample in `x`.\n\n    \"\"\"\n    x = x/((x*x).sum(1, keepdim = True)).sqrt()\n    y = y/((y*y).sum(1, keepdim = True)).sqrt()\n    sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2)\n    sim = sim.exp()\n    sim = (sim - 1)/(sim + 1)\n    sim = torch.min(sim, 0)[0]\n    return sim\n    \ndef _nn_cosine_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (cosine).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest cosine distance to a sample in `x`.\n\n    \"\"\"\n    distances = _cosine_distance(x, y)\n    return distances.min(axis=0)\n\ndef _nn_cosine_distance_torch(x,y):\n    '''\n    Same as _nn_cosine_distance except torched\n    '''\n    distances = _cosine_distance_torch(x,y)\n    return torch.min(distances, 0)[0]\n\nclass NearestNeighborDistanceMetric(object):\n    \"\"\"\n    A nearest neighbor distance metric that, for each target, returns\n    the closest distance to any sample that has been observed so far.\n\n    Parameters\n    ----------\n    metric : str\n        Either \"euclidean\" or \"cosine\".\n    matching_threshold: float\n        The matching threshold. Samples with larger distance are considered an\n        invalid match.\n    budget : Optional[int]\n        If not None, fix samples per class to at most this number. Removes\n        the oldest samples when the budget is reached.\n\n    Attributes\n    ----------\n    samples : Dict[int -> List[ndarray]]\n        A dictionary that maps from target identities to the list of samples\n        that have been observed so far.\n\n    \"\"\"\n\n    def __init__(self, metric, budget=None):\n\n\n        if metric == \"euclidean\":\n            self._metric = _nn_euclidean_distance\n            self._metric_torch = _nn_euclidean_distance_torch\n        elif metric == \"cosine\":\n            self._metric = _nn_cosine_distance\n            self._metric_torch = _nn_cosine_distance_torch\n        else:\n            raise ValueError(\n                \"Invalid metric; must be either 'euclidean' or 'cosine'\")\n        self.budget = budget\n        self.samples = {}\n        self.samples_2d = {}\n\n    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):\n        \"\"\"Update the distance metric with new data.\n\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : ndarray\n            An integer array of associated target identities.\n        active_targets : List[int]\n            A list of targets that are currently present in the scene.\n\n        \"\"\"\n        for feature, target in zip(features, targets):\n            if feature is not None:\n                self.samples.setdefault(target, []).append(feature)\n            else:\n                self.samples.setdefault(target, [])\n            if self.budget is not None:\n                self.samples[target] = self.samples[target][-self.budget:]\n        self.samples = {k: self.samples[k] for k in active_targets if k in targets}\n        for target in active_targets:\n            self.samples.setdefault(target, [])\n        \n        for feature_2d, target in zip(features_2d, targets_2d):\n            self.samples_2d.setdefault(target, []).append(feature_2d)\n            if self.budget is not None:\n                self.samples_2d[target] = self.samples_2d[target][-self.budget:]\n\n        self.samples_2d = {k: self.samples_2d[k] for k in active_targets}\n\n    def distance(self, features, targets, compare_2d=False):\n        \"\"\"Compute distance between features and targets.\n\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : List[int]\n            A list of targets to match the given `features` against.\n\n        Returns\n        -------\n        ndarray\n            Returns a cost matrix of shape len(targets), len(features), where\n            element (i, j) contains the closest squared distance between\n            `targets[i]` and `features[j]`.\n\n        \"\"\"\n        cost_matrix = np.zeros((len(targets), len(features)))\n        for i, target in enumerate(targets):\n            if compare_2d:            \n                cost_matrix[i, :] = self._metric(self.samples_2d[target], features)\n            else:\n                cost_matrix[i, :] = self._metric(self.samples[target], features)\n        return cost_matrix\n\n    def distance_torch(self, features, targets, compare_2d=False):\n        '''\n        Same as distance except torched.\n        '''\n        features = torch.from_numpy(features).cuda()\n        cost_matrix = torch.zeros(len(targets), len(features)).cuda()\n        for i, target in enumerate(targets):\n            if compare_2d:\n                cost_matrix[i, :] = self._metric_torch(torch.from_numpy(np.array(self.samples_2d[target])).cuda(), features)\n            else:\n                cost_matrix[i, :] = self._metric_torch(torch.from_numpy(np.array(self.samples[target])).cuda(), features)\n        return cost_matrix.cpu().numpy()\n\n    def check_samples(self, targets):\n        for target in targets:\n            if len(self.samples[target]) == 0:\n                return True\n        return False\n"
  },
  {
    "path": "paper_experiments/utils/pointnet_tf_util.py",
    "content": "\"\"\" Wrapper functions for TensorFlow layers.\n\nAuthor: Charles R. Qi\nDate: November 2016\n\"\"\"\n\nimport numpy as np\nimport tensorflow as tf\n\ndef _variable_on_cpu(name, shape, initializer, use_fp16=False):\n  \"\"\"Helper to create a Variable stored on CPU memory.\n  Args:\n    name: name of the variable\n    shape: list of ints\n    initializer: initializer for Variable\n  Returns:\n    Variable Tensor\n  \"\"\"\n  with tf.device('/cpu:0'):\n    dtype = tf.float16 if use_fp16 else tf.float32\n    var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)\n  return var\n\ndef _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):\n  \"\"\"Helper to create an initialized Variable with weight decay.\n\n  Note that the Variable is initialized with a truncated normal distribution.\n  A weight decay is added only if one is specified.\n\n  Args:\n    name: name of the variable\n    shape: list of ints\n    stddev: standard deviation of a truncated Gaussian\n    wd: add L2Loss weight decay multiplied by this float. If None, weight\n        decay is not added for this Variable.\n    use_xavier: bool, whether to use xavier initializer\n\n  Returns:\n    Variable Tensor\n  \"\"\"\n  if use_xavier:\n    initializer = tf.contrib.layers.xavier_initializer()\n  else:\n    initializer = tf.truncated_normal_initializer(stddev=stddev)\n  var = _variable_on_cpu(name, shape, initializer)\n  if wd is not None:\n    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')\n    tf.add_to_collection('losses', weight_decay)\n  return var\n\n\ndef conv1d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=1,\n           padding='SAME',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=0.0,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 1D convolution with non-linear operation.\n\n  Args:\n    inputs: 3-D tensor variable BxLxC\n    num_output_channels: int\n    kernel_size: int\n    scope: string\n    stride: int\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_in_channels = inputs.get_shape()[-1].value\n    kernel_shape = [kernel_size,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    outputs = tf.nn.conv1d(inputs, kernel,\n                           stride=stride,\n                           padding=padding)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n\n    if bn:\n      outputs = batch_norm_for_conv1d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\n\n\ndef conv2d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1],\n           padding='SAME',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=0.0,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 2D convolution with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      num_in_channels = inputs.get_shape()[-1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_in_channels, num_output_channels]\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      outputs = tf.nn.conv2d(inputs, kernel,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn')\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n\ndef conv2d_transpose(inputs,\n                     num_output_channels,\n                     kernel_size,\n                     scope,\n                     stride=[1, 1],\n                     padding='SAME',\n                     use_xavier=True,\n                     stddev=1e-3,\n                     weight_decay=0.0,\n                     activation_fn=tf.nn.relu,\n                     bn=False,\n                     bn_decay=None,\n                     is_training=None):\n  \"\"\" 2D convolution transpose with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n\n  Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      num_in_channels = inputs.get_shape()[-1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_output_channels, num_in_channels] # reversed to conv2d\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      \n      # from slim.convolution2d_transpose\n      def get_deconv_dim(dim_size, stride_size, kernel_size, padding):\n          dim_size *= stride_size\n\n          if padding == 'VALID' and dim_size is not None:\n            dim_size += max(kernel_size - stride_size, 0)\n          return dim_size\n\n      # caculate output shape\n      batch_size = inputs.get_shape()[0].value\n      height = inputs.get_shape()[1].value\n      width = inputs.get_shape()[2].value\n      out_height = get_deconv_dim(height, stride_h, kernel_h, padding)\n      out_width = get_deconv_dim(width, stride_w, kernel_w, padding)\n      output_shape = [batch_size, out_height, out_width, num_output_channels]\n\n      outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn')\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n   \n\ndef conv3d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1, 1],\n           padding='SAME',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=0.0,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 3D convolution with non-linear operation.\n\n  Args:\n    inputs: 5-D tensor variable BxDxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 3 ints\n    scope: string\n    stride: a list of 3 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    num_in_channels = inputs.get_shape()[-1].value\n    kernel_shape = [kernel_d, kernel_h, kernel_w,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.conv3d(inputs, kernel,\n                           [1, stride_d, stride_h, stride_w, 1],\n                           padding=padding)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n    \n    if bn:\n      outputs = batch_norm_for_conv3d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\ndef fully_connected(inputs,\n                    num_outputs,\n                    scope,\n                    use_xavier=True,\n                    stddev=1e-3,\n                    weight_decay=0.0,\n                    activation_fn=tf.nn.relu,\n                    bn=False,\n                    bn_decay=None,\n                    is_training=None):\n  \"\"\" Fully connected layer with non-linear operation.\n  \n  Args:\n    inputs: 2-D tensor BxN\n    num_outputs: int\n  \n  Returns:\n    Variable tensor of size B x num_outputs.\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_input_units = inputs.get_shape()[-1].value\n    weights = _variable_with_weight_decay('weights',\n                                          shape=[num_input_units, num_outputs],\n                                          use_xavier=use_xavier,\n                                          stddev=stddev,\n                                          wd=weight_decay)\n    outputs = tf.matmul(inputs, weights)\n    biases = _variable_on_cpu('biases', [num_outputs],\n                             tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n     \n    if bn:\n      outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\ndef max_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D max pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.max_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\ndef avg_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D avg pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\n\ndef max_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D max pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.max_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\ndef avg_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D avg pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\n\n\n\n\ndef batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay):\n  \"\"\" Batch normalization on convolutional maps and beyond...\n  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow\n  \n  Args:\n      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC\n      is_training:   boolean tf.Varialbe, true indicates training phase\n      scope:         string, variable scope\n      moments_dims:  a list of ints, indicating dimensions for moments calculation\n      bn_decay:      float or float tensor variable, controling moving average weight\n  Return:\n      normed:        batch-normalized maps\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_channels = inputs.get_shape()[-1].value\n    beta = tf.Variable(tf.constant(0.0, shape=[num_channels]),\n                       name='beta', trainable=True)\n    gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]),\n                        name='gamma', trainable=True)\n    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')\n    decay = bn_decay if bn_decay is not None else 0.9\n    ema = tf.train.ExponentialMovingAverage(decay=decay)\n    # Operator that maintains moving averages of variables.\n    ema_apply_op = tf.cond(is_training,\n                           lambda: ema.apply([batch_mean, batch_var]),\n                           lambda: tf.no_op())\n    \n    # Update moving average and return current batch's avg and var.\n    def mean_var_with_update():\n      with tf.control_dependencies([ema_apply_op]):\n        return tf.identity(batch_mean), tf.identity(batch_var)\n    \n    # ema.average returns the Variable holding the average of var.\n    mean, var = tf.cond(is_training,\n                        mean_var_with_update,\n                        lambda: (ema.average(batch_mean), ema.average(batch_var)))\n    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)\n  return normed\n\n\ndef batch_norm_for_fc(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on FC data.\n  \n  Args:\n      inputs:      Tensor, 2D BxC input\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,], bn_decay)\n\n\ndef batch_norm_for_conv1d(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on 1D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 3D BLC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1], bn_decay)\n\n\n\n  \ndef batch_norm_for_conv2d(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on 2D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 4D BHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay)\n\n\n\ndef batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on 3D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 5D BDHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2,3], bn_decay)\n\n\ndef dropout(inputs,\n            is_training,\n            scope,\n            keep_prob=0.5,\n            noise_shape=None):\n  \"\"\" Dropout layer.\n\n  Args:\n    inputs: tensor\n    is_training: boolean tf.Variable\n    scope: string\n    keep_prob: float in [0,1]\n    noise_shape: list of ints\n\n  Returns:\n    tensor variable\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    outputs = tf.cond(is_training,\n                      lambda: tf.nn.dropout(inputs, keep_prob, noise_shape),\n                      lambda: inputs)\n    return outputs\n"
  },
  {
    "path": "paper_experiments/utils/pointnet_transform_nets.py",
    "content": "import tensorflow as tf\nimport numpy as np\nimport sys\nimport os\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(BASE_DIR)\nsys.path.append(os.path.join(BASE_DIR, '../utils'))\nimport pointnet_tf_util\n\ndef input_transform_net(point_cloud, is_training, bn_decay=None, K=3):\n    \"\"\" Input (XYZ) Transform Net, input is BxNx3 gray image\n        Return:\n            Transformation matrix of size 3xK \"\"\"\n    batch_size = point_cloud.get_shape()[0].value\n\n    input_image = tf.expand_dims(point_cloud, -1)\n    net = pointnet_tf_util.conv2d(input_image, 64, [1,3],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv1', bn_decay=bn_decay)\n    net = pointnet_tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv2', bn_decay=bn_decay)\n    net = pointnet_tf_util.conv2d(net, 1024, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv3', bn_decay=bn_decay)\n    net = tf.reduce_max(net, axis = 1)\n\n\n    net = tf.reshape(net, [batch_size, -1])\n    net = pointnet_tf_util.fully_connected(net, 512, bn=True, is_training=is_training,\n                                  scope='tfc1', bn_decay=bn_decay)\n    net = pointnet_tf_util.fully_connected(net, 256, bn=True, is_training=is_training,\n                                  scope='tfc2', bn_decay=bn_decay)\n\n    with tf.variable_scope('transform_XYZ') as sc:\n        assert(K==3)\n        weights = tf.get_variable('weights', [256, 3*K],\n                                  initializer=tf.constant_initializer(0.0),\n                                  dtype=tf.float32)\n        biases = tf.get_variable('biases', [3*K],\n                                 initializer=tf.constant_initializer(0.0),\n                                 dtype=tf.float32)\n        biases = biases + tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)\n        transform = tf.matmul(net, weights)\n        transform = tf.nn.bias_add(transform, biases)\n\n    transform = tf.reshape(transform, [-1, 3, K])\n    return transform\n\n\ndef feature_transform_net(inputs, is_training, bn_decay=None, K=64):\n    \"\"\" Feature Transform Net, input is BxNx1xK\n        Return:\n            Transformation matrix of size KxK \"\"\"\n    batch_size = inputs.get_shape()[0].value\n    num_point = inputs.get_shape()[1].value\n\n    net = pointnet_tf_util.conv2d(inputs, 64, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv1', bn_decay=bn_decay)\n    net = pointnet_tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv2', bn_decay=bn_decay)\n    net = pointnet_tf_util.conv2d(net, 1024, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='tconv3', bn_decay=bn_decay)\n    net = tf.reduce_max(net, axis = 1)\n\n    net = tf.reshape(net, [batch_size, -1])\n    net = pointnet_tf_util.fully_connected(net, 512, bn=True, is_training=is_training,\n                                  scope='tfc1', bn_decay=bn_decay)\n    net = pointnet_tf_util.fully_connected(net, 256, bn=True, is_training=is_training,\n                                  scope='tfc2', bn_decay=bn_decay)\n\n    with tf.variable_scope('transform_feat') as sc:\n        weights = tf.get_variable('weights', [256, K*K],\n                                  initializer=tf.constant_initializer(0.0),\n                                  dtype=tf.float32)\n        biases = tf.get_variable('biases', [K*K],\n                                 initializer=tf.constant_initializer(0.0),\n                                 dtype=tf.float32)\n        biases = biases + tf.constant(np.eye(K).flatten(), dtype=tf.float32)\n        transform = tf.matmul(net, weights)\n        transform = tf.nn.bias_add(transform, biases)\n\n    transform = tf.reshape(transform, [batch_size, K, K])\n    return transform\n"
  },
  {
    "path": "paper_experiments/utils/read_detections.py",
    "content": "import numpy as np\nimport pdb\nfrom deep_sort_utils import non_max_suppression as deepsort_nms\n\ndef read_ground_truth_2d_detections(detection_path_2d, frame_idx, detection_matrix = None, threshold = -np.inf, nms_threshold = 0.75):\n    if detection_matrix is None:\n        detection_matrix = np.loadtxt(detection_path_2d, delimiter=',')\n\n    if len(detection_matrix) == 0:\n        return [], [], [], []\n    if len(detection_matrix.shape) == 1:\n        detection_matrix = np.expand_dims(detection_matrix, axis=0)\n\n    frame_indices = detection_matrix[:, 0].astype(np.int32)\n    if frame_idx is not None:\n        mask = frame_indices == frame_idx\n        detection_file = detection_matrix[mask]\n    else:\n        detection_file = detection_matrix\n\n    frame_indices = detection_matrix[:, 0].astype(np.int32)\n    if frame_idx is not None:\n        conf = np.expand_dims(detection_file[:,6].astype(np.float32), 1)\n        mask = conf[:,0] > threshold\n        detection_file = detection_file[mask]\n        object_ids = np.expand_dims(detection_file[:,1].astype(np.float32), 1)\n        x1 = np.expand_dims(detection_file[:,2].astype(np.float32), 1)\n        y1 = np.expand_dims(detection_file[:,3].astype(np.float32), 1)\n        w = np.expand_dims(detection_file[:,4].astype(np.float32), 1)\n        h = np.expand_dims(detection_file[:,5].astype(np.float32), 1)\n        conf = np.expand_dims(detection_file[:,6].astype(np.float32), 1)\n        cls_conf = -np.ones(conf.shape)\n        cls_pred = -np.ones(conf.shape)\n        detections = np.hstack([x1,y1,x1+w,y1+h, conf, cls_conf, cls_pred])\n        boxes = np.hstack([x1, y1, w, h])\n        indices = deepsort_nms(boxes, nms_threshold, np.squeeze(conf))\n        detections_out = []\n        for i in range(len(boxes)):\n            if i in indices:\n                detections_out.append(detections[i])\n        if detections_out:\n            detections = np.vstack(detections_out)\n        else:\n            detections = []\n        return detections, object_ids, detection_matrix\n    else:\n        detections = []\n        total_ids = []\n        object_ids = np.expand_dims(detection_file[:,1].astype(np.float32), 1)\n        for frame in np.unique(frame_indices):\n            frame_mask = frame_indices==frame\n            x1 = np.expand_dims(detection_file[frame_mask,2].astype(np.float32), 1)\n            y1 = np.expand_dims(detection_file[frame_mask,3].astype(np.float32), 1)\n            w = np.expand_dims(detection_file[frame_mask,4].astype(np.float32), 1)\n            h = np.expand_dims(detection_file[frame_mask,5].astype(np.float32), 1)\n            conf = np.expand_dims(detection_file[frame_mask,6].astype(np.float32), 1)\n            boxes = np.hstack([x1, y1, w, h])\n            cls_conf = -np.ones(conf.shape)\n            cls_pred = -np.ones(conf.shape)\n            frame_detections = np.hstack([x1,y1,x1+w,y1+h, conf, cls_conf, cls_pred])\n            indices = deepsort_nms(boxes, nms_threshold, np.squeeze(conf))\n            frame_detections_out = []\n            ids = np.zeros((x1.shape[0], 1))\n            for i in range(len(object_ids)):\n                if i in indices:\n                    frame_detections_out.append(frame_detections[i])\n                elif i < ids.shape[0]:\n                    ids[i] = -1\n            if frame_detections_out:\n                frame_detections = np.vstack(frame_detections_out)\n                detections.append(frame_detections)\n            total_ids.append(ids)\n\n    detections = np.vstack(detections)\n    ids = np.vstack(total_ids)\n\n    frame_indices = frame_indices[np.squeeze(ids != -1)]\n    object_ids = object_ids[np.squeeze(ids != -1)]\n\n    return detections, object_ids, frame_indices\n\ndef read_ground_truth_3d_detections(detection_path_3d, frame_idx):\n    \n    detection_file = np.loadtxt(detection_path_3d, delimiter=',')\n    frame_indices = detection_file[:, 0].astype(np.int32)\n    if frame_idx is not None:\n        mask = frame_indices == frame_idx\n        detection_file = detection_file[mask]\n\n    x = np.expand_dims(detection_file[:,2].astype(np.float32), 1)\n    y = np.expand_dims(detection_file[:,3].astype(np.float32), 1)\n    z = np.expand_dims(detection_file[:,4].astype(np.float32), 1)\n    l = np.expand_dims(detection_file[:,5].astype(np.float32), 1)\n    h = np.expand_dims(detection_file[:,6].astype(np.float32), 1)\n    w = np.expand_dims(detection_file[:,7].astype(np.float32), 1)\n    theta = np.expand_dims(detection_file[:,8].astype(np.float32), 1)\n    ids = np.expand_dims(detection_file[:,1].astype(np.float32), 1)\n\n    boxes_3d = np.hstack([x, y, z, l, h, w, theta])\n    if frame_idx is None:\n        return boxes_3d, ids, frame_indices\n    return boxes_3d, ids\n"
  },
  {
    "path": "paper_experiments/utils/resnet_reid_utils.py",
    "content": "import torch\nimport os\nimport sys\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torchvision import transforms\nfrom PIL import Image\nimport numpy as np\nPACKAGE_PARENT = '..'\nSCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))\nsys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))\nfrom models.resnet_reid_models import ICT_ResNet\n\nclass Feature_ResNet(nn.Module):\n    def __init__(self,n_layer,output_color):\n        super(Feature_ResNet,self).__init__()\n        all_model = ICT_ResNet(1,10,9,n_layer,pretrained=False)\n        for name,modules in all_model._modules.items():\n            if name.find('fc') == -1 :\n                self.add_module(name,modules)\n        if output_color == True:\n            self.fc_c = all_model.fc_c\n        self.output_color = output_color\n    def forward(self,x):\n        for name,module in self._modules.items():\n            if name.find('fc') == -1:\n                x = module(x)\n        x = x.view(x.size(0),-1)\n        if self.output_color == False:  return x\n        else:\n            output  = self.fc_c(x)\n            color = torch.max(self.fc_c(x),dim=1)[1]\n            return x,color\n\nclass ResNet_Loader(object):\n    def __init__(self,model_path,n_layer=50,batch_size=4,output_color=False):\n        self.batch_size = batch_size\n        self.output_color = output_color\n\n        self.model = Feature_ResNet(n_layer,output_color)\n        state_dict = torch.load(model_path)\n        for key in list(state_dict.keys()):\n            if key.find('fc') != -1 and key.find('fc_c') == -1 :\n                del state_dict[key]\n            elif output_color == False and key.find('fc_c') != -1:\n                del state_dict[key]\n        \n        self.model.load_state_dict(state_dict)\n        self.model.eval()\n        # print('loading resnet%d model'%(n_layer))\n        self.compose = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),\n                                           transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])])\n        self.upsample = nn.Upsample(size=(224,224),mode='bilinear')\n\n    # @profile\n    def inference(self,patches):\n        self.model.cuda()\n        feature_list = []\n        color_list = []\n        batch_list = []\n        self.batch_size = len(patches)\n\n        for i, patch in enumerate(patches):\n            img = self.compose(transforms.ToPILImage()((patch.cpu().numpy()*255).astype(np.uint8)))\n            # img = self.upsample(patch.permute(2,0,1).unsqueeze_(0)).squeeze(0)\n\n            batch_list.append(img)\n            if (i+1)% self.batch_size == 0:\n                if self.output_color == False:\n                    features = self.model(Variable(torch.stack(batch_list)).cuda())\n                    for feature in features:\n                        feature_list.append(feature.data)\n                else:\n                    features,colors = self.model(Variable(torch.stack(batch_list)).cuda())\n                    feature_list.append(features.data)\n                    color_list.append(colors.data)\n                batch_list = []\n        if len(batch_list)>0:\n            if self.output_color == False:\n                features = self.model(Variable(torch.stack(batch_list)).cuda())\n                for feature in features:\n                    feature_list.append(feature.data)\n            else:\n                features,colors = self.model(Variable(torch.stack(batch_list)).cuda())\n                feature_list.append(features.data)\n                color_list.append(colors.data)\n            batch_list = []\n        # self.model.cpu() TODO: What does this do? Why would we move model to CPU?\n        if self.output_color == False:\n            # feature_list = torch.cat(feature_list,dim=0)\n            return feature_list\n        else:\n            feature_list = torch.cat(feature_list,dim=0)\n            color_list = torch.cat(color_list,dim=0)\n            return feature_list,color_list\n\n"
  },
  {
    "path": "paper_experiments/utils/test_jpda.py",
    "content": "from gurobipy import *\nfrom numpy import *\n'''\ndef mycallback(model, where):\n    if where == GRB.callback.MIP:\n        print model.cbGet(GRB.callback.MIP_NODCNT)\n        print model.cbGet(GRB.callback.MIP_ITRCNT),'HEY MOTHERFUCKER'\n    if where == GRB.callback.MIPNODE:\n        print model.cbGet(GRB.callback.MIPNODE_OBJBST),'BEST OBJ'\n'''\n\n\nnumT = 100\nnumC = 100\n\nAssignment = random.random((numT,numC))\n\nm=Model(\"Assignment\")\n\nX = []\nfor t in range(numT):\n    X.append([])\n    for c in range(numC):\n        X[t].append(m.addVar(vtype=GRB.BINARY,name=\"X%d%d\"% (t, c)))\nm.update()\nm.modelSense = GRB.MAXIMIZE\nconstraintT = []\nconstraintC = []\nfor t in range(numT):\n    constraintT.append(m.addConstr(quicksum(X[t][c] for c in range(numC)) == 1 ,'constraintT%d' % t))\n    \nfor c in range(numC):\n    constraintT.append(m.addConstr(quicksum(X[t][c] for t in range(numT)) == 1 ,'constraintC%d' % t))\n\nm.setObjective(quicksum(quicksum([X[t][c]*Assignment[t][c]    for c in range(numC)]) for t in range(numT)))\n    \nm.update()\n\n#m.optimize(mycallback)\nm.optimize()\n\n\nprint('runtime is %f'%m.Runtime)\n\n"
  },
  {
    "path": "paper_experiments/utils/test_kf/.gitignore",
    "content": "*.p\n"
  },
  {
    "path": "paper_experiments/utils/test_kf/run_kf_test.py",
    "content": "import sys\nsys.path.insert(0, '..')\nimport kalman_filter\nimport kf_simple3d\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pickle\nimport os.path\nimport pdb\nnp.set_printoptions(precision=4)\n\nclass Track:\n    def __init__(self, track_id, first_detection, kf_type):\n        # initiate kf\n        if kf_type == \"2d\":\n            self.kf = kalman_filter.KalmanFilter()\n        elif kf_type == \"simple3d\":\n            self.kf = kf_simple3d.KalmanFilterSimple3D()\n\n        self.mean, self.cov = self.kf.initiate(first_detection)\n\n        self.id = track_id\n        n = len(self.mean)\n        self.n = n\n        m = len(first_detection)\n        self.m = m\n\n        # initialize data stores\n        self.frame_log = np.zeros((0))\n        self.measurement_log = np.zeros((0, m))\n        self.gt_log = np.zeros((0, m))\n        self.mean_log = np.zeros((0, n))\n        self.cov_log = np.zeros((0, n, n))\n        self.gating_distance_log = np.zeros((0))\n\n    def update(self, measurement, gt, frame):\n\n        # log data\n        self.mean_log = np.vstack((self.mean_log, self.mean))\n        self.cov_log = np.concatenate((self.cov_log, self.cov[np.newaxis,:,:]))\n        self.measurement_log = np.vstack((self.measurement_log, measurement))\n        self.gt_log = np.vstack((self.gt_log, gt))\n        self.frame_log = np.append(self.frame_log, frame)\n\n        gating_distance = self.kf.gating_distance(self.mean, self.cov, measurement)\n        self.gating_distance_log = np.append(self.gating_distance_log, gating_distance)\n\n        # KF predict and update\n        self.mean, self.cov = self.kf.predict(self.mean, self.cov)\n        self.mean, self.cov = self.kf.update(self.mean, self.cov, measurement)\n\n\n    def plot(self):\n        t = self.frame_log\n        gt = self.gt_log\n        meas = self.measurement_log\n        state = self.mean_log\n\n        plt.subplot(321)\n        plt.plot(t, gt[:,0], label='Ground Truth')\n        plt.plot(t, meas[:,0], label='Measured')\n        plt.plot(t, state[:,0], label='filtered')\n        plt.xlabel('time')\n        plt.ylabel('x')\n        plt.legend()\n\n        plt.subplot(322)\n        plt.plot(t, gt[:,1], label='Ground Truth')\n        plt.plot(t, meas[:,1], label='Measured')\n        plt.plot(t, state[:,1], label='filtered')\n        plt.xlabel('time')\n        plt.ylabel('y')\n        plt.legend()\n\n        plt.subplot(323)\n        plt.plot(gt[:,0], gt[:,1], label='Ground Truth')\n        plt.plot(meas[:,0], meas[:,1], label='Measured')\n        plt.plot(state[:,0], state[:,1], label='filtered')\n        plt.xlabel('x')\n        plt.ylabel('y')\n        plt.legend()\n\n\n        plt.subplot(324)\n        plt.plot(t, state[:,self.m], label='filtered')\n        plt.xlabel('time')\n        plt.ylabel('Vx')\n        plt.legend()\n\n        plt.subplot(325)\n        plt.plot(t, state[:,self.m+1], label='filtered')\n        plt.xlabel('time')\n        plt.ylabel('Vy')\n        plt.legend()\n\n        plt.show()\n\n\ndef file2data(fname):\n    # data should be a list of lists of numpy arrays\n    # Each element in the list represents a frame\n    # Each frame is a list of detections\n    # Each detection is a numpy array of measurements. \n    with open(fname, \"rb\") as f:\n        data = pickle.load(f)\n    return data\n\ndef cmp_tracks(track1, track2):\n    # Expect perfect match in mean log and gating distance\n    mean_log_pass = np.max(np.abs(track1.mean_log == track2.mean_log)) > 1e-12\n    gating_distance_pass = np.max(np.abs(track1.gating_distance_log == track2.gating_distance_log)) > 1e-12\n    return mean_log_pass and gating_distance_pass\n\ndef cmp(data, val):\n    any_fail = False\n    for itrack in data:\n        passed = cmp_tracks(data[itrack], val[itrack])\n        if not passed:\n            print(\"Mismatch found in track: \", itrack)\n            # pdb.set_trace()\n            any_fail = True\n        else: \n            print(\"Tracks matched: \", itrack)\n\n    return not any_fail\n\n\ndef validate(data, fname):\n    if os.path.isfile(fname):\n        val_data = file2data(fname)\n        return cmp(data, val_data)\n    else:\n        with open(fname, \"wb\") as f:\n            pickle.dump(data, f)   \n        return True\n\ndef run_kf_test(fname, kf_type):\n    print(\"Running test for: {}\".format(fname))\n    data = file2data(fname)\n\n    first_frame = data[0]\n\n    tracks = {}\n    for detection in first_frame:\n        meas, gt, gt_id = (detection[0], detection[1], detection[2])\n        tracks[gt_id] = Track(gt_id, meas, kf_type)\n\n    frame_cnt = 0; \n    for frame in data:\n        for detection in frame:\n            meas, gt, gt_id = (detection[0], detection[1], detection[2])\n            tracks[gt_id].update(meas, gt, frame_cnt)\n\n        frame_cnt += 1\n\n    passed = validate(tracks, fname + \".val\")\n\n    if not passed:\n        for track_id in tracks:\n            tracks[track_id].plot()\n\n\nif __name__=='__main__':\n    run_kf_test(\"single_track_4state_test.p\", \"2d\")\n    run_kf_test(\"two_track_4state_test.p\", \"2d\")\n    run_kf_test(\"single_track_6state_test.p\", \"simple3d\")\n\n\n\n"
  },
  {
    "path": "paper_experiments/utils/test_kf/write_kf_test.py",
    "content": "import sys\nsys.path.insert(0, '..')\nimport kalman_filter\nimport numpy as np\nimport pickle\nimport pdb\n\ndef data2file(data, fname):\n    # data should be a list\n    # Each element in the list represents a frame\n    # Each frame is a list of detections\n    # Each detection is a list of [measurement, ground truth, gt track id]\n\n    with open(fname, \"wb\") as f:\n        pickle.dump(data, f)   \n\ndef add_noise(center, std):\n    out = np.zeros_like(center)\n    for i in range(len(center)):\n        out[i] = np.random.normal(center[i], std[i])\n    return out\n\ndef single_track_4state_test():\n    np.random.seed(0)\n\n    data = []\n\n    # Iterate over frames\n    for i in range(100):\n        frame = []\n        track_id = 0\n\n        # Track 0\n        x_gt = i*10\n        y_gt = i*10\n        a_gt = 2\n        h_gt = 400\n\n        gt = np.array([x_gt, y_gt, a_gt, h_gt])\n        meas = add_noise(gt, [15, 15, 0.1, 5])\n\n        detection = [meas, gt, 0]\n        frame.append(detection)\n\n        data.append(frame)\n\n    data2file(data, \"single_track_4state_test.p\")\n\ndef two_track_4state_test():\n    np.random.seed(0)\n\n    data = []\n\n    # Iterate over frames\n    for i in range(100):\n        frame = []\n        track_id = 0\n\n        # Track 0\n        x_gt = i*10\n        y_gt = i*10\n        a_gt = 2\n        h_gt = 400\n\n        gt = np.array([x_gt, y_gt, a_gt, h_gt])\n        meas = add_noise(gt, [15, 15, 0.1, 5])\n\n        detection = [meas, gt, track_id]\n        frame.append(detection)\n\n        # Track 1\n        track_id += 1\n        x_gt = i*i/10\n        y_gt = i**0.5*30\n        a_gt = 2\n        h_gt = 400\n\n        gt = np.array([x_gt, y_gt, a_gt, h_gt])\n        meas = add_noise(gt, [20, 10, 0.2, 8])\n\n        detection = [meas, gt, track_id]\n        frame.append(detection)\n\n        data.append(frame)\n\n    data2file(data, \"two_track_4state_test.p\")\n\ndef single_track_6state_test():\n    np.random.seed(0)\n\n    data = []\n\n    # Iterate over frames\n    for i in range(100):\n        frame = []\n        track_id = 0\n\n        # Track 0\n        x_gt = i*10\n        y_gt = i*10\n        l_gt = 400\n        h_gt = 400\n        w_gt = 400\n        theta_gt = i/10\n\n        gt = np.array([x_gt, y_gt, l_gt, h_gt, w_gt, theta_gt])\n        gt = add_noise(gt, [3, 3, 1, 1, 1, 1*0.017])\n        meas = add_noise(gt, [15, 15, 5, 5, 5, 5*0.017])\n\n        detection = [meas, gt, 0]\n        frame.append(detection)\n\n        data.append(frame)\n\n    data2file(data, \"single_track_6state_test.p\")\n\nif __name__=='__main__':\n    single_track_4state_test()\n    two_track_4state_test()\n    single_track_6state_test()"
  },
  {
    "path": "paper_experiments/utils/track.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\nimport copy\n\nfrom .imm import IMMFilter2D\n\nclass TrackState:\n    \"\"\"\n    Enumeration type for the single target track state. Newly created tracks are\n    classified as `tentative` until enough evidence has been collected. Then,\n    the track state is changed to `confirmed`. Tracks that are no longer alive\n    are classified as `deleted` to mark them for removal from the set of active\n    tracks.\n\n    \"\"\"\n\n    Tentative = 1\n    Confirmed = 2\n    Deleted = 3\n\n\nclass Track:\n    \"\"\"\n    A single target track with state space `(x, y, a, h)` and associated\n    velocities, where `(x, y)` is the center of the bounding box, `a` is the\n    aspect ratio and `h` is the height.\n\n    Parameters\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    max_age : int\n        The maximum number of consecutive misses before the track state is\n        set to `Deleted`.\n    feature : Optional[ndarray]\n        Feature vector of the detection this track originates from. If not None,\n        this feature is added to the `features` cache.\n\n    Attributes\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    hits : int\n        Total number of measurement updates.\n    age : int\n        Total number of frames since first occurance.\n    time_since_update : int\n        Total number of frames since last measurement update.\n    state : TrackState\n        The current track state.\n    features : List[ndarray]\n        A cache of features. On each measurement update, the associated feature\n        vector is added to this list.\n\n    \"\"\"\n    def __init__(self, mean, covariance, model_probabilities, track_id, n_init, max_age,\n                 feature=None, appearance_feature = None, cuda = False, lstm = None, kf_appearance_feature=False, last_det = None):\n\n        self.mean = mean\n        self.covariance = covariance\n        self.model_probabilities = model_probabilities\n        self.track_id = track_id\n        self.hits = 1\n        self.age = 1\n        self.time_since_update = 0\n        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n        self.cuda = cuda\n        self.state = TrackState.Tentative\n        self.features = []\n        self.features_2d = []\n        self.hidden = None\n        self.kf_appearance_feature = kf_appearance_feature\n        if lstm is None:\n            self.features.append(feature)\n            self.features_2d.append(appearance_feature)\n        else:\n            self.feature_update(feature, appearance_feature, lstm)\n        if self.model_probabilities is not None:\n            self.first_detection = mean[:,:4]\n        else:\n            self.first_detection = mean[:4]\n\n        self._n_init = n_init\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n        self._max_age = max_age\n        self.matched = True\n        self.exiting = False\n        self.next_to_last_detection = None\n        self.last_detection = last_det\n        self.last_2d_det = last_det\n\n    def to_tlwh(self, kf):\n        \"\"\"Get current position in bounding box format `(top left x, top left y,\n        width, height)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        if self.model_probabilities is None:\n            if self.last_2d_det is not None: #TODO: This part\n                # print(self.last_2d_det.to_xywh(), self.mean[:4])\n                ret = self.last_2d_det.to_xywh()\n            else:\n                ret = self.mean[:4].copy()\n        else:\n            mean, _ = IMMFilter2D.combine_states(self.mean, self.covariance, self.model_probabilities)\n            ret = mean[:4].copy()\n        ret[:2] -= ret[2:] / 2\n        return ret\n\n    def to_tlbr(self):\n        \"\"\"Get current position in bounding box format `(min x, miny, max x,\n        max y)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        ret = self.to_tlwh(None)\n        ret[2:] = ret[:2] + ret[2:]\n        return ret\n\n    def update_feature(self, img, appearance_model):\n\n        x = round(self.mean[0])\n        y = round(self.mean[1])\n        a = self.mean[2]\n        box_h = int(round(self.mean[3]))\n\n        x1 = int(round(x - (x / 2)))\n        y1 = int(round(y - (y / 2)))\n        box_w = int(round(a * box_h))\n\n        Tensor = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor\n\n        # patch = torch.Tensor(img[y1:y1+box_h, x1:x1+box_w, :]).type(Tensor).permute(2,0,1)\n        patch = img[:, y1:y1+box_h, x1:x1+box_w]\n\n        if patch is None or patch.nelement()==0:\n            return None\n        patch = patch.unsqueeze(0)\n\n        with torch.no_grad():\n            feature ,_ = appearance_model(patch)\n\n            return feature.squeeze(0)\n\n    def predict(self, kf):\n        \"\"\"Propagate the state distribution to the current time step using a\n        Kalman filter prediction step.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n\n        \"\"\"\n        if self.model_probabilities is None:\n            self.mean, self.covariance = kf.predict(self.mean, self.covariance, self.last_detection, self.next_to_last_detection)\n        else:\n            self.mean, self.covariance, self.model_probabilities = kf.predict(self.mean, self.covariance, self.model_probabilities)\n        self.age += 1\n        self.time_since_update += 1\n\n    # @profile\n    def update(self, kf, detection, detections_3d=None,\n                marginalization=None, detection_idx=None, JPDA=False,\n                cur_frame = None, appearance_model = None, lstm = None,\n                only_feature=False):\n        \"\"\"Perform Kalman filter measurement update step and update the feature\n        cache.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n        detection : Detection\n            The associated detection.\n\n        \"\"\"\n        if JPDA:\n            detections = [det.to_xywh() for det in detection]\n            if self.model_probabilities is None:\n                self.mean, self.covariance = kf.update(\n                    self.mean, self.covariance, detections, marginalization, JPDA)\n            else:\n                self.mean, self.covariance, self.model_probabilities = kf.update(self.mean, self.covariance, detections, self.model_probabilities, marginalization, JPDA)\n            self.feature_update(detection, detection_idx, lstm)\n            if np.argmax(marginalization) != 0:\n                self.matched=True\n            else:\n                self.matched=False\n            if detection_idx < 0:\n                self.last_2d_det = None\n                return\n            self.hits += 1\n            self.time_since_update = 0\n            detection = detection[detection_idx]\n            self.last_2d_det = detection\n\n        else:\n            detection = detection[detection_idx]\n            if self.model_probabilities is None:\n                self.mean, self.covariance = kf.update(\n                    self.mean, self.covariance, detection.to_xywh())\n            else:\n                self.mean, self.covariance, self.model_probabilities = kf.update(self.mean, self.covariance, detection.to_xyah(), self.model_probabilities)\n            self.feature_update(detection.feature, detection.appearance_feature, lstm)\n            self.hits += 1\n            self.time_since_update = 0\n        if detection.box_3d is not None:\n            self.next_to_last_detection = self.last_detection\n            self.last_detection = detection\n        if self.age==2:\n            self.update_velocity(detection.to_xywh())\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n    \n    def delete_track(self):\n        self.state = TrackState.Deleted\n    \n    def mark_missed(self):\n        \"\"\"Mark this track as missed (no association at the current time step).\n        \"\"\"\n        if self.state == TrackState.Tentative:\n            self.state = TrackState.Deleted\n        elif self.time_since_update > self._max_age:\n            self.state = TrackState.Deleted\n\n    def update_velocity(self, new_detection):\n        if self.model_probabilities is not None:\n            for kf_n in range(2):\n                velocity_estimate = new_detection - self.first_detection\n                self.mean[kf_n,4:] = velocity_estimate[kf_n,:4]\n                # Reduce covariance of velocity by 4 times (half the standard deviation)\n                self.covariance[kf_n,:,4:] /= 4\n                self.covariance[kf_n,4:,:] /= 4\n        else:\n            velocity_estimate = new_detection - self.first_detection\n            self.mean[4:] = velocity_estimate[:4]\n            # Reduce covariance of velocity by 4 times (half the standard deviation)\n            self.covariance[:,4:] /= 4\n            self.covariance[4:,:4] /= 4\n\n    def is_tentative(self):\n        \"\"\"Returns True if this track is tentative (unconfirmed).\n        \"\"\"\n        return self.state == TrackState.Tentative\n\n    def is_confirmed(self):\n        \"\"\"Returns True if this track is confirmed.\"\"\"\n        return self.state == TrackState.Confirmed\n\n    def is_deleted(self):\n        \"\"\"Returns True if this track is dead and should be deleted.\"\"\"\n        return self.state == TrackState.Deleted\n    \n    def is_exiting(self):\n        return self.exiting\n    \n    def mark_exiting(self):\n        self.exiting = True\n\n    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):\n        if JPDA:\n            features=[d.feature for d in detections]\n            appearance_features=[d.appearance_feature for d in detections]\n            if len([i for i in features if i is None])==0:\n                combined_feature=np.sum(np.array(features).reshape(len(features), -1)\n                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features.append(combined_feature)\n            if len([i for i in appearance_features if i is None])==0:\n                combined_feature=np.sum(\n                                np.array(appearance_features).reshape(len(appearance_features), -1)\n                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features_2d.append(combined_feature)\n        else:\n            feature = detections[detection_idx].feature\n            appearance_feature = detections[detection_idx].appearance_feature\n            if feature is not None:\n                if lstm is not None:\n                    input_feature = torch.Tensor(feature).type(self.tensor)\n                    input_feature = input_feature.unsqueeze(0)\n                    with torch.no_grad():\n                        if self.hidden is None:\n                            output_feature, self.hidden = lstm(input_feature)\n                        else:\n                            output_feature, self.hidden = lstm(input_feature, self.hidden)\n                    output_feature = output_feature.cpu().numpy().squeeze(0)\n                else:\n                    output_feature = feature\n                # print(\"track:\", self.track_id, \"original\", len(self.features), \"2D\", len(self.features_2d))\n                self.features.append(output_feature)\n                # diffs = [] #TODO: REMOVE\n                # for i in range(len(self.features)-1):\n                #     diffs.append(np.linalg.norm(self.features[i],self.features[i+1]))\n                # diffs = np.asarray(diffs)\n                # print(\"track:\", self.track_id, \"count:\", len(self.features),\"mean\", np.mean(diffs), \"std\", np.std(diffs))\n            if appearance_feature is not None:\n                self.features_2d.append(appearance_feature)\n"
  },
  {
    "path": "paper_experiments/utils/track_3d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\n\nclass TrackState:\n    \"\"\"\n    Enumeration type for the single target track state. Newly created tracks are\n    classified as `tentative` until enough evidence has been collected. Then,\n    the track state is changed to `confirmed`. Tracks that are no longer alive\n    are classified as `deleted` to mark them for removal from the set of active\n    tracks.\n\n    \"\"\"\n\n    Tentative = 1\n    Confirmed = 2\n    Deleted = 3\n\n\nclass Track_3d:\n    \"\"\"\n    A single target track with state space `(x, y, a, h)` and associated\n    velocities, where `(x, y)` is the center of the bounding box, `a` is the\n    aspect ratio and `h` is the height.\n\n    Parameters\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    max_age : int\n        The maximum number of consecutive misses before the track state is\n        set to `Deleted`.\n    feature : Optional[ndarray]\n        Feature vector of the detection this track originates from. If not None,\n        this feature is added to the `features` cache.\n\n    Attributes\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    hits : int\n        Total number of measurement updates.\n    age : int\n        Total number of frames since first occurance.\n    time_since_update : int\n        Total number of frames since last measurement update.\n    state : TrackState\n        The current track state.\n    features : List[ndarray]\n        A cache of features. On each measurement update, the associated feature\n        vector is added to this list.\n\n    \"\"\"\n    def __init__(self, mean, covariance, track_id, n_init, max_age,\n                 feature=None, appearance_feature = None, cuda = False, lstm = None):\n\n        self.mean = mean\n        self.covariance = covariance\n        self.track_id = track_id\n        self.hits = 1\n        self.age = 1\n        self.time_since_update = 0\n        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n        self.cuda = cuda\n        self.state = TrackState.Tentative\n        self.features = []\n        self.features_2d = []\n        self.hidden = None\n        if lstm is None:\n            self.features.append(feature)\n            self.features_2d.append(appearance_feature)\n        else:\n            self.feature_update(feature, appearance_feature, lstm)\n        self.first_detection = mean[:7]\n        self._n_init = n_init\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n        self._max_age = max_age\n        self.matched = True\n        self.exiting = False\n\n\n    def to_tlwh3d(self):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        ret = self.mean[[0,1,2,3,4,5,6]].copy()\n\n        return ret\n\n    def to_tlwh(self, kf):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        corner_points, _ = kf.calculate_corners(kf)\n        min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n        max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n        return ret\n\n    def predict(self, kf):\n        \"\"\"Propagate the state distribution to the current time step using a\n        Kalman filter prediction step.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n\n        \"\"\"\n        self.mean, self.covariance = kf.predict(self.mean, self.covariance)\n        self.age += 1\n        self.time_since_update += 1\n\n    # @profile\n    def update(self, kf, detection, compare_2d=False,\n                marginalization=None, detection_idx=None, JPDA=False, lstm = None):\n        \"\"\"Perform Kalman filter measurement update step and update the feature\n        cache.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n        detection : Detection\n            The associated detection.\n\n        \"\"\"\n\n        if JPDA:\n\n            detections_2d = [det.tlwh for det in detection]\n            if compare_2d:\n                detections_3d = None\n            else:\n                detections_3d = [det.box_3d for det in detection]\n            self.mean, self.covariance, self.mean_post_3d = kf.update(\n                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)\n\n            if detection_idx < 0:\n                return\n            detection = detection[detection_idx]\n\n        else:\n            detection = detection[detection_idx]\n            detections_3d = detections_3d[detection_idx]\n            self.mean, self.covariance = kf.update(\n                self.mean, self.covariance, detection.tlwh, detections_3d)\n\n        self.hits += 1\n        self.time_since_update = 0\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n\n    def mark_missed(self):\n        \"\"\"Mark this track as missed (no association at the current time step).\n        \"\"\"\n        if self.state == TrackState.Tentative:\n            self.state = TrackState.Deleted\n        elif self.time_since_update > self._max_age:\n            self.state = TrackState.Deleted\n\n    def is_tentative(self):\n        \"\"\"Returns True if this track is tentative (unconfirmed).\n        \"\"\"\n        return self.state == TrackState.Tentative\n\n    def is_confirmed(self):\n        \"\"\"Returns True if this track is confirmed.\"\"\"\n        return self.state == TrackState.Confirmed\n\n    def is_deleted(self):\n        \"\"\"Returns True if this track is dead and should be deleted.\"\"\"\n        return self.state == TrackState.Deleted\n\n"
  },
  {
    "path": "paper_experiments/utils/tracker.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nimport pdb\nfrom . import kf_2d, kf_3d, double_measurement_kf, imm\nfrom . import linear_assignment\nfrom . import iou_matching\nfrom .track import Track\nfrom . import JPDA_matching\nfrom . import tracking_utils\nimport math\nfrom nn_matching import NearestNeighborDistanceMetric\nimport cv2\n\n\nclass Tracker:\n    \"\"\"\n    This is the multi-target tracker.\n\n    Parameters\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        A distance metric for measurement-to-track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n\n    Attributes\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        The distance metric used for measurement to track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of frames that a track remains in initialization phase.\n    kf : EKF.KalmanFilter\n        A Kalman filter to filter target trajectories in image space.\n    tracks : List[Track]\n        The list of active tracks at the current time step.\n\n    \"\"\"\n\n    def __init__(self, max_age=5, n_init=3,\n                 JPDA=False, m_best_sol=1, assn_thresh=0.0,\n                 matching_strategy=None,\n                 kf_appearance_feature=None,\n                 gate_full_state=False, lstm = None, cuda = False, appearance_model = None,\n                 calib = None, kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False, kf_walk_params=(1./20, 1./160, 1, 1, 2),\n                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400):\n\n        self.max_age = max_age\n        self.n_init = n_init\n        self.metric = NearestNeighborDistanceMetric(\"euclidean\", nn_budget)\n        if not use_imm:\n            self.kf = kf_2d.KalmanFilter2D(*kf_vel_params, gate_limit)\n            self.use_imm = False\n        else:\n            self.kf = imm.IMMFilter2D(kf_vel_params, kf_walk_params, markov=markov)\n            self.use_imm = True\n        self.tracks = []\n        self._next_id = 1\n        self.JPDA = JPDA\n        self.m_best_sol = m_best_sol\n        self.assn_thresh = assn_thresh\n        self.matching_strategy = matching_strategy\n        self.kf_appearance_feature = kf_appearance_feature\n        self.gate_only_position = not gate_full_state\n        self.lstm = lstm\n        self.cuda = cuda\n        self.dummy_node_cost_app = dummy_node_cost_app\n        self.dummy_node_cost_iou = dummy_node_cost_iou\n        self.appearance_model = appearance_model\n        self.prev_frame = None\n        self.uncertainty_limit = uncertainty_limit\n        self.optical_flow = optical_flow\n\n    # @profile\n    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d = False):\n        targets = np.array([tracks[i].track_id for i in track_indices])\n        if not compare_2d and self.metric.check_samples(targets):\n            compare_2d = True\n        if compare_2d:\n            features = np.array([dets[i].appearance_feature for i in detection_indices])\n        else:\n            features = np.array([dets[i].feature for i in detection_indices])\n        #cost_matrix = self.metric.distance(features, targets, compare_2d)\n        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)\n        cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices)\n\n        gate_mask = linear_assignment.gate_cost_matrix(\n            self.kf, tracks, dets, track_indices,\n            detection_indices, only_position=self.gate_only_position)\n        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))\n\n        return cost_matrix, gate_mask\n\n    def predict(self):\n        \"\"\"Propagate track state distributions one time step forward.\n\n        This function should be called once every time step, before `update`.\n        \"\"\"\n        for track in self.tracks:\n            track.predict(self.kf)\n\n    # @profile\n    def update(self, cur_frame, detections, compare_2d = False):\n        \"\"\"Perform measurement update and track management.\n\n        Parameters\n        ----------\n        detections : List[deep_sort.detection.Detection]\n            A list of detections at the current time step.\n\n        \"\"\"\n        \n        self.cur_frame = cv2.cvtColor((255*cur_frame).permute(1,2,0).cpu().numpy(), cv2.COLOR_BGR2GRAY)\n\n        matches, unmatched_tracks, unmatched_detections = \\\n            self._match(detections, compare_2d)\n\n        # update filter for each assigned track\n        # Only do this for non-JPDA because in JPDA the kf states are updated\n        # during the matching process\n        if not self.JPDA:\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches}\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks:\n                track_detection_map[t] = -1\n            \n            for track_idx, detection_idx in matches:\n                self.tracks[track_idx].update(self.kf, detections,\n                        detection_idx=detection_idx, JPDA=self.JPDA, \n                        cur_frame = self.cur_frame, appearance_model = self.appearance_model, \n                        lstm = self.lstm)\n\n        # update track state for unmatched tracks\n        for track_idx in unmatched_tracks:\n            self.tracks[track_idx].mark_missed()\n\n        # create new tracks\n        self.prune_tracks()\n        flow = None\n        if unmatched_detections:\n            if self.optical_flow and self.prev_frame is not None:\n                flow = cv2.calcOpticalFlowFarneback(self.prev_frame, self.cur_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)\n\n        for detection_idx in unmatched_detections:\n            self._initiate_track(detections[detection_idx], flow)\n\n        # Update distance metric.\n        active_targets = [t.track_id for t in self.tracks]\n        features, features_2d, targets, targets_2d = [], [], [], []\n        for track in self.tracks:\n            features += track.features\n            features_2d += track.features_2d\n            targets += [track.track_id for _ in track.features]\n            targets_2d += [track.track_id for _ in track.features_2d]\n            track.features = []\n            track.features_2d = []\n\n        self.metric.partial_fit(\n            np.asarray(features), np.asarray(features_2d), np.asarray(targets), np.asarray(targets_2d), active_targets)\n        self.prev_frame = self.cur_frame\n\n    # @profile\n    def _match(self, detections, compare_2d):\n\n        # Associate all tracks using combined cost matrices.\n        if self.JPDA:\n            # Run JPDA on all tracks\n            marginalizations = \\\n            linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou, self.tracks, \\\n                detections, m=self.m_best_sol, compare_2d = compare_2d)\n            # for track in self.tracks: #TODO: REMOVE\n            #     print(track.track_id)\n            # print(marginalizations)\n\n            jpda_matcher = JPDA_matching.Matcher(\n                detections, marginalizations, range(len(self.tracks)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()\n\n            # Map matched tracks to detections\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches_a}\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks_a:\n                track_detection_map[t] = -1\n            # update Kalman state\n            if marginalizations.shape[0] > 0:\n                for i in range(len(self.tracks)):\n                    self.tracks[i].update(self.kf, detections,\n                        marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], \n                        JPDA=self.JPDA, cur_frame = self.cur_frame, appearance_model = self.appearance_model, lstm = self.lstm)\n        else:\n            confirmed_tracks = [i for i, t in enumerate(self.tracks) if t.is_confirmed()]\n            matches_a, unmatched_tracks_a, unmatched_detections = \\\n                linear_assignment.matching_cascade(\n                    self.gated_metric, self.dummy_node_cost_iou, self.max_age,\n                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d)\n        return matches_a, unmatched_tracks_a, unmatched_detections\n\n    def _initiate_track(self, detection, flow=None):\n        if self.use_imm:\n            mean, covariance, model_probabilities = self.kf.initiate(detection.to_xywh(), flow)\n        else:\n            mean, covariance = self.kf.initiate(detection.to_xywh(), flow)\n            model_probabilities = None\n\n        self.tracks.append(Track(\n            mean, covariance, model_probabilities, self._next_id, self.n_init, self.max_age,\n            kf_appearance_feature = self.kf_appearance_feature,\n            feature=detection.feature, appearance_feature = detection.appearance_feature,\n            cuda = self.cuda, lstm = self.lstm, last_det = detection))\n        self._next_id += 1\n    \n    def prune_tracks(self):\n        h, w = self.cur_frame.shape\n        for track in self.tracks:\n            # Check if track is leaving\n            if self.use_imm:\n                predicted_mean, predicted_cov = self.kf.combine_states(track.mean, track.covariance, track.model_probabilities) #TODO: This doesn't predict. Mean should def predict\n            else:\n                predicted_mean = self.kf.predict_mean(track.mean)\n                predicted_cov = track.covariance\n            predicted_pos = predicted_mean[:2]\n            predicted_vel = predicted_mean[4:6]\n            predicted_pos[0] -= w/2\n            predicted_pos[1] -= h/2\n\n            cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*\n                                                    np.linalg.norm(predicted_vel) + 1e-6)\n            predicted_pos[0] += w/2\n            predicted_pos[1] += h/2\n            # Thresholds for deciding whether track is outside image\n            BORDER_VALUE = 0\n            if (cos_theta > 0 and\n                (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or\n                predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):\n                if track.is_exiting() and not track.matched:\n                    track.delete_track()\n                else:\n                    track.mark_exiting()\n            # Check if track is too uncertain\n            # cov_axis,_ = np.linalg.eigh(predicted_cov)\n            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):\n            #    track.delete_track()\n        self.tracks = [t for t in self.tracks if not t.is_deleted()]\n"
  },
  {
    "path": "paper_experiments/utils/tracker_3d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nimport pdb\nfrom . import double_measurement_kf\nfrom . import linear_assignment\nfrom . import iou_matching\nfrom .track_3d import Track_3d\nfrom . import JPDA_matching\nfrom . import tracking_utils\nimport math\nfrom nn_matching import NearestNeighborDistanceMetric\n\nclass Tracker_3d:\n    \"\"\"\n    This is the multi-target tracker.\n\n    Parameters\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        A distance metric for measurement-to-track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n\n    Attributes\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        The distance metric used for measurement to track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of frames that a track remains in initialization phase.\n    kf : EKF.KalmanFilter\n        A Kalman filter to filter target trajectories in image space.\n    tracks : List[Track]\n        The list of active tracks at the current time step.\n\n    \"\"\"\n\n    def __init__(self, max_age=30, n_init=3,\n                 JPDA=False, m_best_sol=1, assn_thresh=0.0,\n                 matching_strategy=None, appearance_model = None,\n                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,\n                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost=0.2, nn_budget = None, use_imm=False,\n                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400):\n\n        self.metric = NearestNeighborDistanceMetric(\"euclidean\", nn_budget)\n        self.max_age = max_age\n        self.n_init = n_init\n        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)\n        self.tracks = []\n        self._next_id = 1\n        self.JPDA = JPDA\n        self.m_best_sol = m_best_sol\n        self.assn_thresh = assn_thresh\n        self.matching_strategy = matching_strategy\n        self.gate_only_position = not gate_full_state\n        self.lstm = lstm\n        self.cuda = cuda\n        self.dummy_node_cost = dummy_node_cost\n        self.appearance_model = appearance_model\n\n    # @profile\n    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):\n        targets = np.array([tracks[i].track_id for i in track_indices])\n        if not compare_2d and self.metric.check_samples(targets):\n            compare_2d = True\n        if compare_2d:\n            features = np.array([dets[i].appearance_feature for i in detection_indices])\n        else:\n            features = np.array([dets[i].feature for i in detection_indices])\n        #cost_matrix = self.metric.distance(features, targets, compare_2d)\n        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)\n        use_3d = True\n        for i in detection_indices:\n            if dets[i].box_3d is None:\n                use_3d = False\n                break\n        if use_3d:\n            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)\n        else:\n            cost_matrix_iou = np.ones(cost_matrix_appearance.shape)\n        kf = self.kf\n        dets_for_gating = dets\n\n        gate_mask = linear_assignment.gate_cost_matrix(\n            kf, tracks, dets_for_gating, track_indices,\n            detection_indices, only_position=self.gate_only_position, use3d=use_3d)\n        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))\n\n        return cost_matrix, gate_mask\n\n    def predict(self):\n        \"\"\"Propagate track state distributions one time step forward.\n\n        This function should be called once every time step, before `update`.\n        \"\"\"\n        for track in self.tracks:\n            track.predict(self.kf)\n\n    # @profile\n    def update(self, input_img, detections, compare_2d):\n        \"\"\"Perform measurement update and track management.\n\n        Parameters\n        ----------\n        detections : List[deep_sort.detection.Detection]\n            A list of detections at the current time step.\n\n        \"\"\"\n\n        matches, unmatched_tracks, unmatched_detections = \\\n            self._match(detections, compare_2d)\n\n        # update filter for each assigned track\n        # Only do this for non-JPDA because in JPDA the kf states are updated\n        # during the matching process\n        \n        if not self.JPDA:\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches}\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks:\n                track_detection_map[t] = -1\n            \n            for track_idx, detection_idx in matches:\n                self.tracks[track_idx].update(self.kf, detections,\n                        detection_idx=detection_idx, JPDA=self.JPDA, \n                        cur_frame = self.cur_frame, appearance_model = self.appearance_model, \n                        lstm = self.lstm)\n        # update track state for unmatched tracks\n        for track_idx in unmatched_tracks:\n            self.tracks[track_idx].mark_missed()\n        \n        self.prune_tracks()\n        # create new tracks\n        for detection_idx in unmatched_detections:\n            self._initiate_track(detections[detection_idx])\n\n         # Update distance metric.\n        active_targets = [t.track_id for t in self.tracks]\n        features, features_2d, targets, targets_2d = [], [], [], []\n        for track in self.tracks:\n            features += track.features\n            features_2d += track.features_2d\n            targets += [track.track_id for _ in track.features]\n            targets_2d += [track.track_id for _ in track.features_2d]\n            track.features = []\n            track.features_2d = []\n\n        self.metric.partial_fit(\n            np.asarray(features), np.asarray(features_2d), np.asarray(targets), np.asarray(targets_2d), active_targets)\n\n    # @profile\n    def _match(self, detections, compare_2d):\n\n        # Associate confirmed tracks using appearance features.\n        if self.JPDA:\n            # Only run JPDA on confirmed tracks\n            marginalizations = \\\n            linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost, self.tracks, \\\n                detections, compare_2d=compare_2d)\n\n            jpda_matcher = JPDA_matching.Matcher(\n                detections, marginalizations, range(len(self.tracks)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()\n\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches_a}\n\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks_a:\n                track_detection_map[t] = -1\n\n            # udpate Kalman state\n            if marginalizations.shape[0] > 0:\n                for i in range(len(self.tracks)):\n                    self.tracks[i].update(self.kf, detections,\n                        marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], \n                        JPDA=self.JPDA, lstm = self.lstm)\n\n        else:\n            matches_a, unmatched_tracks_a, unmatched_detections = \\\n                linear_assignment.matching_cascade(\n                    self.gated_metric, self.metric.matching_threshold, self.max_age,\n                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)\n\n        return matches_a, unmatched_tracks_a, unmatched_detections\n\n    def _initiate_track(self, detection):\n        if detection.box_3d is None:\n            return\n        mean, covariance = self.kf.initiate(detection.box_3d)\n        self.tracks.append(Track_3d(\n            mean, covariance, self._next_id, self.n_init, self.max_age,\n            feature=detection.feature, appearance_feature = detection.appearance_feature,\n            cuda = self.cuda, lstm = self.lstm))\n        self._next_id += 1\n    \n    def prune_tracks(self):\n\n        # for track in self.tracks:\n        #     # Check if track is leaving\n        #     predicted_mean = self.kf.predict_mean(track.mean)\n        #     predicted_cov = track.covariance\n        #     predicted_pos = predicted_mean[:2]\n        #     predicted_vel = predicted_mean[4:6]\n        #     predicted_pos[0] -= w/2\n        #     predicted_pos[1] -= h/2\n\n        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*\n        #                                             np.linalg.norm(predicted_vel) + 1e-6)\n        #     predicted_pos[0] += w/2\n        #     predicted_pos[1] += h/2\n        #     # Thresholds for deciding whether track is outside image\n        #     BORDER_VALUE = 0\n        #     if (cos_theta > 0 and\n        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or\n        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):\n        #         if track.is_exiting() and not track.matched:\n        #             track.delete_track()\n        #         else:\n        #             track.mark_exiting()\n            # Check if track is too uncertain\n            # cov_axis,_ = np.linalg.eigh(predicted_cov)\n            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):\n            #    track.delete_track()\n        self.tracks = [t for t in self.tracks if not t.is_deleted()]\n"
  },
  {
    "path": "paper_experiments/utils/tracking_utils.py",
    "content": "import torch, sys, os, pdb\nimport numpy as np\nfrom PIL import Image\nfrom scipy.spatial import Delaunay\nsys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))\nfrom .aligned_reid_utils import load_state_dict\nfrom models.yolo_models import Darknet\nfrom .featurepointnet_model_util import rotate_pc_along_y\nfrom .deep_sort_utils import non_max_suppression as deepsort_nms\nimport math\nfrom .detection import Detection\n\n\ndef create_detector(config_path, weight_path, cuda):\n\n    detector = Darknet(config_path)\n    detector.load_weights(weight_path)\n    if cuda:\n        detector.cuda()\n    detector.eval()\n    return detector\n\ndef get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):\n    #print(ids_3d)\n    depth_patches = []\n    for i, box in enumerate(box_3d):\n        if ids_3d[i] == -1:\n            depth_patches.append(None)\n            continue\n        box_center = np.asarray([ [box[0], box[1], box[2]] ])\n        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))\n        box_center = box_center[0]\n        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))\n        x = point_cloud[:, 0]\n        y = point_cloud[:, 1]\n        z = point_cloud[:, 2]\n        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))\n        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))\n        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))\n        idx = np.logical_and(idx_1, idx_2)\n        idx = np.logical_and(idx, idx_3)\n        depth_patch = point_cloud[idx, :]\n        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration\n        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))\n\n        if depth_patch.size == 0:\n            ids_3d[i] = -1\n            depth_patches.append(None)\n        else:\n            if depth_patch.shape[0] > num_point:\n                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)\n            else:\n\n                pc_in_box_fov = np.expand_dims(\n                            np.vstack([depth_patch, \n                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])\n                            , 0)\n            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])\n\n    return depth_patches, ids_3d\n\n\ndef non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    x = [boxes_3d[i][0] for i in range(len(boxes_3d))]\n    z = [boxes_3d[i][2] for i in range(len(boxes_3d))]\n    l = [boxes_3d[i][5] for i in range(len(boxes_3d))] #[3]\n    w = [boxes_3d[i][3] for i in range(len(boxes_3d))] #[5]\n    indices = deepsort_nms(boxes_3d, nms_thresh, np.squeeze(confidence))\n    for i in range(len(ids_3d)):\n        if i not in indices:\n            ids_3d[i] = -1\n            ids_2d[i] = -1\n            boxes_3d[i] = None\n            detections[i] = None\n    return detections, boxes_3d, ids_2d, ids_3d\n\ndef non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    #depth_patches list of patches\n    \n    if len(depth_patches) == 0:\n        return []\n\n    pick = []\n\n    if confidence is not None:\n        idxs = np.argsort(confidence)\n    else:\n        idxs = list(range(len(depth_patches)))\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])\n        if np.any(overlap == -np.inf):\n            idxs = np.delete(idxs, [last])\n            continue\n        pick.append(i)        \n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > nms_thresh)[0])))\n    for i in range(len(depth_patches)):\n        if i not in pick:\n            if ids_3d[i]!=-1:\n                ids_2d[i] = -1\n            ids_3d[i] = -1\n    return depth_patches, ids_3d, ids_2d\n\ndef iou_3d(patch_1, patch_2):\n    #Expecting patches of shape (N, 4) or (N,3) (numpy arrays)\n    if patch_2 is None:\n        return np.inf\n    elif patch_1 is None:\n        return -np.inf\n    # Unique points\n    patch_unique_1 = np.unique(patch_1, axis = 0)\n    patch_unique_2 = np.unique(patch_2, axis = 0)\n    intersection_points = 0\n    for point_1_idx in range(patch_unique_1.shape[0]):\n        point_distance = np.sqrt(np.sum((patch_unique_1[point_1_idx]-patch_unique_2)**2, axis = 1))\n        intersection_points += np.any(point_distance<0.3)\n\n    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points\n\n    iou = intersection_points/union_points\n\n    return iou\n\ndef convert_detections(detections, features, appearance_features, detections_3d):\n    detection_list = []\n    if detections_3d is None:\n        detections_3d = [None] * len(detections)\n    for detection, feature, appearance_feature, detection_3d in zip(detections, features, appearance_features, detections_3d):\n        x1, y1, x2, y2, conf, _, _ = detection\n        box_2d = [x1, y1, x2-x1, y2-y1]\n        if detection_3d is not None:\n            x, y, z, l, w, h, theta = detection_3d\n            box_3d = [x, y, z, l, w, h, theta]\n        else:\n            box_3d = None\n        if feature is None:\n            detection_list.append(Detection(box_2d, None, conf, appearance_feature.cpu(), feature))\n        else:\n            detection_list.append(Detection(box_2d, box_3d, conf, appearance_feature.cpu(), feature.cpu()))\n\n    return detection_list\n\ndef combine_features(features, depth_features, ids_3d, combination_model, depth_weight = 1):\n\n    combined_features = []\n    appearance_features = []\n    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):\n        if ids_3d[i] == -1:\n            depth_feature = torch.zeros(512, device=torch.device(\"cuda\"))\n        combined_features.append(torch.cat([appearance_feature, depth_feature* depth_weight]))\n        appearance_features.append(appearance_feature)\n\n    if combination_model is not None and len(combined_features) > 0:\n        combination_model.eval()\n        combined_feature = torch.stack(combined_features)\n        combined_features = combination_model(combined_feature).detach()\n        combined_features = list(torch.unbind(combined_features))\n    return combined_features, appearance_features\n\ndef filter(detections):\n    for i, det in enumerate(detections): #Note image is 1242 x 375\n        left = det[0]\n        top = det[1]\n        right = det[2]\n        bottom = det[3]\n        if (left < 10 or right > 1232) and (top < 10 or bottom > 365):\n            detections[i] = None\n    return detections"
  },
  {
    "path": "paper_experiments/utils/visualise.py",
    "content": "import matplotlib\nimport matplotlib.pyplot as plt\nimport matplotlib.patches as plt_patches\nimport numpy as np\nimport utils.imm as imm\nfrom PIL import Image\nimport pdb\n\ndef draw_track(bbox, track = None, bbox_colors = None, det = True,\n               do_ellipse = False, axis = None, id_num = 0, do_velocity=False):\n    if axis is None:\n        axis = plt.gca()\n    if track is None:\n        color = plt.get_cmap('tab20b')(8) if det else plt.get_cmap('tab20b')(6)\n        # plt.imshow(original_img)\n        width = bbox[2]\n        height = bbox[3]\n    else:\n        color = bbox_colors[track.track_id]\n        id_num = track.track_id\n        width = bbox[2]\n        height = bbox[3]\n\n    plot_bbox = plt_patches.Rectangle((bbox[0], bbox[1]), width, height, linewidth=2,\n                        edgecolor=color,\n                        facecolor='none')\n    ax = axis\n    ax.add_patch(plot_bbox)\n    ax.text(bbox[0], bbox[1], s = id_num, color='white', verticalalignment='top',\n        bbox={'color': color, 'pad': 0})\n  \n    if do_ellipse:\n        draw_ellipse(track, color)\n    if do_velocity:\n        draw_velocity(track, color)\n\n\ndef draw_detection(detection, color='k'):\n    bbox = detection.tlwh\n    plot_bbox = plt_patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=2,\n                        edgecolor=color,\n                        facecolor = 'w',\n                        alpha = 0.5)\n    ax = plt.gca()\n    ax.add_patch(plot_bbox)\n   \ndef draw_ellipse(track, color):\n    ax = plt.gca()\n    if track.model_probabilities is not None:\n        mean, cov = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)\n        # print(\"New orig mat\",track.covariance)\n        # print(\"New\",cov)\n    else:\n        mean = track.mean\n        cov = track.covariance\n        # print(\"Old\",cov)\n\n    lambda_, v = np.linalg.eig(cov[:2, :2])\n    lambda_ = np.sqrt(lambda_)\n    idx = np.argsort(lambda_)[::-1]\n    lambda_ = lambda_[idx]\n    v = v[:, idx]\n    nsigma = np.sqrt(5.99)\n    ell = plt_patches.Ellipse(xy=(mean[0], mean[1])\n                              , width= lambda_[0]*2*nsigma \n                              , height=lambda_[1]*2*nsigma\n                              , angle=np.rad2deg(np.arctan2(v[1, 0], v[0, 0]))\n                              , edgecolor=color\n                              , facecolor='none'\n                              )\n    ax.add_patch(ell)\n\ndef draw_velocity(track, color):\n    ax = plt.gca()\n    if track.model_probabilities is not None:\n        mean, cov = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)\n    else:\n        mean = track.mean\n    ax.arrow(mean[0], mean[1], \n            mean[4], mean[5],\n            edgecolor=color,\n            head_width=5)\n\ndef draw_box3d(mu, color, alpha, facecolor='none', ax=None):\n    if np.any(np.isnan(mu)):\n        return\n    if ax is None:\n        ax = plt.gca()\n    x, z, l, w, theta = mu[0], mu[2], mu[3], mu[5], mu[6]\n    r = np.sqrt(w**2 + l**2)/2\n    psi = np.arctan2(w, l)\n    dx, dz = r*np.cos(psi), r*np.sin(psi)\n    rect = plt_patches.Rectangle((-dx, -dz), l, w, linewidth=2,\n                        edgecolor=color,\n                        alpha=alpha,\n                        facecolor=facecolor)\n    t = matplotlib.transforms.Affine2D().translate(x, z)\n    t = t.rotate_around(x, z, theta)\n    t_start = ax.transData\n    t_end =  t + t_start\n    rect.set_transform(t_end)\n    ax.add_patch(rect)\n\n    \ndef draw_velocity_3d(track, color, ax=None):\n    mean = track.mean\n    if ax is None:\n        ax = plt.gca()\n    x, z, vx, vz = mean[0], mean[2], mean[7], mean[8]\n    arr = plt.arrow(x, z, vx, vz,\n                        color=color,\n                        head_width=0.5,\n                        head_length=0.5)\n    ax.add_patch(arr)\n                    \ndef draw_ellipse3d(covariance, x, y, color, ax=None):\n    if np.any(np.isnan(covariance)):\n        return\n    if ax is None:\n        ax = plt.gca()\n    lambda_, v = np.linalg.eig(np.reshape(covariance[[0, 0, 2, 2], [0, 2, 0, 2]], (2,2)))\n    lambda_ = np.sqrt(lambda_)\n    idx = np.argsort(lambda_)[::-1]\n    lambda_ = lambda_[idx]\n    v = v[:, idx]\n    nsigma = np.sqrt(5.99)\n    ell = plt_patches.Ellipse(xy=(x,y)\n                              , width= lambda_[0]*2*nsigma\n                              , height=lambda_[1]*2*nsigma\n                              , angle=np.rad2deg(np.arctan2(v[1, 0], v[0, 0]))\n                              , edgecolor=color\n                              , facecolor='none'\n                              )\n    ax.add_patch(ell)\n\ndef draw_track3d(track, color, ax=None):\n    mu = track.mean\n    draw_box3d(mu, color, 1, ax=ax)\n    if ax is None:\n        ax = plt.gca()\n    x, z = mu[0], mu[2]\n    ax.text(x, z, s = track.track_id, color='white', verticalalignment='top',\n        bbox={'color': color, 'pad': 0})\n\n    draw_ellipse3d(track.covariance, x, z, color, ax)\n    draw_velocity_3d(track, color, ax)\n\ndef draw_detection3d(det, color, ax=None):\n    draw_box3d(det.box_3d, color, 0.5, color, ax=ax)"
  },
  {
    "path": "paper_experiments/utils/yolo_utils/__init__.py",
    "content": ""
  },
  {
    "path": "paper_experiments/utils/yolo_utils/datasets.py",
    "content": "import glob\nimport random\nimport os\nimport numpy as np\n\nimport torch\n\nfrom torch.utils.data import Dataset\nfrom PIL import Image\nimport torchvision.transforms as transforms\n\nimport matplotlib.pyplot as plt\nimport matplotlib.patches as patches\n\nfrom skimage.transform import resize\n\nimport sys\n\nclass ImageFolder(Dataset):\n    def __init__(self, folder_path, img_size=416):\n        self.files = sorted(glob.glob('%s/*.*' % folder_path))\n        self.img_shape = (img_size, img_size)\n\n    def __getitem__(self, index):\n        img_path = self.files[index % len(self.files)]\n        # Extract image\n        img = np.array(Image.open(img_path))\n        h, w, _ = img.shape\n        dim_diff = np.abs(h - w)\n        # Upper (left) and lower (right) padding\n        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2\n        # Determine padding\n        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))\n        # Add padding\n        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.\n        # Resize and normalize\n        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect', anti_aliasing = True)\n        # Channels-first\n        input_img = np.transpose(input_img, (2, 0, 1))\n        # As pytorch tensor\n        input_img = torch.from_numpy(input_img).float()\n\n        return img_path, input_img\n\n    def __len__(self):\n        return len(self.files)\n\n\nclass ListDataset(Dataset):\n    def __init__(self, list_path, img_size=416):\n        with open(list_path, 'r') as file:\n            self.img_files = file.readlines()\n        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]\n        self.img_shape = (img_size, img_size)\n        self.max_objects = 50\n\n    def __getitem__(self, index):\n\n        #---------\n        #  Image\n        #---------\n\n        img_path = self.img_files[index % len(self.img_files)].rstrip()\n        img = np.array(Image.open(img_path))\n\n        # Handles images with less than three channels\n        while len(img.shape) != 3:\n            index += 1\n            img_path = self.img_files[index % len(self.img_files)].rstrip()\n            img = np.array(Image.open(img_path))\n\n        h, w, _ = img.shape\n        dim_diff = np.abs(h - w)\n        # Upper (left) and lower (right) padding\n        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2\n        # Determine padding\n        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))\n        # Add padding\n        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.\n        padded_h, padded_w, _ = input_img.shape\n        # Resize and normalize\n        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')\n        # Channels-first\n        input_img = np.transpose(input_img, (2, 0, 1))\n        # As pytorch tensor\n        input_img = torch.from_numpy(input_img).float()\n\n        #---------\n        #  Label\n        #---------\n\n        label_path = self.label_files[index % len(self.img_files)].rstrip()\n\n        labels = None\n        if os.path.exists(label_path):\n            labels = np.loadtxt(label_path).reshape(-1, 5)\n            # Extract coordinates for unpadded + unscaled image\n            x1 = w * (labels[:, 1] - labels[:, 3]/2)\n            y1 = h * (labels[:, 2] - labels[:, 4]/2)\n            x2 = w * (labels[:, 1] + labels[:, 3]/2)\n            y2 = h * (labels[:, 2] + labels[:, 4]/2)\n            # Adjust for added padding\n            x1 += pad[1][0]\n            y1 += pad[0][0]\n            x2 += pad[1][0]\n            y2 += pad[0][0]\n            # Calculate ratios from coordinates\n            labels[:, 1] = ((x1 + x2) / 2) / padded_w\n            labels[:, 2] = ((y1 + y2) / 2) / padded_h\n            labels[:, 3] *= w / padded_w\n            labels[:, 4] *= h / padded_h\n        # Fill matrix\n        filled_labels = np.zeros((self.max_objects, 5))\n        if labels is not None:\n            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]\n        filled_labels = torch.from_numpy(filled_labels)\n\n        return img_path, input_img, filled_labels\n\n    def __len__(self):\n        return len(self.img_files)\n"
  },
  {
    "path": "paper_experiments/utils/yolo_utils/parse_config.py",
    "content": "\n\ndef parse_model_config(path):\n    \"\"\"Parses the yolo-v3 layer configuration file and returns module definitions\"\"\"\n    file = open(path, 'r')\n    lines = file.read().split('\\n')\n    lines = [x for x in lines if x and not x.startswith('#')]\n    lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces\n    module_defs = []\n    for line in lines:\n        if line.startswith('['): # This marks the start of a new block\n            module_defs.append({})\n            module_defs[-1]['type'] = line[1:-1].rstrip()\n            if module_defs[-1]['type'] == 'convolutional':\n                module_defs[-1]['batch_normalize'] = 0\n        else:\n            key, value = line.split(\"=\")\n            value = value.strip()\n            module_defs[-1][key.rstrip()] = value.strip()\n\n    return module_defs\n\ndef parse_data_config(path):\n    \"\"\"Parses the data configuration file\"\"\"\n    options = dict()\n    options['gpus'] = '0,1,2,3'\n    options['num_workers'] = '10'\n    with open(path, 'r') as fp:\n        lines = fp.readlines()\n    for line in lines:\n        line = line.strip()\n        if line == '' or line.startswith('#'):\n            continue\n        key, value = line.split('=')\n        options[key.strip()] = value.strip()\n    return options\n"
  },
  {
    "path": "paper_experiments/utils/yolo_utils/utils.py",
    "content": "from __future__ import division\nimport math\nimport time\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as np\n\nimport matplotlib.pyplot as plt\nimport matplotlib.patches as patches\n\n\ndef load_classes(path):\n    \"\"\"\n    Loads class labels at 'path'\n    \"\"\"\n    fp = open(path, \"r\")\n    names = fp.read().split(\"\\n\")[:-1]\n    return names\n\n\ndef weights_init_normal(m):\n    classname = m.__class__.__name__\n    if classname.find(\"Conv\") != -1:\n        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)\n    elif classname.find(\"BatchNorm2d\") != -1:\n        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)\n        torch.nn.init.constant_(m.bias.data, 0.0)\n\n\ndef compute_ap(recall, precision):\n    \"\"\" Compute the average precision, given the recall and precision curves.\n    Code originally from https://github.com/rbgirshick/py-faster-rcnn.\n\n    # Arguments\n        recall:    The recall curve (list).\n        precision: The precision curve (list).\n    # Returns\n        The average precision as computed in py-faster-rcnn.\n    \"\"\"\n    # correct AP calculation\n    # first append sentinel values at the end\n    mrec = np.concatenate(([0.0], recall, [1.0]))\n    mpre = np.concatenate(([0.0], precision, [0.0]))\n\n    # compute the precision envelope\n    for i in range(mpre.size - 1, 0, -1):\n        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])\n\n    # to calculate area under PR curve, look for points\n    # where X axis (recall) changes value\n    i = np.where(mrec[1:] != mrec[:-1])[0]\n\n    # and sum (\\Delta recall) * prec\n    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])\n    return ap\n\n\ndef bbox_iou(box1, box2, x1y1x2y2=True):\n    \"\"\"\n    Returns the IoU of two bounding boxes\n    \"\"\"\n    if not x1y1x2y2:\n        # Transform from center and width to exact coordinates\n        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2\n        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2\n        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2\n        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2\n    else:\n        # Get the coordinates of bounding boxes\n        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]\n        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]\n\n    # get the corrdinates of the intersection rectangle\n    inter_rect_x1 = torch.max(b1_x1, b2_x1)\n    inter_rect_y1 = torch.max(b1_y1, b2_y1)\n    inter_rect_x2 = torch.min(b1_x2, b2_x2)\n    inter_rect_y2 = torch.min(b1_y2, b2_y2)\n    # Intersection area\n    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(\n        inter_rect_y2 - inter_rect_y1 + 1, min=0\n    )\n    # Union Area\n    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)\n    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)\n\n    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)\n\n    return iou\n\n\ndef bbox_iou_numpy(box1, box2):\n    \"\"\"Computes IoU between bounding boxes.\n    Parameters\n    ----------\n    box1 : ndarray\n        (N, 4) shaped array with bboxes\n    box2 : ndarray\n        (M, 4) shaped array with bboxes\n    Returns\n    -------\n    : ndarray\n        (N, M) shaped array with IoUs\n    \"\"\"\n    area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])\n\n    iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum(\n        np.expand_dims(box1[:, 0], 1), box2[:, 0]\n    )\n    ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum(\n        np.expand_dims(box1[:, 1], 1), box2[:, 1]\n    )\n\n    iw = np.maximum(iw, 0)\n    ih = np.maximum(ih, 0)\n\n    ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih\n\n    ua = np.maximum(ua, np.finfo(float).eps)\n\n    intersection = iw * ih\n\n    return intersection / ua\n\n\ndef non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):\n    \"\"\"\n    Removes detections with lower object confidence score than 'conf_thres' and performs\n    Non-Maximum Suppression to further filter detections.\n    Returns detections with shape:\n        (x1, y1, x2, y2, object_conf, class_score, class_pred)\n    \"\"\"\n\n    # From (center x, center y, width, height) to (x1, y1, x2, y2)\n    box_corner = prediction.new(prediction.shape)\n    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2\n    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2\n    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2\n    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2\n    prediction[:, :, :4] = box_corner[:, :, :4]\n\n    output = [None for _ in range(len(prediction))]\n    for image_i, image_pred in enumerate(prediction):\n        # Filter out confidence scores below threshold\n        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()\n        image_pred = image_pred[conf_mask]\n        # If none are remaining => process next image\n        if not image_pred.size(0):\n            continue\n        # Get score and class with highest confidence\n        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)\n        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)\n        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)\n        # Iterate through all predicted classes\n        unique_labels = detections[:, -1].cpu().unique()\n        if prediction.is_cuda:\n            unique_labels = unique_labels.cuda()\n        for c in unique_labels:\n            # Get the detections with the particular class\n            detections_class = detections[detections[:, -1] == c]\n            # Sort the detections by maximum objectness confidence\n            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)\n            detections_class = detections_class[conf_sort_index]\n            # Perform non-maximum suppression\n            max_detections = []\n            while detections_class.size(0):\n                # Get detection with highest confidence and save as max detection\n                max_detections.append(detections_class[0].unsqueeze(0))\n                # Stop if we're at the last detection\n                if len(detections_class) == 1:\n                    break\n                # Get the IOUs for all boxes with lower confidence\n                ious = bbox_iou(max_detections[-1], detections_class[1:])\n                # Remove detections with IoU >= NMS threshold\n                detections_class = detections_class[1:][ious < nms_thres]\n\n            max_detections = torch.cat(max_detections).data\n            # Add max detections to outputs\n            output[image_i] = (\n                max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))\n            )\n\n    return output\n\n\ndef build_targets(\n    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim\n):\n    nB = target.size(0)\n    nA = num_anchors\n    nC = num_classes\n    nG = grid_size\n    mask = torch.zeros(nB, nA, nG, nG)\n    conf_mask = torch.ones(nB, nA, nG, nG)\n    tx = torch.zeros(nB, nA, nG, nG)\n    ty = torch.zeros(nB, nA, nG, nG)\n    tw = torch.zeros(nB, nA, nG, nG)\n    th = torch.zeros(nB, nA, nG, nG)\n    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)\n    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)\n\n    nGT = 0\n    nCorrect = 0\n    for b in range(nB):\n        for t in range(target.shape[1]):\n            if target[b, t].sum() == 0:\n                continue\n            nGT += 1\n            # Convert to position relative to box\n            gx = target[b, t, 1] * nG\n            gy = target[b, t, 2] * nG\n            gw = target[b, t, 3] * nG\n            gh = target[b, t, 4] * nG\n            # Get grid box indices\n            gi = int(gx)\n            gj = int(gy)\n            # Get shape of gt box\n            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)\n            # Get shape of anchor box\n            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))\n            # Calculate iou between gt and anchor shapes\n            anch_ious = bbox_iou(gt_box, anchor_shapes)\n            # Where the overlap is larger than threshold set mask to zero (ignore)\n            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0\n            # Find the best matching anchor box\n            best_n = np.argmax(anch_ious)\n            # Get ground truth box\n            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)\n            # Get the best prediction\n            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)\n            # Masks\n            mask[b, best_n, gj, gi] = 1\n            conf_mask[b, best_n, gj, gi] = 1\n            # Coordinates\n            tx[b, best_n, gj, gi] = gx - gi\n            ty[b, best_n, gj, gi] = gy - gj\n            # Width and height\n            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)\n            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)\n            # One-hot encoding of label\n            target_label = int(target[b, t, 0])\n            tcls[b, best_n, gj, gi, target_label] = 1\n            tconf[b, best_n, gj, gi] = 1\n\n            # Calculate iou between ground truth and best matching prediction\n            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)\n            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])\n            score = pred_conf[b, best_n, gj, gi]\n            if iou > 0.5 and pred_label == target_label and score > 0.5:\n                nCorrect += 1\n\n    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls\n\n\ndef to_categorical(y, num_classes):\n    \"\"\" 1-hot encodes a tensor \"\"\"\n    return torch.from_numpy(np.eye(num_classes, dtype=\"uint8\")[y])\n"
  },
  {
    "path": "requirements.txt",
    "content": "cycler==0.10.0\nkiwisolver==1.1.0\nmatplotlib==3.1.2\nnumpy==1.21.0\nopencv-python==4.2.0.32\npyparsing==2.4.6\npython-dateutil==2.8.1\nsix==1.14.0\ntqdm==4.41.1\n"
  },
  {
    "path": "src/3d_detector.py",
    "content": "#!/home/sibot/anaconda2/bin/python\n\"\"\" yolo_bbox_to_sort.py\n    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification\n\"\"\"\nimport time\nimport rospy\nimport ros_numpy\nimport sys\nimport numpy as np\nimport torch\nimport pdb\nimport time\nimport os\nimport cv2\nfrom std_msgs.msg import Int8\nimport message_filters\nfrom sensor_msgs.msg import PointCloud2, Image\nfrom darknet_ros_msgs.msg import BoundingBoxes, BoundingBox\nfrom featurepointnet_model_util import generate_detections_3d, convert_depth_features\nfrom featurepointnet_model import create_depth_model\nfrom calibration import OmniCalibration\nfrom visualization_msgs.msg import MarkerArray, Marker\nfrom cv_bridge import CvBridge, CvBridgeError\nfrom geometry_msgs.msg import Pose, Vector3\nfrom std_msgs.msg import ColorRGBA\nfrom jpda_rospack.msg import detection3d_with_feature_array, detection3d_with_feature\n\nclass Detector_3d:\n    def __init__(self):\n        self.node_name = \"fpointnet_detector_plus_feature\"\n        \n        rospy.init_node(self.node_name)\n        rospy.on_shutdown(self.cleanup)\n        fpointnet_config = \\\n            rospy.get_param('~fpointnet_config',\n                            '~/jr2_catkin_workspace/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt')\n        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')\n        self.depth_model = create_depth_model('FPointNet', fpointnet_config)\n        self.calib = OmniCalibration(calibration_folder)\n        self.velodyne_sub_upper = \\\n            message_filters.Subscriber(\"/upper_velodyne/velodyne_points\", PointCloud2, queue_size=2)\n        self.velodyne_sub_lower = \\\n            message_filters.Subscriber(\"/lower_velodyne/velodyne_points\", PointCloud2, queue_size=2)\n        self.yolo_bbox_sub = \\\n            message_filters.Subscriber(\"/omni_yolo_bboxes\", BoundingBoxes, queue_size=2)\n        \n        self.time_sync = \\\n            message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub,\n                                                         self.velodyne_sub_upper,\n                                                         self.velodyne_sub_lower], 5, 0.06)\n        self.time_sync.registerCallback(self.get_3d_feature)\n    \n        self.feature_3d_pub = rospy.Publisher(\"detection3d_with_feature\", detection3d_with_feature_array, queue_size=10)\n        self.pc_transform_pub = rospy.Publisher(\"/transformed_pointcloud\", PointCloud2, queue_size=10)\n        self.pc_pub = rospy.Publisher(\"/frustum\", PointCloud2, queue_size=10)\n        self.debug_pub = rospy.Publisher(\"/test\", Int8, queue_size=1)\n        self.marker_box_pub = rospy.Publisher(\"/3d_detection_markers\", MarkerArray, queue_size=10)\n        rospy.loginfo(\"3D detector ready.\")\n        \n    def get_3d_feature(self, y1_bboxes, pointcloud_upper, pointcloud_lower):\n        start = time.time()\n        #rospy.loginfo('Processing Pointcloud with FPointNet')\n        # Assumed that pointclouds have 64 bit floats!\n        pc_upper = ros_numpy.numpify(pointcloud_upper).astype({'names':['x','y','z','intensity','ring'], 'formats':['f4','f4','f4','f4','f4'], 'offsets':[0,4,8,16,20], 'itemsize':32})\n        pc_lower = ros_numpy.numpify(pointcloud_lower).astype({'names':['x','y','z','intensity','ring'], 'formats':['f4','f4','f4','f4','f4'], 'offsets':[0,4,8,16,20], 'itemsize':32})\n        pc_upper = torch.from_numpy(pc_upper.view(np.float32).reshape(pc_upper.shape + (-1,)))[:, [0,1,2,4]]\n        pc_lower = torch.from_numpy(pc_lower.view(np.float32).reshape(pc_lower.shape + (-1,)))[:, [0,1,2,4]]\n        # move onto gpu if available\n        try:\n            pc_upper = pc_upper.cuda()\n            pc_lower = pc_lower.cuda()\n        except:\n            pass\n        # translate and rotate into camera frame using calib object\n        # in message pointcloud has x pointing forward, y pointing to the left and z pointing upward\n        # need to transform this such that x is pointing to the right, y pointing downwards, z pointing forward\n        # also done inside calib\n        pc_upper = self.calib.move_lidar_to_camera_frame(pc_upper, upper=True)\n        pc_lower = self.calib.move_lidar_to_camera_frame(pc_lower, upper=False)\n        pc = torch.cat([pc_upper, pc_lower], dim = 0)\n        pc[:, 3] = 1\n        # pc = pc.cpu().numpy()\n        # self.publish_pointcloud_from_array(pc, self.pc_transform_pub, header = pointcloud_upper.header)\n        # idx = torch.randperm(pc.shape[0]).cuda()\n        # pc = pc[idx]\n        detections_2d = []\n        frame_det_ids = []\n        count = 0\n        for y1_bbox in y1_bboxes.bounding_boxes:\n            if y1_bbox.Class == 'person':\n                xmin = y1_bbox.xmin\n                xmax = y1_bbox.xmax\n                ymin = y1_bbox.ymin\n                ymax = y1_bbox.ymax\n                probability = y1_bbox.probability\n                frame_det_ids.append(count)\n                count += 1\n                detections_2d.append([xmin, ymin, xmax, ymax, probability, -1, -1])\n        features_3d = detection3d_with_feature_array()\n        features_3d.header.stamp = y1_bboxes.header.stamp\n        features_3d.header.frame_id = 'occam'\n        boxes_3d_markers = MarkerArray()\n        if not detections_2d:\n            self.marker_box_pub.publish(boxes_3d_markers)\n            self.feature_3d_pub.publish(features_3d)\n            return\n        boxes_3d, valid_3d, rot_angles, _, depth_features, frustums = \\\n            generate_detections_3d(self.depth_model, detections_2d, pc,\n                                   self.calib, (3, 480, 3760), omni=True,\n                                   peds=True)\n        depth_features = convert_depth_features(depth_features, valid_3d)\n\n        for box, feature, i in zip(boxes_3d, depth_features, frame_det_ids):\n            #frustum = frustums[i]\n            #frustum[:, [0,2]] = np.squeeze(np.matmul(\n            #                 np.array([[np.cos(rot_angles[i]), np.sin(rot_angles[i])], \n            #                 [-np.sin(rot_angles[i]), np.cos(rot_angles[i])]]), \n            #                 np.expand_dims(frustum[:, [0,2]], 2)), 2)\n            # frustum[:, 3] = np.amax(logits[i], axis = 1)\n            #self.publish_pointcloud_from_array(frustum, self.pc_pub, header = pointcloud_upper.header)\n            det_msg = detection3d_with_feature()\n            det_msg.header.frame_id = 'occam'\n            det_msg.header.stamp = features_3d.header.stamp\n            det_msg.valid = True if valid_3d[i] != -1 else False\n            det_msg.frame_det_id = i\n            if det_msg.valid:\n                det_msg.x = box[0]\n                det_msg.y = box[1]\n                det_msg.z = box[2]\n                det_msg.l = box[3]\n                det_msg.h = box[4]\n                det_msg.w = box[5]\n                det_msg.theta = box[6]\n                det_msg.feature = feature\n                features_3d.detection3d_with_features.append(det_msg)\n                pose_msg = Pose()\n                marker_msg = Marker()\n                marker_msg.header.stamp = pointcloud_lower.header.stamp\n                marker_msg.header.frame_id = 'occam'\n                marker_msg.action = 0\n                marker_msg.id = i\n                marker_msg.lifetime = rospy.Duration(0.2)\n                marker_msg.type = 1\n                marker_msg.scale = Vector3(box[3], box[4], box[5])\n                pose_msg.position.x = det_msg.x\n                pose_msg.position.y = det_msg.y - det_msg.h/2\n                pose_msg.position.z = det_msg.z\n                marker_msg.pose = pose_msg\n                marker_msg.color = ColorRGBA(g=1, a =0.5)\n                boxes_3d_markers.markers.append(marker_msg)\n            else:\n                det_msg.y = -1\n                det_msg.x = -1\n                det_msg.z = -1\n                det_msg.l = -1\n                det_msg.w = -1\n                det_msg.h = -1\n                det_msg.theta = -1\n                det_msg.feature = [-1]\n            features_3d.detection3d_with_features.append(det_msg)\n\n        \n        self.marker_box_pub.publish(boxes_3d_markers)\n        \n        self.feature_3d_pub.publish(features_3d)\n        \n        # rospy.loginfo(\"3D detector time: {}\".format(time.time() - start))\n\n    def publish_pointcloud_from_array(self, pointcloud, publisher, frame = 'occam', header = None):\n        list_pc = [tuple(j) for j in pointcloud]\n        pc_output_msg = np.array(list_pc, dtype = [('x', 'f4'),('y', 'f4'),('z', 'f4'),('intensity', 'f4')])\n        pc_msg = ros_numpy.msgify(PointCloud2, pc_output_msg)\n        if header is not None:\n            pc_msg.header.stamp = header.stamp\n        pc_msg.header.frame_id = 'occam'\n        publisher.publish(pc_msg)\n\n    def cleanup(self):\n        print(\"Shutting down 3D-Detection node.\")\n    \ndef main(args):       \n    try:\n        Detector_3d()\n        rospy.spin()\n    except KeyboardInterrupt:\n        print(\"Shutting down 3D-Detection node.\")\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "src/EKF.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport pdb\n\n\"\"\"\nTable for the 0.95 quantile of the chi-square distribution with N degrees of\nfreedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv\nfunction and used as Mahalanobis gating threshold.\n\"\"\"\nchi2inv95 = {\n    1: 3.8415,\n    2: 5.9915,\n    3: 7.8147,\n    4: 9.4877,\n    5: 11.070,\n    6: 12.592,\n    7: 14.067,\n    8: 15.507,\n    9: 16.919}\n\nchi2inv90 = {\n    1: 2.706,\n    2: 4.605,\n    3: 6.251,\n    4: 7.779,\n    5: 9.236,\n    6: 10.645,\n    7: 12.017,\n    8: 13.363,\n    9: 14.684}\n\nchi2inv975 = {\n    1: 5.025,\n    2: 7.378,\n    3: 9.348,\n    4: 11.143,\n    5: 12.833,\n    6: 14.449,\n    7: 16.013,\n    8: 17.535,\n    9: 19.023}\n\nchi2inv10 = {\n    1: .016,\n    2: .221,\n    3: .584,\n    4: 1.064,\n    5: 1.610,\n    6: 2.204,\n    7: 2.833,\n    8: 3.490,\n    9: 4.168}\n\n\nchi2inv995 = {\n    1: 0.0000393,\n    2: 0.0100,\n    3: .0717,\n    4: .207,\n    5: .412,\n    6: .676,\n    7: .989,\n    8: 1.344,\n    9: 1.735}\n\n\nchi2inv75 = {\n    1: 1.323,\n    2: 2.773,\n    3: 4.108,\n    4: 5.385,\n    5: 6.626,\n    6: 7.841,\n    7: 9.037,\n    8: 10.22,\n    9: 11.39}\n\ndef squared_mahalanobis_distance(mean, covariance, measurements):\n    # cholesky factorization used to solve for \n    # z = d * inv(covariance)\n    # so z is also the solution to \n    # covariance * z = d       \n    d = measurements - mean\n    # cholesky_factor = np.linalg.cholesky(covariance)\n    # z = scipy.linalg.solve_triangular(\n    #     cholesky_factor, d.T, lower=True, check_finite=False,\n    #     overwrite_b=True)\n\n    squared_maha = np.linalg.multi_dot([d, np.linalg.inv(covariance),\n                                        d.T]).diagonal()\n    return squared_maha\n\n\nclass EKF(object):\n    \"\"\"\n    Generic extended kalman filter class\n\n    \"\"\"\n\n    def __init__(self):\n        pass\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the new track. \n            Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        pass\n\n\n    def predict_mean(self, mean):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        pass\n\n    def get_process_noise(self, mean, covariance):\n        # Returns Rt the motion noise covariance\n        pass\n    def predict_covariance(self, mean, covariance):\n        pass\n\n    def project_mean(self, mean):\n        # Measurement prediction from state (function h)\n        # Calculations sensor update Jacobian (Ht)\n        # Returns (h(mean), Ht)\n        pass\n    def project_cov(self, mean, covariance):\n        pass\n\n    def predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The covariance matrix of the object state at the\n            previous time step.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        # Perform prediction\n        covariance = self.predict_covariance(mean, covariance) \n        mean = self.predict_mean(mean)\n\n        return mean, covariance\n    def get_innovation_cov(self, covariance):\n        pass\n\n    def project(self, mean, covariance):\n        \"\"\"Project state distribution to measurement space.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The state's mean vector \n        covariance : ndarray\n            The state's covariance matrix\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the projected mean and covariance matrix of the given state\n            estimate.\n\n        \"\"\"\n\n        # Measurement uncertainty scaled by estimated height\n        return self.project_mean(mean), self.project_cov(mean, covariance)\n\n    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, a, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n        predicted_measurement, innovation_cov  = self.project(mean, covariance)\n        # cholesky factorization used to solve for kalman gain since\n        # K = covariance * update_mat.T * inv(innovation_cov)\n        # so K is also the solution to \n        # innovation_cov * K = covariance * update_mat.T\n        try:\n            chol_factor, lower = scipy.linalg.cho_factor(\n                innovation_cov, lower=True, check_finite=False)\n            kalman_gain = scipy.linalg.cho_solve(\n                (chol_factor, lower), np.dot(covariance, self._observation_mat.T).T,\n                check_finite=False).T\n        except:\n            # in case cholesky factorization fails, revert to standard solver\n            kalman_gain = np.linalg.solve(innovation_cov, np.dot(covariance, self._observation_mat.T).T).T\n\n        if JPDA:\n            # marginalization\n            innovation = np.zeros((self.ndim)) \n            cov_soft = np.zeros((self.ndim, self.ndim))\n\n            for measurement_idx, measurement in enumerate(measurement_t):\n\n                p_ij = marginalization[measurement_idx + 1] # + 1 for dummy\n                y_ij = measurement - predicted_measurement\n                innovation += y_ij * p_ij\n                cov_soft += p_ij * np.outer(y_ij, y_ij)\n\n            cov_soft = cov_soft - np.outer(innovation, innovation)\n\n            P_star = covariance - np.linalg.multi_dot((\n                kalman_gain, innovation_cov, kalman_gain.T))\n\n            p_0 = marginalization[0]\n            P_0 = p_0 * covariance + (1 - p_0) * P_star\n\n            new_covariance = P_0 + np.linalg.multi_dot((kalman_gain, cov_soft, kalman_gain.T))\n            \n        else:\n            innovation = measurement_t - predicted_measurement\n\n            new_covariance = covariance - np.linalg.multi_dot((\n                kalman_gain, innovation_cov, kalman_gain.T))\n\n        new_mean = mean + np.dot(innovation, kalman_gain.T)\n        return new_mean, new_covariance\n"
  },
  {
    "path": "src/JPDA_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom linear_assignment import min_marg_matching\nimport pdb\n\n\ndef get_unmatched(all_idx, matches, i, marginalization=None):\n    assigned = [match[i] for match in matches]\n    unmatched = set(all_idx) - set(assigned)\n    if marginalization is not None:\n        # from 1 for dummy node\n        in_gate_dets = np.nonzero(np.sum(\n            marginalization[:, 1:], axis=0))[0].tolist()\n        unmatched = [d for d in unmatched if d not in in_gate_dets]\n    return list(unmatched)\n\n\nclass Matcher:\n\n    def __init__(self, detections, marginalizations, confirmed_tracks,\n                 matching_strategy,\n                 assignment_threshold=None):\n        self.detections = detections\n        self.marginalizations = marginalizations\n        self.confirmed_tracks = confirmed_tracks\n        self.assignment_threshold = assignment_threshold\n        self.detection_indices = np.arange(len(detections))\n        self.matching_strategy = matching_strategy\n\n    def match(self):\n        self.get_matches()\n        self.get_unmatched_tracks()\n        self.get_unmatched_detections()\n        return self.matches, self.unmatched_tracks, self.unmatched_detections\n\n    def get_matches(self):\n\n        if self.matching_strategy == \"max_and_threshold\":\n            self.max_and_threshold_matching()\n        elif self.matching_strategy == \"hungarian\":\n            self.hungarian()\n        elif self.matching_strategy == \"max_match\":\n            self.max_match()\n        elif self.matching_strategy == \"none\":\n            self.matches = []\n        else: \n            raise Exception('Unrecognized matching strategy: {}'.\n                            format(self.matching_strategy))\n\n    def get_unmatched_tracks(self):\n        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,\n                                              self.matches, 0)\n\n    def get_unmatched_detections(self):\n        self.unmatched_detections = get_unmatched(self.detection_indices, self.matches, 1, self.marginalizations)\n\n    def max_match(self):\n        self.matches = []\n        if self.marginalizations.shape[0] == 0:\n            return\n\n        detection_map = {}\n        for i, track_idx in enumerate(self.confirmed_tracks):\n            marginalization = self.marginalizations[i,:]\n            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy\n\n            if detection_id < 0:\n                continue\n\n            if detection_id not in detection_map.keys():\n                detection_map[detection_id] = track_idx\n            else:\n                cur_track = detection_map[detection_id]\n                track_update = track_idx if self.marginalizations[track_idx, detection_id] > self.marginalizations[cur_track, detection_id] else cur_track\n                detection_map[detection_id] = track_update\n            threshold_p = marginalization[detection_id + 1]\n            if threshold_p < self.assignment_threshold:\n                continue\n\n        for detection in detection_map.keys():\n            self.matches.append((detection_map[detection], detection))\n\n    def max_and_threshold_matching(self):\n\n        self.matches = []\n        if self.marginalizations.shape[0] == 0:\n            return\n\n        for i, track_idx in enumerate(self.confirmed_tracks):\n            marginalization = self.marginalizations[i,:]\n            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy\n\n            if detection_id < 0:\n                continue\n\n            threshold_p = marginalization[detection_id + 1]\n            if threshold_p < self.assignment_threshold:\n                continue\n\n            self.matches.append((track_idx, detection_id))\n\n    def hungarian(self):\n        self.matches, _, _ = min_marg_matching(self.marginalizations,\n                                               self.confirmed_tracks,\n                                               self.assignment_threshold)\n                               "
  },
  {
    "path": "src/__init__.py",
    "content": ""
  },
  {
    "path": "src/aligned_reid_model.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.init as init\nimport torch.nn.functional as F\nimport torch.utils.model_zoo as model_zoo\nimport os\nimport math\n\n\nclass Model(nn.Module):\n  def __init__(self, local_conv_out_channels=128, num_classes=None):\n    super(Model, self).__init__()\n    self.base = resnet50(pretrained=True)\n    planes = 2048\n    self.local_conv = nn.Conv2d(planes, local_conv_out_channels, 1)\n    self.local_bn = nn.BatchNorm2d(local_conv_out_channels)\n    self.local_relu = nn.ReLU(inplace=True)\n\n    if num_classes is not None:\n      self.fc = nn.Linear(planes, num_classes)\n      init.normal(self.fc.weight, std=0.001)\n      init.constant(self.fc.bias, 0)\n\n  def forward(self, x):\n    \"\"\"\n    Returns:\n      global_feat: shape [N, C]\n      local_feat: shape [N, H, c]\n    \"\"\"\n    # shape [N, C, H, W]\n    feat = self.base(x)\n    global_feat = F.avg_pool2d(feat, feat.size()[2:])\n    # shape [N, C]\n    # global_feat = global_feat.view(global_feat.size(0), -1)\n    # shape [N, C, H, 1]\n    # local_feat = torch.mean(feat, -1, keepdim=True)\n    # local_feat = self.local_relu(self.local_bn(self.local_conv(local_feat)))\n    # # shape [N, H, c]\n    # local_feat = local_feat.squeeze(-1).permute(0, 2, 1)\n    return global_feat\n\n\n__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',\n           'resnet152']\n\nmodel_urls = {\n  'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n  'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n  'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n  'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n  'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\nos.environ[\"TORCH_HOME\"] = \"./ResNet_Model\"\n\ndef conv3x3(in_planes, out_planes, stride=1):\n  \"\"\"3x3 convolution with padding\"\"\"\n  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                   padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n  expansion = 1\n\n  def __init__(self, inplanes, planes, stride=1, downsample=None):\n    super(BasicBlock, self).__init__()\n    self.conv1 = conv3x3(inplanes, planes, stride)\n    self.bn1 = nn.BatchNorm2d(planes)\n    self.relu = nn.ReLU(inplace=True)\n    self.conv2 = conv3x3(planes, planes)\n    self.bn2 = nn.BatchNorm2d(planes)\n    self.downsample = downsample\n    self.stride = stride\n\n  def forward(self, x):\n    residual = x\n\n    out = self.conv1(x)\n    out = self.bn1(out)\n    out = self.relu(out)\n\n    out = self.conv2(out)\n    out = self.bn2(out)\n\n    if self.downsample is not None:\n      residual = self.downsample(x)\n\n    out += residual\n    out = self.relu(out)\n\n    return out\n\n\nclass Bottleneck(nn.Module):\n  expansion = 4\n\n  def __init__(self, inplanes, planes, stride=1, downsample=None):\n    super(Bottleneck, self).__init__()\n    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n    self.bn1 = nn.BatchNorm2d(planes)\n    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,\n                           padding=1, bias=False)\n    self.bn2 = nn.BatchNorm2d(planes)\n    self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)\n    self.bn3 = nn.BatchNorm2d(planes * 4)\n    self.relu = nn.ReLU(inplace=True)\n    self.downsample = downsample\n    self.stride = stride\n\n  def forward(self, x):\n    residual = x\n\n    out = self.conv1(x)\n    out = self.bn1(out)\n    out = self.relu(out)\n\n    out = self.conv2(out)\n    out = self.bn2(out)\n    out = self.relu(out)\n\n    out = self.conv3(out)\n    out = self.bn3(out)\n\n    if self.downsample is not None:\n      residual = self.downsample(x)\n\n    out += residual\n    out = self.relu(out)\n\n    return out\n\n\nclass ResNet(nn.Module):\n\n  def __init__(self, block, layers):\n    self.inplanes = 64\n    super(ResNet, self).__init__()\n    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,\n                           bias=False)\n    self.bn1 = nn.BatchNorm2d(64)\n    self.relu = nn.ReLU(inplace=True)\n    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n    self.layer1 = self._make_layer(block, 64, layers[0])\n    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)\n\n    for m in self.modules():\n      if isinstance(m, nn.Conv2d):\n        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n        m.weight.data.normal_(0, math.sqrt(2. / n))\n      elif isinstance(m, nn.BatchNorm2d):\n        m.weight.data.fill_(1)\n        m.bias.data.zero_()\n\n  def _make_layer(self, block, planes, blocks, stride=1):\n    downsample = None\n    if stride != 1 or self.inplanes != planes * block.expansion:\n      downsample = nn.Sequential(\n        nn.Conv2d(self.inplanes, planes * block.expansion,\n                  kernel_size=1, stride=stride, bias=False),\n        nn.BatchNorm2d(planes * block.expansion),\n      )\n\n    layers = []\n    layers.append(block(self.inplanes, planes, stride, downsample))\n    self.inplanes = planes * block.expansion\n    for i in range(1, blocks):\n      layers.append(block(self.inplanes, planes))\n\n    return nn.Sequential(*layers)\n\n  def forward(self, x):\n    x = self.conv1(x)\n    x = self.bn1(x)\n    x = self.relu(x)\n    x = self.maxpool(x)\n\n    x = self.layer1(x)\n    x = self.layer2(x)\n    x = self.layer3(x)\n    x = self.layer4(x)\n\n    return x\n\n\ndef remove_fc(state_dict):\n  \"\"\"Remove the fc layer parameters from state_dict.\"\"\"\n  new_state_dict = state_dict.copy()\n  for key, value in state_dict.items():\n    if key.startswith('fc.'):\n      del new_state_dict[key]\n  return new_state_dict\n\n\ndef resnet18(pretrained=False):\n  \"\"\"Constructs a ResNet-18 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(BasicBlock, [2, 2, 2, 2])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet18'])))\n  return model\n\n\ndef resnet34(pretrained=False):\n  \"\"\"Constructs a ResNet-34 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(BasicBlock, [3, 4, 6, 3])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet34'])))\n  return model\n\n\ndef resnet50(pretrained=False):\n  \"\"\"Constructs a ResNet-50 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 4, 6, 3])\n  if pretrained:\n    model.load_state_dict(remove_fc(model_zoo.load_url(model_urls['resnet50'], model_dir=\"./ResNet_Model\")))### ADDED MODEL_DIR\n  return model\n\n\ndef resnet101(pretrained=False):\n  \"\"\"Constructs a ResNet-101 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 4, 23, 3])\n  if pretrained:\n    model.load_state_dict(\n      remove_fc(model_zoo.load_url(model_urls['resnet101'])))\n  return model\n\n\ndef resnet152(pretrained=False):\n  \"\"\"Constructs a ResNet-152 model.\n\n  Args:\n      pretrained (bool): If True, returns a model pre-trained on ImageNet\n  \"\"\"\n  model = ResNet(Bottleneck, [3, 8, 36, 3])\n  if pretrained:\n    model.load_state_dict(\n      remove_fc(model_zoo.load_url(model_urls['resnet152'])))\n  return model\n"
  },
  {
    "path": "src/aligned_reid_utils.py",
    "content": "from __future__ import print_function\nimport os\nimport os.path as osp\nimport pickle\nfrom scipy import io\nimport datetime\nimport time\nfrom contextlib import contextmanager\nimport numpy as np\nfrom PIL import Image\nimport torch\nfrom torch.autograd import Variable\nfrom aligned_reid_model import Model as aligned_reid_model\n# from models.deep_sort_model import ImageEncoder as deep_sort_model\n\ndef time_str(fmt=None):\n  if fmt is None:\n    fmt = '%Y-%m-%d_%H:%M:%S'\n  return datetime.datetime.today().strftime(fmt)\n\n\ndef load_pickle(path):\n  \"\"\"Check and load pickle object.\n  According to this post: https://stackoverflow.com/a/41733927, cPickle and \n  disabling garbage collector helps with loading speed.\"\"\"\n  assert osp.exists(path)\n  # gc.disable()\n  with open(path, 'rb') as f:\n    ret = pickle.load(f)\n  # gc.enable()\n  return ret\n\n\ndef save_pickle(obj, path):\n  \"\"\"Create dir and save file.\"\"\"\n  may_make_dir(osp.dirname(osp.abspath(path)))\n  with open(path, 'wb') as f:\n    pickle.dump(obj, f, protocol=2)\n\n\ndef save_mat(ndarray, path):\n  \"\"\"Save a numpy ndarray as .mat file.\"\"\"\n  io.savemat(path, dict(ndarray=ndarray))\n\n\ndef to_scalar(vt):\n  \"\"\"Transform a length-1 pytorch Variable or Tensor to scalar. \n  Suppose tx is a torch Tensor with shape tx.size() = torch.Size([1]), \n  then npx = tx.cpu().numpy() has shape (1,), not 1.\"\"\"\n  if isinstance(vt, Variable):\n    return vt.data.cpu().numpy().flatten()[0]\n  if torch.is_tensor(vt):\n    return vt.cpu().numpy().flatten()[0]\n  raise TypeError('Input should be a variable or tensor')\n\n\ndef transfer_optim_state(state, device_id=-1):\n  \"\"\"Transfer an optimizer.state to cpu or specified gpu, which means \n  transferring tensors of the optimizer.state to specified device. \n  The modification is in place for the state.\n  Args:\n    state: An torch.optim.Optimizer.state\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for key, val in state.items():\n    if isinstance(val, dict):\n      transfer_optim_state(val, device_id=device_id)\n    elif isinstance(val, Variable):\n      raise RuntimeError(\"Oops, state[{}] is a Variable!\".format(key))\n    elif isinstance(val, torch.nn.Parameter):\n      raise RuntimeError(\"Oops, state[{}] is a Parameter!\".format(key))\n    else:\n      try:\n        if device_id == -1:\n          state[key] = val.cpu()\n        else:\n          state[key] = val.cuda(device=device_id)\n      except:\n        pass\n\n\ndef may_transfer_optims(optims, device_id=-1):\n  \"\"\"Transfer optimizers to cpu or specified gpu, which means transferring \n  tensors of the optimizer to specified device. The modification is in place \n  for the optimizers.\n  Args:\n    optims: A list, which members are either torch.nn.optimizer or None.\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for optim in optims:\n    if isinstance(optim, torch.optim.Optimizer):\n      transfer_optim_state(optim.state, device_id=device_id)\n\n\ndef may_transfer_modules_optims(modules_and_or_optims, device_id=-1):\n  \"\"\"Transfer optimizers/modules to cpu or specified gpu.\n  Args:\n    modules_and_or_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module or None.\n    device_id: gpu id, or -1 which means transferring to cpu\n  \"\"\"\n  for item in modules_and_or_optims:\n    if isinstance(item, torch.optim.Optimizer):\n      transfer_optim_state(item.state, device_id=device_id)\n    elif isinstance(item, torch.nn.Module):\n      if device_id == -1:\n        item.cpu()\n      else:\n        item.cuda(device=device_id)\n    elif item is not None:\n      print('[Warning] Invalid type {}'.format(item.__class__.__name__))\n\n\nclass TransferVarTensor(object):\n  \"\"\"Return a copy of the input Variable or Tensor on specified device.\"\"\"\n\n  def __init__(self, device_id=-1):\n    self.device_id = device_id\n\n  def __call__(self, var_or_tensor):\n    return var_or_tensor.cpu() if self.device_id == -1 \\\n      else var_or_tensor.cuda(self.device_id)\n\n\nclass TransferModulesOptims(object):\n  \"\"\"Transfer optimizers/modules to cpu or specified gpu.\"\"\"\n\n  def __init__(self, device_id=-1):\n    self.device_id = device_id\n\n  def __call__(self, modules_and_or_optims):\n    may_transfer_modules_optims(modules_and_or_optims, self.device_id)\n\n\ndef set_devices(sys_device_ids):\n  \"\"\"\n  It sets some GPUs to be visible and returns some wrappers to transferring \n  Variables/Tensors and Modules/Optimizers.\n  Args:\n    sys_device_ids: a tuple; which GPUs to use\n      e.g.  sys_device_ids = (), only use cpu\n            sys_device_ids = (3,), use the 4th gpu\n            sys_device_ids = (0, 1, 2, 3,), use first 4 gpus\n            sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus\n  Returns:\n    TVT: a `TransferVarTensor` callable\n    TMO: a `TransferModulesOptims` callable\n  \"\"\"\n  # Set the CUDA_VISIBLE_DEVICES environment variable\n  import os\n  visible_devices = ''\n  for i in sys_device_ids:\n    visible_devices += '{}, '.format(i)\n  os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices\n  # Return wrappers.\n  # Models and user defined Variables/Tensors would be transferred to the\n  # first device.\n  device_id = 0 if len(sys_device_ids) > 0 else -1\n  TVT = TransferVarTensor(device_id)\n  TMO = TransferModulesOptims(device_id)\n  return TVT, TMO\n\n\ndef set_devices_for_ml(sys_device_ids):\n  \"\"\"This version is for mutual learning.\n  \n  It sets some GPUs to be visible and returns some wrappers to transferring \n  Variables/Tensors and Modules/Optimizers.\n  \n  Args:\n    sys_device_ids: a tuple of tuples; which devices to use for each model, \n      len(sys_device_ids) should be equal to number of models. Examples:\n        \n      sys_device_ids = ((-1,), (-1,))\n        the two models both on CPU\n      sys_device_ids = ((-1,), (2,))\n        the 1st model on CPU, the 2nd model on GPU 2\n      sys_device_ids = ((3,),)\n        the only one model on the 4th gpu \n      sys_device_ids = ((0, 1), (2, 3))\n        the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3\n      sys_device_ids = ((0,), (0,))\n        the two models both on GPU 0\n      sys_device_ids = ((0,), (0,), (1,), (1,))\n        the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1\n  \n  Returns:\n    TVTs: a list of `TransferVarTensor` callables, one for one model.\n    TMOs: a list of `TransferModulesOptims` callables, one for one model.\n    relative_device_ids: a list of lists; `sys_device_ids` transformed to \n      relative ids; to be used in `DataParallel`\n  \"\"\"\n  import os\n\n  all_ids = []\n  for ids in sys_device_ids:\n    all_ids += ids\n  unique_sys_device_ids = list(set(all_ids))\n  unique_sys_device_ids.sort()\n  if -1 in unique_sys_device_ids:\n    unique_sys_device_ids.remove(-1)\n\n  # Set the CUDA_VISIBLE_DEVICES environment variable\n\n  visible_devices = ''\n  for i in unique_sys_device_ids:\n    visible_devices += '{}, '.format(i)\n  os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices\n\n  # Return wrappers\n\n  relative_device_ids = []\n  TVTs, TMOs = [], []\n  for ids in sys_device_ids:\n    relative_ids = []\n    for id in ids:\n      if id != -1:\n        id = find_index(unique_sys_device_ids, id)\n      relative_ids.append(id)\n    relative_device_ids.append(relative_ids)\n\n    # Models and user defined Variables/Tensors would be transferred to the\n    # first device.\n    TVTs.append(TransferVarTensor(relative_ids[0]))\n    TMOs.append(TransferModulesOptims(relative_ids[0]))\n  return TVTs, TMOs, relative_device_ids\n\n\ndef load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):\n  \"\"\"Load state_dict's of modules/optimizers from file.\n  Args:\n    modules_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module.\n    ckpt_file: The file path.\n    load_to_cpu: Boolean. Whether to transform tensors in modules/optimizers \n      to cpu type.\n  \"\"\"\n  map_location = (lambda storage, loc: storage) if load_to_cpu else None\n  ckpt = torch.load(ckpt_file, map_location=map_location)\n  for m, sd in zip(modules_optims, ckpt['state_dicts']):\n    if 'fc.weight' in sd:\n      del sd['fc.weight']\n    if 'fc.bias' in sd:  \n      del sd['fc.bias']\n    load_state_dict(m, sd)\n  if verbose:\n    print('Resume from ckpt {}, \\nepoch {}, \\nscores {}'.format(\n      ckpt_file, ckpt['ep'], ckpt['scores']))\n  return ckpt['ep'], ckpt['scores']\n\n\ndef save_ckpt(modules_optims, ep, scores, ckpt_file):\n  \"\"\"Save state_dict's of modules/optimizers to file. \n  Args:\n    modules_optims: A list, which members are either torch.nn.optimizer \n      or torch.nn.Module.\n    ep: the current epoch number\n    scores: the performance of current model\n    ckpt_file: The file path.\n  Note:\n    torch.save() reserves device type and id of tensors to save, so when \n    loading ckpt, you have to inform torch.load() to load these tensors to \n    cpu or your desired gpu, if you change devices.\n  \"\"\"\n  state_dicts = [m.state_dict() for m in modules_optims]\n  ckpt = dict(state_dicts=state_dicts,\n              ep=ep,\n              scores=scores)\n  may_make_dir(osp.dirname(osp.abspath(ckpt_file)))\n  torch.save(ckpt, ckpt_file)\n\n\ndef load_state_dict(model, src_state_dict):\n  \"\"\"Copy parameters and buffers from `src_state_dict` into `model` and its \n  descendants. The `src_state_dict.keys()` NEED NOT exactly match \n  `model.state_dict().keys()`. For dict key mismatch, just\n  skip it; for copying error, just output warnings and proceed.\n\n  Arguments:\n    model: A torch.nn.Module object. \n    src_state_dict (dict): A dict containing parameters and persistent buffers.\n  Note:\n    This is modified from torch.nn.modules.module.load_state_dict(), to make\n    the warnings and errors more detailed.\n  \"\"\"\n  from torch.nn import Parameter\n  dest_state_dict = model.state_dict()\n  for name, param in src_state_dict.items():\n    ### CHANGED HERE FOR FINE TUNING\n    if name not in dest_state_dict:\n      continue\n    if isinstance(param, Parameter):\n      # backwards compatibility for serialized parameters\n      param = param.data\n    try:\n      dest_state_dict[name].copy_(param)\n    except Exception as e:\n      print(\"Warning: Error occurs when copying '{}': {}\"\n            .format(name, str(e)))\n\n  # src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())\n  # if len(src_missing) > 0:\n  #   print(\"Keys not found in source state_dict: \")\n  #   for n in src_missing:\n  #     print('\\t', n)\n\n  # dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())\n  # if len(dest_missing) > 0:\n  #   print(\"Keys not found in destination state_dict: \")\n  #   for n in dest_missing:\n  #     print('\\t', n)\n\n\ndef is_iterable(obj):\n  return hasattr(obj, '__len__')\n\n\ndef may_set_mode(maybe_modules, mode):\n  \"\"\"maybe_modules: an object or a list of objects.\"\"\"\n  assert mode in ['train', 'eval']\n  if not is_iterable(maybe_modules):\n    maybe_modules = [maybe_modules]\n  for m in maybe_modules:\n    if isinstance(m, torch.nn.Module):\n      if mode == 'train':\n        m.train()\n      else:\n        m.eval()\n\n\ndef may_make_dir(path):\n  \"\"\"\n  Args:\n    path: a dir, or result of `osp.dirname(osp.abspath(file_path))`\n  Note:\n    `osp.exists('')` returns `False`, while `osp.exists('.')` returns `True`!\n  \"\"\"\n  # This clause has mistakes:\n  # if path is None or '':\n\n  if path in [None, '']:\n    return\n  if not osp.exists(path):\n    os.makedirs(path)\n\n\nclass AverageMeter(object):\n  \"\"\"Modified from Tong Xiao's open-reid. \n  Computes and stores the average and current value\"\"\"\n\n  def __init__(self):\n    self.val = 0\n    self.avg = 0\n    self.sum = 0\n    self.count = 0\n\n  def reset(self):\n    self.val = 0\n    self.avg = 0\n    self.sum = 0\n    self.count = 0\n\n  def update(self, val, n=1):\n    self.val = val\n    self.sum += val * n\n    self.count += n\n    self.avg = float(self.sum) / (self.count + 1e-20)\n\n\nclass RunningAverageMeter(object):\n  \"\"\"Computes and stores the running average and current value\"\"\"\n\n  def __init__(self, hist=0.99):\n    self.val = None\n    self.avg = None\n    self.hist = hist\n\n  def reset(self):\n    self.val = None\n    self.avg = None\n\n  def update(self, val):\n    if self.avg is None:\n      self.avg = val\n    else:\n      self.avg = self.avg * self.hist + val * (1 - self.hist)\n    self.val = val\n\n\nclass RecentAverageMeter(object):\n  \"\"\"Stores and computes the average of recent values.\"\"\"\n\n  def __init__(self, hist_size=100):\n    self.hist_size = hist_size\n    self.fifo = []\n    self.val = 0\n\n  def reset(self):\n    self.fifo = []\n    self.val = 0\n\n  def update(self, val):\n    self.val = val\n    self.fifo.append(val)\n    if len(self.fifo) > self.hist_size:\n      del self.fifo[0]\n\n  @property\n  def avg(self):\n    assert len(self.fifo) > 0\n    return float(sum(self.fifo)) / len(self.fifo)\n\n\ndef get_model_wrapper(model, multi_gpu):\n  from torch.nn.parallel import DataParallel\n  if multi_gpu:\n    return DataParallel(model)\n  else:\n    return model\n\n\nclass ReDirectSTD(object):\n  \"\"\"Modified from Tong Xiao's `Logger` in open-reid.\n  This class overwrites sys.stdout or sys.stderr, so that console logs can\n  also be written to file.\n  Args:\n    fpath: file path\n    console: one of ['stdout', 'stderr']\n    immediately_visible: If `False`, the file is opened only once and closed\n      after exiting. In this case, the message written to file may not be\n      immediately visible (Because the file handle is occupied by the\n      program?). If `True`, each writing operation of the console will\n      open, write to, and close the file. If your program has tons of writing\n      operations, the cost of opening and closing file may be obvious. (?)\n  Usage example:\n    `ReDirectSTD('stdout.txt', 'stdout', False)`\n    `ReDirectSTD('stderr.txt', 'stderr', False)`\n  NOTE: File will be deleted if already existing. Log dir and file is created\n    lazily -- if no message is written, the dir and file will not be created.\n  \"\"\"\n\n  def __init__(self, fpath=None, console='stdout', immediately_visible=False):\n    import sys\n    import os\n    import os.path as osp\n\n    assert console in ['stdout', 'stderr']\n    self.console = sys.stdout if console == 'stdout' else sys.stderr\n    self.file = fpath\n    self.f = None\n    self.immediately_visible = immediately_visible\n    if fpath is not None:\n      # Remove existing log file.\n      if osp.exists(fpath):\n        os.remove(fpath)\n\n    # Overwrite\n    if console == 'stdout':\n      sys.stdout = self\n    else:\n      sys.stderr = self\n\n  def __del__(self):\n    self.close()\n\n  def __enter__(self):\n    pass\n\n  def __exit__(self, *args):\n    self.close()\n\n  def write(self, msg):\n    self.console.write(msg)\n    if self.file is not None:\n      may_make_dir(os.path.dirname(osp.abspath(self.file)))\n      if self.immediately_visible:\n        with open(self.file, 'a') as f:\n          f.write(msg)\n      else:\n        if self.f is None:\n          self.f = open(self.file, 'w')\n        self.f.write(msg)\n\n  def flush(self):\n    self.console.flush()\n    if self.f is not None:\n      self.f.flush()\n      import os\n      os.fsync(self.f.fileno())\n\n  def close(self):\n    self.console.close()\n    if self.f is not None:\n      self.f.close()\n\n\ndef set_seed(seed):\n  import random\n  random.seed(seed)\n  print('setting random-seed to {}'.format(seed))\n\n  import numpy as np\n  np.random.seed(seed)\n  print('setting np-random-seed to {}'.format(seed))\n\n  import torch\n  torch.backends.cudnn.enabled = False\n  print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))\n  # set seed for CPU\n  torch.manual_seed(seed)\n  print('setting torch-seed to {}'.format(seed))\n\n\ndef print_array(array, fmt='{:.2f}', end=' '):\n  \"\"\"Print a 1-D tuple, list, or numpy array containing digits.\"\"\"\n  s = ''\n  for x in array:\n    s += fmt.format(float(x)) + end\n  s += '\\n'\n  print(s)\n  return s\n\n\n# Great idea from https://github.com/amdegroot/ssd.pytorch\ndef str2bool(v):\n  return v.lower() in (\"yes\", \"true\", \"t\", \"1\")\n\n\ndef tight_float_str(x, fmt='{:.4f}'):\n  return fmt.format(x).rstrip('0').rstrip('.')\n\n\ndef find_index(seq, item):\n  for i, x in enumerate(seq):\n    if item == x:\n      return i\n  return -1\n\n\ndef adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):\n  \"\"\"Decay exponentially in the later phase of training. All parameters in the \n  optimizer share the same learning rate.\n  \n  Args:\n    optimizer: a pytorch `Optimizer` object\n    base_lr: starting learning rate\n    ep: current epoch, ep >= 1\n    total_ep: total number of epochs to train\n    start_decay_at_ep: start decaying at the BEGINNING of this epoch\n  \n  Example:\n    base_lr = 2e-4\n    total_ep = 300\n    start_decay_at_ep = 201\n    It means the learning rate starts at 2e-4 and begins decaying after 200 \n    epochs. And training stops after 300 epochs.\n  \n  NOTE: \n    It is meant to be called at the BEGINNING of an epoch.\n  \"\"\"\n  assert ep >= 1, \"Current epoch number should be >= 1\"\n\n  if ep < start_decay_at_ep:\n    return\n\n  for g in optimizer.param_groups:\n    g['lr'] = (base_lr * (0.001 ** (float(ep + 1 - start_decay_at_ep)\n                                    / (total_ep + 1 - start_decay_at_ep))))\n  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))\n\n\ndef adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):\n  \"\"\"Multiplied by a factor at the BEGINNING of specified epochs. All \n  parameters in the optimizer share the same learning rate.\n  \n  Args:\n    optimizer: a pytorch `Optimizer` object\n    base_lr: starting learning rate\n    ep: current epoch, ep >= 1\n    decay_at_epochs: a list or tuple; learning rate is multiplied by a factor \n      at the BEGINNING of these epochs\n    factor: a number in range (0, 1)\n  \n  Example:\n    base_lr = 1e-3\n    decay_at_epochs = [51, 101]\n    factor = 0.1\n    It means the learning rate starts at 1e-3 and is multiplied by 0.1 at the \n    BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at the \n    BEGINNING of the 101'st epoch, then stays unchanged till the end of \n    training.\n  \n  NOTE: \n    It is meant to be called at the BEGINNING of an epoch.\n  \"\"\"\n  assert ep >= 1, \"Current epoch number should be >= 1\"\n\n  if ep not in decay_at_epochs:\n    return\n\n  ind = find_index(decay_at_epochs, ep)\n  for g in optimizer.param_groups:\n    g['lr'] = base_lr * factor ** (ind + 1)\n  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))\n\n\n@contextmanager\ndef measure_time(enter_msg):\n  st = time.time()\n  print(enter_msg)\n  yield\n  print('Done, {:.2f}s'.format(time.time() - st))\n\n# @profile\ndef generate_features(appearance_model, patches):\n    features = []\n    for patch in patches:\n        patch = patch.unsqueeze(0)\n        with torch.no_grad():\n            feature = appearance_model(patch)\n            feature = feature.squeeze(0).cpu().numpy()\n        features.append(feature)\n    return features\n\n# @profile\ndef generate_features_batched(appearance_model, patches, object_ids = None):\n    \n    # return generate_features(appearance_model, patches)   #TODO: Fix batched appearance features. This currently gives bad features\n  maxx = -1\n  maxy = -1\n  idxs = []\n  features = []\n  for i, patch in enumerate(patches):\n      if patch is None or patch.nelement()==0:\n          continue\n      maxx = max(maxx, patch.size()[1])\n      maxy = max(maxy, patch.size()[2])\n      idxs.append(i)\n\n  if(maxx==-1 and maxy==-1):\n      return features\n  batch = torch.zeros(len(idxs),3,maxx,maxy).to('cuda:1')\n  padding = []\n  for i, idx in enumerate(idxs):\n      patch = patches[idx]\n      patchx = patch.size()[1]\n      patchy = patch.size()[2]\n      batch[i,:,:patchx,:patchy] = patch\n      padding.append((patchx, patchy))\n\n  with torch.no_grad():\n      features_torch = appearance_model(batch)\n      # out_features = features_torch.mean()\n      # for feat, pad in zip(features_torch, padding):\n      #   out_features.append(feat[:, :pad[0], :pad[1]].mean())\n\n      i = 0\n      ctr = 0\n      for idx in idxs:\n          while(i < idx):\n              features.append(None)\n              i+=1\n          features.append(features_torch[ctr])\n          i+=1\n          ctr+=1\n      while(i<len(patches)):\n          features.append(None)\n          i+=1\n\n  return features\n    # else:\n    #     print(\"Critical Error! Attempted to batch appearance features but no model was selected\")\n\ndef get_image_patches(input_img, detections):\n    #Generates patches and also converts detections\n    patches = []\n    for detection in detections:\n        x1, y1, x2, y2, _, _, _ = detection\n        box_h = y2-y1\n        box_w = x2-x1\n        x1=x1\n        y1=y1\n\n        patch = input_img[:, y1:y1+box_h, x1:x1+box_w]\n        patches.append(patch)\n\n    return patches\n\ndef create_appearance_model(alignreid_checkpoint, resnet_reid_checkpoint=None, cuda=True):\n  appearance_model = aligned_reid_model()\n  map_location = (lambda storage, loc: storage)\n  sd = torch.load(alignreid_checkpoint, map_location=map_location)\n  load_state_dict(appearance_model, sd['state_dicts'][0])\n  if cuda:\n    appearance_model.to('cuda:1')\n  appearance_model.eval()\n  return appearance_model\n"
  },
  {
    "path": "src/calibration.py",
    "content": "import numpy as np\nimport cv2\nimport os\nimport yaml\nimport torch\nimport pdb\n\nclass Calibration(object):\n    ''' Calibration matrices and utils\n        3d XYZ in <label>.txt are in rect camera coord.\n        2d box xy are in image2 coord\n        Points in <lidar>.bin are in Velodyne coord.\n        y_image2 = P^2_rect * x_rect\n        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo\n        x_ref = Tr_velo_to_cam * x_velo\n        x_rect = R0_rect * x_ref\n        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;\n                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;\n                    0,      0,      1,      0]\n                 = K * [1|t]\n        image2 coord:\n         ----> x-axis (u)\n        |\n        |\n        v y-axis (v)\n        velodyne coord:\n        front x, left y, up z\n        rect/ref camera coord:\n        right x, down y, front z\n        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf\n        TODO(rqi): do matrix multiplication only once for each projection.\n    '''\n    def __init__(self, calib_filepath):\n\n        calibs = self.read_calib_file(calib_filepath)\n        # Projection matrix from rect camera coord to image2 coord\n        self.P = calibs['P2'] \n        self.P = np.reshape(self.P, [3,4])\n        self.P_torch = torch.from_numpy(self.P).float().cuda()\n\n        # Rigid transform from Velodyne coord to reference camera coord\n        try:\n            self.V2C = calibs['Tr_velo_to_cam']\n        except:\n            self.V2C = calibs['Tr_velo_cam']\n\n        self.V2C = np.reshape(self.V2C, [3,4])\n        self.C2V = inverse_rigid_trans(self.V2C)\n        # Rotation from reference camera coord to rect camera coord\n        try:\n            self.R0 = calibs['R0_rect']\n        except:\n            self.R0 = calibs['R_rect']\n        self.R0 = np.reshape(self.R0,[3,3])\n        self.R0_torch = torch.from_numpy(self.R0).float().cuda()\n\n        RA = np.zeros((4,4))\n        RA[:3,:3] = self.R0\n        RA[3,3] = 1\n        self.D = np.matmul(self.P,RA).T\n        self.D_torch = torch.from_numpy(self.D).float().cuda()\n\n        # Camera intrinsics and extrinsics\n        self.c_u = self.P[0,2]\n        self.c_v = self.P[1,2]\n        self.f_u = self.P[0,0]\n        self.f_v = self.P[1,1]\n        self.b_x = self.P[0,3]/(-self.f_u) # relative \n        self.b_y = self.P[1,3]/(-self.f_v)\n\n    def read_calib_file(self, filepath):\n        ''' Read in a calibration file and parse into a dictionary.\n        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py\n        '''\n        data = {}\n        with open(filepath, 'r') as f:\n            for line in f.readlines():\n                line = line.rstrip()\n                if len(line)==0: continue\n                key, value = line.split(' ', 1)\n                if key.endswith(':'):\n                    key = key[:-1]\n                # The only non-float values in these files are dates, which\n                # we don't care about anyway\n                try:\n                    data[key] = np.array([float(x) for x in value.split()])\n                except ValueError:\n                    pass\n\n        return data\n    \n    def read_calib_from_video(self, calib_root_dir):\n        ''' Read calibration for camera 2 from video calib files.\n            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir\n        '''\n        data = {}\n        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))\n        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))\n        Tr_velo_to_cam = np.zeros((3,4))\n        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])\n        Tr_velo_to_cam[:,3] = velo2cam['T']\n        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])\n        data['R0_rect'] = cam2cam['R_rect_00']\n        data['P2'] = cam2cam['P_rect_02']\n        return data\n\n    def cart2hom(self, pts_3d):\n        ''' Input: nx3 points in Cartesian\n            Oupput: nx4 points in Homogeneous by appending 1\n        '''\n        n = pts_3d.shape[0]\n        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))\n        return pts_3d_hom\n \n    def cart2hom_torch(self, pts_3d):\n        n = pts_3d.size()[0]\n        pts_3d_hom = torch.cat((pts_3d, torch.ones(n,1).to(\"cuda:0\")), 1)\n        return pts_3d_hom\n\n    # =========================== \n    # ------- 3d to 3d ---------- \n    # =========================== \n    def project_velo_to_ref(self, pts_3d_velo):\n        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4\n        return np.dot(pts_3d_velo, np.transpose(self.V2C))\n\n    def project_ref_to_velo(self, pts_3d_ref):\n        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4\n        return np.dot(pts_3d_ref, np.transpose(self.C2V))\n\n    def project_rect_to_ref(self, pts_3d_rect):\n        ''' Input and Output are nx3 points '''\n        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))\n    \n    def project_ref_to_rect(self, pts_3d_ref):\n        ''' Input and Output are nx3 points '''\n        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))\n\n    def project_ref_to_rect_torch(self, pts_3d_ref):\n        ''' Input and Output are nx3 points '''\n        return torch.transpose(torch.matmul(self.R0_torch, torch.transpose(pts_3d_ref,0,1)),0,1)\n \n    def project_rect_to_velo(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx3 points in velodyne coord.\n        ''' \n        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)\n        return self.project_ref_to_velo(pts_3d_ref)\n\n    def project_velo_to_rect(self, pts_3d_velo):\n        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)\n        return self.project_ref_to_rect(pts_3d_ref)\n\n    # =========================== \n    # ------- 3d to 2d ---------- \n    # =========================== \n    def project_rect_to_image(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.cart2hom(pts_3d_rect)\n        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_rect_to_image_torch(self, pts_3d_rect):\n        ''' Input: nx3 points in rect camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.cart2hom_torch(pts_3d_rect)\n        pts_2d = torch.matmul(pts_3d_rect, torch.transpose(self.P_torch,0,1)) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_ref_to_image_torch(self, pts_3d_ref):\n        ''' Input: nx3 points in ref camera coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_ref = self.cart2hom_torch(pts_3d_ref)\n        pts_2d = torch.matmul(pts_3d_ref, self.D_torch) # nx3\n        pts_2d[:,0] /= pts_2d[:,2]\n        pts_2d[:,1] /= pts_2d[:,2]\n        return pts_2d[:,0:2]\n\n    def project_velo_to_image(self, pts_3d_velo):\n        ''' Input: nx3 points in velodyne coord.\n            Output: nx2 points in image2 coord.\n        '''\n        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)\n        return self.project_rect_to_image(pts_3d_rect)\n\n    # =========================== \n    # ------- 2d to 3d ---------- \n    # =========================== \n    def project_image_to_rect(self, uv_depth):\n        ''' Input: nx3 first two channels are uv, 3rd channel\n                   is depth in rect camera coord.\n            Output: nx3 points in rect camera coord.\n        '''\n        n = uv_depth.shape[0]\n        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x\n        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y\n        pts_3d_rect = np.zeros((n,3))\n        pts_3d_rect[:,0] = x\n        pts_3d_rect[:,1] = y\n        pts_3d_rect[:,2] = uv_depth[:,2]\n        return pts_3d_rect\n\n    def project_image_to_velo(self, uv_depth):\n        pts_3d_rect = self.project_image_to_rect(uv_depth)\n        return self.project_rect_to_velo(pts_3d_rect)\n\ndef rotx(t):\n    ''' 3D Rotation about the x-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[1,  0,  0],\n                     [0,  c, -s],\n                     [0,  s,  c]])\n\n\ndef roty(t):\n    ''' Rotation about the y-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[c,  0,  s],\n                     [0,  1,  0],\n                     [-s, 0,  c]])\n\n\ndef rotz(t):\n    ''' Rotation about the z-axis. '''\n    c = np.cos(t)\n    s = np.sin(t)\n    return np.array([[c, -s,  0],\n                     [s,  c,  0],\n                     [0,  0,  1]])\n\n\ndef transform_from_rot_trans(R, t):\n    ''' Transforation matrix from rotation matrix and translation vector. '''\n    R = R.reshape(3, 3)\n    t = t.reshape(3, 1)\n    return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))\n\n\ndef inverse_rigid_trans(Tr):\n    ''' Inverse a rigid body transform matrix (3x4 as [R|t])\n        [R'|-R't; 0|1]\n    '''\n    inv_Tr = np.zeros_like(Tr) # 3x4\n    inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3])\n    inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3])\n    return inv_Tr\n\ndef read_label(label_filename):\n    lines = [line.rstrip() for line in open(label_filename)]\n    objects = [Object3d(line) for line in lines]\n    return objects\n\ndef load_image(img_filename):\n    return cv2.imread(img_filename)\n\ndef load_velo_scan(velo_filename):\n    scan = np.fromfile(velo_filename, dtype=np.float32)\n    scan = scan.reshape((-1, 4))\n    return scan\n\ndef project_to_image(pts_3d, P):\n    ''' Project 3d points to image plane.\n    Usage: pts_2d = projectToImage(pts_3d, P)\n      input: pts_3d: nx3 matrix\n             P:      3x4 projection matrix\n      output: pts_2d: nx2 matrix\n      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)\n      => normalize projected_pts_2d(2xn)\n      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)\n          => normalize projected_pts_2d(nx2)\n    '''\n    n = pts_3d.shape[0]\n    pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))\n    print(('pts_3d_extend shape: ', pts_3d_extend.shape))\n    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3\n    pts_2d[:,0] /= pts_2d[:,2]\n    pts_2d[:,1] /= pts_2d[:,2]\n    return pts_2d[:,0:2]\n\n\ndef compute_box_3d(obj, P):\n    ''' Takes an object and a projection matrix (P) and projects the 3d\n        bounding box into the image plane.\n        Returns:\n            corners_2d: (8,2) array in left image coord.\n            corners_3d: (8,3) array in in rect camera coord.\n    '''\n    # compute rotational matrix around yaw axis\n    R = roty(obj.ry)    \n\n    # 3d bounding box dimensions\n    l = obj.l;\n    w = obj.w;\n    h = obj.h;\n    \n    # 3d bounding box corners\n    x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2];\n    y_corners = [0,0,0,0,-h,-h,-h,-h];\n    z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2];\n    \n    # rotate and translate 3d bounding box\n    corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))\n    #print corners_3d.shape\n    corners_3d[0,:] = corners_3d[0,:] + obj.t[0];\n    corners_3d[1,:] = corners_3d[1,:] + obj.t[1];\n    corners_3d[2,:] = corners_3d[2,:] + obj.t[2];\n    #print 'cornsers_3d: ', corners_3d \n    # only draw 3d bounding box for objs in front of the camera\n    if np.any(corners_3d[2,:]<0.1):\n        corners_2d = None\n        return corners_2d, np.transpose(corners_3d)\n    \n    # project the 3d bounding box into the image plane\n    corners_2d = project_to_image(np.transpose(corners_3d), P);\n    #print 'corners_2d: ', corners_2d\n    return corners_2d, np.transpose(corners_3d)\n\n\ndef compute_orientation_3d(obj, P):\n    ''' Takes an object and a projection matrix (P) and projects the 3d\n        object orientation vector into the image plane.\n        Returns:\n            orientation_2d: (2,2) array in left image coord.\n            orientation_3d: (2,3) array in in rect camera coord.\n    '''\n    \n    # compute rotational matrix around yaw axis\n    R = roty(obj.ry)\n   \n    # orientation in object coordinate system\n    orientation_3d = np.array([[0.0, obj.l],[0,0],[0,0]])\n    \n    # rotate and translate in camera coordinate system, project in image\n    orientation_3d = np.dot(R, orientation_3d)\n    orientation_3d[0,:] = orientation_3d[0,:] + obj.t[0]\n    orientation_3d[1,:] = orientation_3d[1,:] + obj.t[1]\n    orientation_3d[2,:] = orientation_3d[2,:] + obj.t[2]\n    \n    # vector behind image plane?\n    if np.any(orientation_3d[2,:]<0.1):\n      orientation_2d = None\n      return orientation_2d, np.transpose(orientation_3d)\n    \n    # project orientation into the image plane\n    orientation_2d = project_to_image(np.transpose(orientation_3d), P);\n    return orientation_2d, np.transpose(orientation_3d)\n\ndef draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):\n    ''' Draw 3d bounding box in image\n        qs: (8,3) array of vertices for the 3d box in following order:\n            1 -------- 0\n           /|         /|\n          2 -------- 3 .\n          | |        | |\n          . 5 -------- 4\n          |/         |/\n          6 -------- 7\n    '''\n    qs = qs.astype(np.int32)\n    for k in range(0,4):\n       # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html\n       i,j=k,(k+1)%4\n       # use LINE_AA for opencv3\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n\n       i,j=k+4,(k+1)%4 + 4\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n\n       i,j=k,k+4\n       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)\n    return image\n\n\n\nclass OmniCalibration(Calibration):\n    def __init__(self, calib_folder):\n\n        global_config = os.path.join(calib_folder, 'defaults.yaml')\n        camera_config = os.path.join(calib_folder, 'cameras.yaml')\n\n        with open(global_config) as f:\n            self.global_config_dict = yaml.safe_load(f)\n        \n        with open(camera_config) as f:\n            self.camera_config_dict = yaml.safe_load(f)\n        \n        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')\n        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')\n        # image shape is (color channels, height, width)\n        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']\n    \n    def project_ref_to_image_torch(self, pointcloud):\n\n        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)\n        horizontal_fraction = theta/ (2*np.pi)\n        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]\n        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y\n        pts_2d = torch.stack([x, y], dim=1)\n        \n        return pts_2d\n\n\n    def project_image_to_rect(self, uvdepth):\n\n        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi\n        z = uvdepth[:, 2]*np.cos(theta)\n        x = uvdepth[:, 2]*np.sin(theta)\n        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))\n\n        return np.stack([x,y,z], axis=1)\n\n    def project_velo_to_ref(self, pointcloud):\n\n        pointcloud = pointcloud[:, [1, 2, 0]]\n        pointcloud[:, 0] *= -1\n        pointcloud[:, 1] *= -1\n\n        return pointcloud\n\n    def move_lidar_to_camera_frame(self, pointcloud, upper = True):\n        # assumed only rotation about z axis\n        \n        if upper:\n            pointcloud[:,:3] =  \\\n                pointcloud[:,:3] - torch.Tensor(self.global_config_dict['calibrated']\n                                                ['lidar_upper_to_rgb']['translation']).type(pointcloud.type())\n            theta = self.global_config_dict['calibrated']['lidar_upper_to_rgb']['rotation'][-1]\n        else:\n            pointcloud[:,:3] =  \\\n                pointcloud[:,:3] - torch.Tensor(self.global_config_dict['calibrated']\n                                                ['lidar_lower_to_rgb']['translation']).type(pointcloud.type())\n            theta = self.global_config_dict['calibrated']['lidar_lower_to_rgb']['rotation'][-1]\n\n        rotation_matrix = torch.Tensor([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]).type(pointcloud.type())\n        pointcloud[:, :2] = torch.matmul(rotation_matrix, pointcloud[:, :2].unsqueeze(2)).squeeze()\n        pointcloud[:, :3] = self.project_velo_to_ref(pointcloud[:, :3])\n        return pointcloud\n        \n    \n    def calculate_median_param_value(self, param):\n        if param=='f_y':\n            idx = 4\n        elif param == 'f_x':\n            idx = 0\n        elif param == 't_y':\n            idx = 5\n        elif param == 't_x':\n            idx = 2\n        elif param == 's':\n            idx = 1\n        else:\n            raise 'Wrong parameter!'\n\n        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']\n        parameter_list = []\n        for sensor, camera_params in self.camera_config_dict['cameras'].items():\n            if sensor not in omni_camera:\n                continue\n            K_matrix = camera_params['K'].split(' ')\n            parameter_list.append(float(K_matrix[idx]))\n        return np.median(parameter_list)\n"
  },
  {
    "path": "src/combination_model.py",
    "content": "import pdb\n\nimport numpy as np\nimport torch.nn as nn\n\nclass CombiNet(nn.Module):\n\tdef __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):\n\t\tsuper().__init__()\n\t\tself.fc1 = nn.Linear(in_dim, 2*hidden_units)\n\t\t# self.bn1 = nn.BatchNorm1d(hidden_units)\n\t\tself.fc2 = nn.Linear(2*hidden_units, 2*hidden_units)\n\t\t# self.bn2 = nn.BatchNorm1d(2*hidden_units)\n\t\tself.fc3 = nn.Linear(2*hidden_units, out_dim)\n\t\tself.relu = nn.ReLU()\n\t\tself.apply(weight_init)\n\tdef forward(self, x):\n\t\tout = nn.functional.normalize(x)\n\t\tskip = out\n\t\tout = self.fc1(x)\n\t\t# out = self.bn1(out)\n\t\tout = self.relu(out)\n\t\tout = self.fc2(out)\n\t\t# out = self.bn2(out)\n\t\tout = self.relu(out)\n\t\tout = self.fc3(out)\n\t\tout = nn.functional.normalize(out)\n\t\tout += skip\n\t\treturn out\n\nclass CombiLSTM(nn.Module):\n\tdef __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):\n\t\tsuper().__init__()\n\t\tself.in_linear1 = nn.Linear(in_dim, hidden_units)\n\t\t# self.bn1 = nn.BatchNorm1d(hidden_units)\n\t\tself.in_linear2 = nn.Linear(hidden_units, hidden_units)\n\t\tself.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)\n\t\tself.out_linear1 = nn.Linear(hidden_units, hidden_units)\n\t\t# self.bn2 = nn.BatchNorm1d(hidden_units)\n\t\tself.out_linear2 = nn.Linear(hidden_units, out_dim)\n\t\tself.relu = nn.ReLU()\n\t\tself.apply(weight_init)\n\n\tdef forward(self, x, hidden = None):\n\t\tout = nn.functional.normalize(x)\n\t\tskip = out\n\t\tout = self.in_linear1(out)\n\t\t# out = self.bn1(out)\n\t\tout = self.relu(out)\n\t\tout = self.in_linear2(out)\n\t\tout = out.unsqueeze(1) #Adding batch dimension\n\t\tif hidden is None:\n\t\t\tout, hidden = self.rnn(out)\n\t\telse:\n\t\t\tout, hidden = self.rnn(out, hidden)\n\n\t\tout = out.squeeze(1) #removing batch dimension\n\t\tout = self.out_linear1(out)\n\t\t# out = self.bn2(out)\n\t\tout = self.relu(out)\n\t\tout = self.out_linear2(out)\n\t\tout = nn.functional.normalize(out)\n\t\tout += skip\n\t\treturn out, hidden\n\ndef weight_init(m):\n\tif type(m)==nn.Linear:\n\t\tnn.init.xavier_normal_(m.weight, gain=np.sqrt(2))\n\telif type(m)==nn.LSTM:\n\t\tnn.init.xavier_normal_(m.weight_ih_l0)\n\t\tnn.init.xavier_normal_(m.weight_hh_l0)\n"
  },
  {
    "path": "src/deep_sort_utils.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport cv2\n\n\ndef non_max_suppression(boxes, max_bbox_overlap, scores=None):\n    \"\"\"Suppress overlapping detections.\n\n    Original code from [1]_ has been adapted to include confidence score.\n\n    .. [1] http://www.pyimagesearch.com/2015/02/16/\n           faster-non-maximum-suppression-python/\n\n    Examples\n    --------\n\n        >>> boxes = [d.roi for d in detections]\n        >>> scores = [d.confidence for d in detections]\n        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)\n        >>> detections = [detections[i] for i in indices]\n\n    Parameters\n    ----------\n    boxes : ndarray\n        Array of ROIs (x, y, width, height).\n    max_bbox_overlap : float\n        ROIs that overlap more than this values are suppressed.\n    scores : Optional[array_like]\n        Detector confidence score.\n\n    Returns\n    -------\n    List[int]\n        Returns indices of detections that have survived non-maxima suppression.\n\n    \"\"\"\n    if len(boxes) == 0:\n        return []\n\n    boxes = boxes.astype(np.float)\n    pick = []\n\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    x2 = boxes[:, 2] + boxes[:, 0]\n    y2 = boxes[:, 3] + boxes[:, 1]\n\n    area = (x2 - x1 + 1) * (y2 - y1 + 1)\n    if scores is not None:\n        idxs = np.argsort(scores)\n    else:\n        idxs = np.argsort(y2)\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        pick.append(i)\n\n        xx1 = np.maximum(x1[i], x1[idxs[:last]])\n        yy1 = np.maximum(y1[i], y1[idxs[:last]])\n        xx2 = np.minimum(x2[i], x2[idxs[:last]])\n        yy2 = np.minimum(y2[i], y2[idxs[:last]])\n\n        w = np.maximum(0, xx2 - xx1 + 1)\n        h = np.maximum(0, yy2 - yy1 + 1)\n\n        overlap = (w * h) / (area[idxs[:last]]) # + area[idxs[last:last+1]] - w * h) #changed from deepsort to sum both areas\n\n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > max_bbox_overlap)[0])))\n\n    return pick\n"
  },
  {
    "path": "src/detection.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\n\n\nclass Detection(object):\n    \"\"\"\n    This class represents a bounding box detection in a single image.\n\n    Parameters\n    ----------\n    tlwh : array_like\n        Bounding box in format `(x, y, w, h)`.\n    confidence : float\n        Detector confidence score.\n    feature : array_like\n        A feature vector that describes the object contained in this image.\n\n    Attributes\n    ----------\n    tlwh : ndarray\n        Bounding box in format `(top left x, top left y, width, height)`.\n    confidence : ndarray\n        Detector confidence score.\n    feature : ndarray | NoneType\n        A feature vector that describes the object contained in this image.\n\n    \"\"\"\n\n    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):\n        self.tlwh = np.asarray(tlwh, dtype=np.float)\n        # Note that detections format is centre of 3D box and dimensions (not bottom face)\n        self.box_3d = box_3d\n        if box_3d is not None:\n            self.box_3d[1] -= box_3d[4]/2\n            self.box_3d = np.asarray(box_3d, dtype=np.float32)\n        self.confidence = float(confidence)\n        self.appearance_feature = appearance_feature\n        if feature is not None:\n            self.feature = feature\n        else:\n            self.feature = None\n\n\n    def to_tlbr(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    def to_xyah(self):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2\n        ret[2] /= ret[3]\n        return ret\n    def to_xywh(self):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2\n        return ret\n    def get_3d_distance(self):\n        if self.box_3d is not None:\n            return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)"
  },
  {
    "path": "src/distances.py",
    "content": "\"\"\"py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.\n\nChristoph Heindl, 2017\nhttps://github.com/cheind/py-motmetrics\n\"\"\"\n\nimport numpy as np\nimport pdb\n\ndef norm2squared_matrix(objs, hyps, max_d2=float('inf')):\n    \"\"\"Computes the squared Euclidean distance matrix between object and hypothesis points.\n\n    Params\n    ------\n    objs : NxM array\n        Object points of dim M in rows\n    hyps : KxM array\n        Hypothesis points of dim M in rows\n\n    Kwargs\n    ------\n    max_d2 : float\n        Maximum tolerable squared Euclidean distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to +inf\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert hyps.shape[1] == objs.shape[1], \"Dimension mismatch\"\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            e = objs[o] - hyps[h]\n            C[o, h] = e.dot(e)\n\n    C[C > max_d2] = np.nan\n    return C\n\n\ndef iou_matrix(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert objs.shape[1] == 4\n    assert hyps.shape[1] == 4\n\n    br_objs = objs[:, :2] + objs[:, 2:]\n    br_hyps = hyps[:, :2] + hyps[:, 2:]\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            isect_xy = np.maximum(objs[o, :2], hyps[h, :2])\n            isect_wh = np.maximum(np.minimum(br_objs[o], br_hyps[h]) - isect_xy, 0)\n            isect_a = isect_wh[0]*isect_wh[1]\n            union_a = objs[o, 2]*objs[o, 3] + hyps[h, 2]*hyps[h, 3] - isect_a\n            if union_a != 0:\n                C[o, h] = 1. - isect_a / union_a\n            else:\n                C[o, h] = np.nan\n\n    C[C > max_iou] = np.nan\n    return C\n\n\ndef find_area(vertices):\n    area = 0\n    for i in range(len(vertices)):\n        area += vertices[i][0]*(vertices[(i+1)%len(vertices)][1] - vertices[i-1][1])\n    return 0.5*abs(area)\n\ndef get_angle(p):\n    x, y = p\n    angle = np.arctan2(y,x)\n    if angle < 0:\n        angle += np.pi*2\n    return angle\n\ndef clip_polygon(box1, box2):\n    #clips box 1 by the edges in box2\n    x,y,z,l,h,w,theta = box2\n    theta = -theta\n\n    box2_edges = np.asarray([(-np.cos(theta), -np.sin(theta), l/2-x*np.cos(theta)-z*np.sin(theta)),\n                    (-np.sin(theta), np.cos(theta), w/2-x*np.sin(theta)+z*np.cos(theta)),\n                    (np.cos(theta), np.sin(theta), l/2+x*np.cos(theta)+z*np.sin(theta)),\n                    (np.sin(theta), -np.cos(theta), w/2+x*np.sin(theta)-z*np.cos(theta))])\n    x,y,z,l,h,w,theta = box1\n    theta = -theta\n\n    box1_vertices = [(x+l/2*np.cos(theta)-w/2*np.sin(theta), z+l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x+l/2*np.cos(theta)+w/2*np.sin(theta), z+l/2*np.sin(theta)-w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)-w/2*np.sin(theta), z-l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)+w/2*np.sin(theta), z-l/2*np.sin(theta)-w/2*np.cos(theta))]\n    out_vertices = box1_vertices\n    for edge in box2_edges:\n        vertex_list = out_vertices[:]\n        out_vertices = []\n        for idx, current_vertex in enumerate(vertex_list):\n            previous_vertex = vertex_list[idx-1]\n            if point_inside_edge(current_vertex, edge):\n                if not point_inside_edge(previous_vertex, edge):\n                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n                out_vertices.append(current_vertex)\n            elif point_inside_edge(previous_vertex, edge):\n                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n    to_remove = []\n    for i in range(len(out_vertices)):\n        if i in to_remove:\n            continue\n        for j in range(i+1, len(out_vertices)):\n            if abs(out_vertices[i][0] - out_vertices[j][0]) < 1e-6 and abs(out_vertices[i][1] - out_vertices[j][1]) < 1e-6:\n                to_remove.append(j)\n    out_vertices = sorted([(v[0]-x, v[1]-z) for i,v in enumerate(out_vertices) if i not in to_remove], key = lambda p: get_angle((p[0],p[1])))\n    return out_vertices\n\ndef compute_intersection_point(pt1, pt2, line1):\n    if pt1[0] == pt2[0]:\n        slope = np.inf\n    else:\n        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])\n    if np.isinf(slope):\n        line2 = (1, 0, pt1[0])\n    else:\n        line2 = (slope, -1, pt1[0]*slope-pt1[1])\n    # print(\"Line1:\", line1)\n    # print(\"Line2:\", line2)\n    if line1[1] == 0:\n        x = line1[2]/line1[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    elif line1[0] == 0:\n        y = line1[2]/line1[1]\n        x = (line2[2] - line2[1]*y)/line2[0]\n    elif line2[1] == 0:\n        x = pt1[0]\n        y = (line1[2]-x*line1[0])/line1[1]\n    else:\n        tmp_line = (line2 - line1*(line2[1]/line1[1]))\n        x = tmp_line[2]/tmp_line[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    return (x,y)\n\ndef point_inside_edge(pt, edge):\n    lhs = pt[0]*edge[0] + pt[1]*edge[1]\n    if lhs < edge[2] - 1e-6:\n        return True\n    else:\n        return False\n\n\ndef iou_matrix_3d(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n    assert objs.shape[1] == 7\n    assert hyps.shape[1] == 7\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            base_area = find_area(clip_polygon(objs[o], hyps[h]))\n            height = min(objs[o][1], hyps[h][1]) - max(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4])\n            intersect = base_area*height\n            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect\n            if union != 0:\n                C[o, h] = 1. - intersect / union\n            else:\n                C[o, h] = np.nan\n    C[C > max_iou] = np.nan\n    return C\n"
  },
  {
    "path": "src/double_measurement_kf.py",
    "content": "import random\nimport numpy as np\nimport scipy.linalg\nimport EKF\nimport pdb\nimport kf_2d\nimport os\nimport pickle\nimport torch\nfrom copy import deepcopy\nimport matplotlib.pyplot as plt\nnp.set_printoptions(precision=4, suppress=True)\nfrom calibration import Calibration\nimport sys\nsys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\nfrom evaluation.distances import iou_matrix\n\nclass KF_3D(kf_2d.KalmanFilter2D):\n    \"\"\"\n    3D Kalman Filter that tracks objets in 3D space\n\n        The 8-dimensional state space\n\n            x, y, z, l, h, w, theta, vx, vz\n\n        contains the bounding box center position (x, z), the heading angle theta, the\n        box dimensions l, w, h, and the x and z velocities.\n\n        Object motion follows a constant velocity model. The bounding box location\n        (x, y) is taken as direct observation of the state space (linear\n        observation model).\n    \"\"\"\n    def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight,\n                    std_process, std_measurement_2d, std_measurement_3d,\n                    initial_uncertainty, omni = True, debug=True):\n        self.ndim, self.dt = 9, 4.\n\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(self.ndim, self.ndim)\n        self._motion_mat[0, 7] = self.dt\n        self._motion_mat[2, 8] = self.dt\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(self.ndim - 2, self.ndim)\n        if omni:\n            self.x_constant = calib.img_shape[2]/(2*np.pi)\n            self.y_constant = calib.median_focal_length_y\n            self.calib = calib\n        else:\n            self.projection_matrix = calib.P\n\n        self.omni = omni\n        self._std_weight_pos_3d = pos_weight_3d\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._std_weight_theta= theta_weight\n\n        self._std_weight_process = std_process\n        self._initial_uncertainty = initial_uncertainty\n        self._std_weight_measurement_2d = std_measurement_2d\n        self._std_weight_measurement_3d = std_measurement_3d\n        self.debug = debug\n\n    def initiate(self, measurement_3d):\n\n        mean_pos = measurement_3d\n        mean_vel = np.zeros((2,))\n        mean = np.r_[mean_pos, mean_vel]\n        std = [\n                self._std_weight_pos_3d,\n                self._std_weight_pos_3d * 0.15,\n                self._std_weight_pos_3d,\n                self._std_weight_pos_3d * 0.015,\n                self._std_weight_pos_3d * 0.015,\n                self._std_weight_pos_3d * 0.015,\n                self._std_weight_theta * 10,\n                self._std_weight_vel*5,\n                self._std_weight_vel*5]\n        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2\n\n        return mean, covariance\n\n    def get_process_noise(self, mean):\n\n        std_pos = [\n                self._std_weight_pos_3d, # x\n                self._std_weight_pos_3d * 0.15, # y\n                self._std_weight_pos_3d, # z\n                self._std_weight_pos_3d * 0.015, # l\n                self._std_weight_pos_3d * 0.015, # h\n                self._std_weight_pos_3d * 0.015, # w\n                self._std_weight_theta # theta\n            ]\n        std_vel = [\n            self._std_weight_vel, # x\n            self._std_weight_vel, # z\n            ]\n        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2\n        return motion_cov\n\n    def get_2d_measurement_noise(self, measurement_2d):\n        # Returns Qt the sensor noise covariance\n\n        # Measurement uncertainty scaled by estimated height\n        std = [\n                self._std_weight_pos*measurement_2d[2],\n                self._std_weight_pos*measurement_2d[3],\n                self._std_weight_pos*measurement_2d[2],\n                self._std_weight_pos*measurement_2d[3]]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_2d**2\n        return innovation_cov\n\n    def get_3d_measurement_noise(self, measurement):\n        # Returns Qt the sensor noise covariance\n\n        # Measurement uncertainty scaled by estimated height\n        std = [\n            self._std_weight_pos_3d, # x\n            self._std_weight_pos_3d * 0.5, # y\n            self._std_weight_pos_3d, # z\n            self._std_weight_pos_3d, # l\n            self._std_weight_pos_3d, # h\n            self._std_weight_pos_3d, # w\n            self._std_weight_theta * 25 # theta\n            ]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_3d**2\n        return innovation_cov\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False,\n                        use_3d=True):\n        \"\"\"Compute gating distance between state distribution and measurements.\n\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n\n        \"\"\"\n        if not use_3d:\n            corner_points, corner_points_3d = self.calculate_corners(mean)\n            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)\n            min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n            max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n            cov = self.project_cov_2d(mean, covariance, H_2d)\n            mean = np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n        else:\n            mean, cov = mean[:7], self.project_cov(mean, covariance)\n        if only_position:\n            if use_3d:\n                mean, cov = mean[[0, 2]], np.reshape(cov[[0, 0, 2, 2], [0, 2, 0, 2]], (2,2))\n                measurements = measurements[:, [0, 2]]\n            else:\n                mean, cov = mean[:2], cov[:2, :2]\n                measurements = measurements[:, :2]\n        self.LIMIT=0.3\n        if np.amax(cov) > self.LIMIT:\n            cov_2 = cov * self.LIMIT / np.amax(cov)\n        return EKF.squared_mahalanobis_distance(mean, cov2, measurements)\n\n    def project_cov(self, mean, covariance):\n        # Returns S the innovation covariance (projected covariance)\n\n        measurement_noise = self.get_3d_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,\n                                          self._observation_mat.T))\n                     + measurement_noise)\n        return innovation_cov\n\n    def project_cov_2d(self, mean, covariance, H_2d):\n        # Returns S the innovation covariance (projected covariance)\n\n        measurement_noise = self.get_2d_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((H_2d, covariance,\n                                          H_2d.T))\n                     + measurement_noise)\n        return innovation_cov\n    # @profile\n    def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (9 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (9x9 dimensional).\n        measurement_2d : ndarray\n            The 4 dimensional measurement vector (x, y, w, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n        measurement_3d : ndarray\n            The 7 dimensional measurement vector (x, y, z, l, h, w, theta), where (x, y, z)\n            is the center bottom of the box, l, q, h are the dimensions of the bounding box\n            theta is the orientation angle w.r.t. the positive x axis.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n\n        if np.any(np.isnan(mean)):\n            return mean, covariance\n        out_cov = deepcopy(covariance)\n        H_3d = self._observation_mat\n        do_3d = True\n        covariance_3d = None\n        post_3d_mean = mean\n        if measurement_3d is None:\n            do_3d = False\n        else:\n            for meas in measurement_3d:\n                if meas is None:\n                    do_3d = False\n                    break\n        if do_3d:\n            S_matrix = self.project_cov(mean, out_cov)\n            try:\n                chol_factor, lower = scipy.linalg.cho_factor(\n                    S_matrix, lower=True, check_finite=False)\n                kalman_gain = scipy.linalg.cho_solve(\n                    (chol_factor, lower), np.dot(out_cov, H_3d.T).T,\n                    check_finite=False).T\n            except:\n                # in case cholesky factorization fails, revert to standard solver\n                kalman_gain = np.linalg.multi_dot((out_cov, H_3d.T, np.linalg.inv(S_matrix)))\n            out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))\n            if JPDA:\n                innovation_3d = 0\n                cov_uncertainty_3d = 0\n                for i, detection_3d in enumerate(measurement_3d):\n                    innovation_partial = detection_3d - mean[:7]\n                    innovation_3d += innovation_partial * marginalization[i+1]\n                    cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)\n                partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)\n                out_cov *= 1 - marginalization[0]\n                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))\n                out_cov += marginalization[0]*covariance\n            else:\n                out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))\n                innovation_3d = measurement_3d - mean[:7]\n            mean = mean + np.dot(kalman_gain, innovation_3d)\n            post_3d_mean = mean\n            covariance_3d = deepcopy(out_cov)\n\n        if measurement_2d is not None:\n            corner_points, corner_points_3d = self.calculate_corners(mean)\n            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)\n            #update based on 2D\n            min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n            max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n            if min_y < 0:\n                min_y = 0\n            if max_y >= self.calib.img_shape[1]:\n                max_y = self.calib.img_shape[1] - 1\n            S_matrix = self.project_cov_2d(np.array([min_x, min_y, max_x - min_x, max_y - min_y]), out_cov, H_2d)\n            try:\n                chol_factor, lower = scipy.linalg.cho_factor(\n                    S_matrix, lower=True, check_finite=False)\n                kalman_gain = scipy.linalg.cho_solve(\n                    (chol_factor, lower), np.dot(out_cov, H_2d.T).T,\n                    check_finite=False).T\n            except:\n                # in case cholesky factorization fails, revert to standard solver\n                kalman_gain = np.linalg.multi_dot((out_cov, H_2d.T, np.linalg.inv(S_matrix)))\n            out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)\n            if JPDA:\n                innovation_2d = 0\n                cov_uncertainty_2d = 0\n                for i, detection_2d in enumerate(measurement_2d):\n                    innovation_partial = detection_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n                    innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node\n                    cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)\n                partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)\n                out_cov *= 1 - marginalization[0]\n                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))\n                if covariance_3d is None:\n                    out_cov += marginalization[0]*covariance\n                else:\n                    out_cov += marginalization[0]*covariance_3d\n            else:\n                innovation_2d = measurement_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n            mean = mean + np.dot(kalman_gain, innovation_2d)\n\n        if self.debug:\n            return mean, out_cov, post_3d_mean\n        return mean, out_cov\n\n    # @profile\n    def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):\n\n        min_x = np.inf\n        min_x_idx = None\n        max_x = -np.inf\n        max_x_idx = None\n        min_y = np.inf\n        min_y_idx = None\n        max_y = -np.inf\n        max_y_idx = None\n        for idx, pt in enumerate(corner_points):\n            if pt[0] < min_x:\n                min_x_idx = idx\n                min_x = pt[0]\n            if pt[0] > max_x:\n                max_x_idx = idx\n                max_x = pt[0]\n            if pt[1] < min_y:\n                min_y_idx = idx\n                min_y = pt[1]\n            if pt[1] > max_y:\n                max_y_idx = idx\n                max_y = pt[1]\n        if self.omni:\n            jac_x = np.dot(self.jacobian_omni(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))\n            jac_y = np.dot(self.jacobian_omni(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))\n            jac_w = np.dot(self.jacobian_omni(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x\n            jac_h = np.dot(self.jacobian_omni(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y\n        else:\n            jac_x = np.dot(self.jacobian(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))\n            jac_y = np.dot(self.jacobian(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))\n            jac_w = np.dot(self.jacobian(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x\n            jac_h = np.dot(self.jacobian(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y\n        jac = np.vstack([jac_x, jac_y, jac_w, jac_h])\n        jac = np.hstack([jac, np.zeros((jac.shape[0], 2))])\n        return jac\n    # Jacobian for projective transformation\n    def jacobian(self, pt_3d):\n        pt_2d = self.project_2d(pt_3d[None, :])\n        den = np.sum(self.projection_matrix[2] * pt_3d)\n        dxy = (self.projection_matrix[0:2] - self.projection_matrix[2:3] * pt_2d.T)/den\n\n        return dxy[:, :3]\n\n    def jacobian_omni(self, pt_3d):\n        jac = np.zeros((2, 3))\n        x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]\n        denominator = (x**2 + z**2)\n        jac[0, 0] = self.x_constant*(z/denominator)\n        jac[0, 2] = -self.x_constant*(x/denominator)\n\n        jac[1, 0] = self.y_constant*x*y/denominator\n        jac[1, 1] = -self.y_constant\n        jac[1,2] = self.y_constant*z*y/denominator\n        jac[1, :] /= np.sqrt(denominator)\n\n        return jac\n\n    def calculate_corners(self, box):\n        x,y,z,l,h,w,theta = box[:7]\n        pt_3d = []\n        x_delta_1 = np.cos(theta)*l/2+np.sin(theta)*w/2\n        x_delta_2 = np.cos(theta)*l/2 - np.sin(theta)*w/2\n        z_delta_1 = np.sin(theta)*l/2-np.cos(theta)*w/2\n        z_delta_2 = np.sin(theta)*l/2+np.cos(theta)*w/2\n        pt_3d.append((x+x_delta_1, y + h/2, z+z_delta_1, 1))\n        pt_3d.append((x+x_delta_2, y + h/2, z+z_delta_2, 1))\n        pt_3d.append((x-x_delta_2, y + h/2, z-z_delta_2, 1))\n        pt_3d.append((x-x_delta_1, y + h/2, z-z_delta_1, 1))\n        pt_3d.append((x+x_delta_1, y - h/2, z+z_delta_1, 1))\n        pt_3d.append((x+x_delta_2, y - h/2, z+z_delta_2, 1))\n        pt_3d.append((x-x_delta_2, y - h/2, z-z_delta_2, 1))\n        pt_3d.append((x-x_delta_1, y - h/2, z-z_delta_1, 1))\n        pts_3d = np.vstack(pt_3d)\n        pts_2d = self.project_2d(pts_3d)\n        return pts_2d, pts_3d\n\n    def corner_jacobian(self, pt_3d, corner_idx):\n        _, _, _, l, _, w, theta = pt_3d[:7]\n        jac = np.eye(3,7)\n\n        jac[1, 4] = 0.5 if corner_idx < 4 else -0.5\n\n        jac[0, 3] = 0.5*np.sin(theta) if corner_idx % 4 < 2 else -0.5*np.sin(theta)\n        jac[0, 5] = 0.5*np.cos(theta) if corner_idx % 2 == 0 else -0.5*np.cos(theta)\n\n        jac[2, 3] = 0.5*np.cos(theta) if corner_idx%4 < 2 else -0.5*np.cos(theta)\n        jac[2, 5] = 0.5*np.sin(theta) if corner_idx%2 == 0 else -0.5*np.sin(theta)\n\n        if corner_idx%4 == 0:\n            jac[0, 6] = -np.sin(theta)*l/2 + np.cos(theta)*w/2\n            jac[2, 6] = np.cos(theta)*l/2 + np.sin(theta)*w/2\n        elif corner_idx%4==1:\n            jac[0, 6] = -np.sin(theta)*l/2 - np.cos(theta)*w/2\n            jac[2, 6] = np.cos(theta)*l/2 - np.sin(theta)*w/2\n        elif corner_idx%4==2:\n            jac[0, 6] = +np.sin(theta)*l/2 + np.cos(theta)*w/2\n            jac[2, 6] = -np.cos(theta)*l/2 + np.sin(theta)*w/2\n        else:\n            jac[0, 6] = +np.sin(theta)*l/2 - np.cos(theta)*w/2\n            jac[2, 6] = -np.cos(theta)*l/2 - np.sin(theta)*w/2\n\n        return jac\n\n    def project_2d(self, pts_3d):\n        if self.omni:\n            pts_2d = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))\n        else:\n            pts_2d = np.dot(pts_3d, self.projection_matrix.T)\n            pts_2d /= np.expand_dims(pts_2d[:, 2], 1)\n        for pt in pts_2d:\n            if pt[1] > self.calib.img_shape[1]:\n                pt[1] = self.calib.img_shape[1]\n            elif pt[1] < 0:\n                pt[1] = 0\n        # min_x = np.argmin(pts_2d[:, 0])\n        # max_x = np.argmax(pts_2d[:, 0])\n        # if abs(min_x - max_x) > 1800:\n        #     # wrap around!\n        #     pts_2d[min_x], pts_2d[max_x] = pts_2d[max_x], pts_2d[min_x]\n        #     pts_2d[max_x, 0] += self.calib.img_shape[2]\n        return pts_2d[:, :2]\n\n\ndef swap(detections_3d, iou, idx, swap_prob = 0):\n    if random.random() > swap_prob:\n        return detections_3d[idx]\n    else:\n        iou_row = iou[idx]\n        iou_row[idx] = -1\n        max_idx = np.argmax(iou_row)\n        if iou_row[max_idx] > 0.4:\n            # print(\"SWAP\")\n            return detections_3d[max_idx]\n        else:\n            return detections_3d[idx]\n"
  },
  {
    "path": "src/evaluation/__init__.py",
    "content": ""
  },
  {
    "path": "src/evaluation/distances 2.py",
    "content": "\"\"\"py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.\n\nChristoph Heindl, 2017\nhttps://github.com/cheind/py-motmetrics\n\"\"\"\n\nimport numpy as np\nimport pdb\n\ndef norm2squared_matrix(objs, hyps, max_d2=float('inf')):\n    \"\"\"Computes the squared Euclidean distance matrix between object and hypothesis points.\n\n    Params\n    ------\n    objs : NxM array\n        Object points of dim M in rows\n    hyps : KxM array\n        Hypothesis points of dim M in rows\n\n    Kwargs\n    ------\n    max_d2 : float\n        Maximum tolerable squared Euclidean distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to +inf\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert hyps.shape[1] == objs.shape[1], \"Dimension mismatch\"\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            e = objs[o] - hyps[h]\n            C[o, h] = e.dot(e)\n\n    C[C > max_d2] = np.nan\n    return C\n\n\ndef iou_matrix(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert objs.shape[1] == 4\n    assert hyps.shape[1] == 4\n\n    br_objs = objs[:, :2] + objs[:, 2:]\n    br_hyps = hyps[:, :2] + hyps[:, 2:]\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            isect_xy = np.maximum(objs[o, :2], hyps[h, :2])\n            isect_wh = np.maximum(np.minimum(br_objs[o], br_hyps[h]) - isect_xy, 0)\n            isect_a = isect_wh[0]*isect_wh[1]\n            union_a = objs[o, 2]*objs[o, 3] + hyps[h, 2]*hyps[h, 3] - isect_a\n            if union_a != 0:\n                C[o, h] = 1. - isect_a / union_a\n            else:\n                C[o, h] = np.nan\n\n    C[C > max_iou] = np.nan\n    return C\n\n\ndef find_area(vertices):\n    area = 0\n    for i in range(len(vertices)):\n        area += vertices[i][0]*(vertices[(i+1)%len(vertices)][1] - vertices[i-1][1])\n    return 0.5*abs(area)\n\ndef get_angle(p):\n    x, y = p\n    angle = np.arctan2(y,x)\n    if angle < 0:\n        angle += np.pi*2\n    return angle\n\ndef clip_polygon(box1, box2):\n    #clips box 1 by the edges in box2\n    x,y,z,l,h,w,theta = box2\n    theta = -theta\n\n    box2_edges = np.asarray([(-np.cos(theta), -np.sin(theta), l/2-x*np.cos(theta)-z*np.sin(theta)),\n                    (-np.sin(theta), np.cos(theta), w/2-x*np.sin(theta)+z*np.cos(theta)),\n                    (np.cos(theta), np.sin(theta), l/2+x*np.cos(theta)+z*np.sin(theta)),\n                    (np.sin(theta), -np.cos(theta), w/2+x*np.sin(theta)-z*np.cos(theta))])\n    x,y,z,l,h,w,theta = box1\n    theta = -theta\n\n    box1_vertices = [(x+l/2*np.cos(theta)-w/2*np.sin(theta), z+l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x+l/2*np.cos(theta)+w/2*np.sin(theta), z+l/2*np.sin(theta)-w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)-w/2*np.sin(theta), z-l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)+w/2*np.sin(theta), z-l/2*np.sin(theta)-w/2*np.cos(theta))]\n    out_vertices = sort_points(box1_vertices, (x, z))\n    for edge in box2_edges:\n        vertex_list = out_vertices.copy()\n        out_vertices = []\n        for idx, current_vertex in enumerate(vertex_list):\n            previous_vertex = vertex_list[idx-1]\n            if point_inside_edge(current_vertex, edge):\n                if not point_inside_edge(previous_vertex, edge):\n                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n                out_vertices.append(current_vertex)\n            elif point_inside_edge(previous_vertex, edge):\n                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n    to_remove = []\n    for i in range(len(out_vertices)):\n        if i in to_remove:\n            continue\n        for j in range(i+1, len(out_vertices)):\n            if abs(out_vertices[i][0] - out_vertices[j][0]) < 1e-6 and abs(out_vertices[i][1] - out_vertices[j][1]) < 1e-6:\n                to_remove.append(j)\n    out_vertices = sorted([(v[0]-x, v[1]-z) for i,v in enumerate(out_vertices) if i not in to_remove], key = lambda p: get_angle((p[0],p[1])))\n    return out_vertices\n\ndef sort_points(pts, center):\n    x, z = center\n    sorted_pts = sorted([(i, (v[0]-x, v[1]-z)) for i,v in enumerate(pts)], key = lambda p: get_angle((p[1][0],p[1][1])))\n    idx, _ = zip(*sorted_pts)\n    return [pts[i] for i in idx]\n\ndef compute_intersection_point(pt1, pt2, line1):\n    if pt1[0] == pt2[0]:\n        slope = np.inf\n    else:\n        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])\n    if np.isinf(slope):\n        line2 = (1, 0, pt1[0])\n    else:\n        line2 = (slope, -1, pt1[0]*slope-pt1[1])\n    # print(\"Line1:\", line1)\n    # print(\"Line2:\", line2)\n    if line1[1] == 0:\n        x = line1[2]/line1[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    elif line1[0] == 0:\n        y = line1[2]/line1[1]\n        x = (line2[2] - line2[1]*y)/line2[0]\n    elif line2[1] == 0:\n        x = pt1[0]\n        y = (line1[2]-x*line1[0])/line1[1]\n    else:\n        tmp_line = (line2 - line1*(line2[1]/line1[1]))\n        x = tmp_line[2]/tmp_line[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    return (x,y)\n\ndef point_inside_edge(pt, edge):\n    lhs = pt[0]*edge[0] + pt[1]*edge[1]\n    if lhs < edge[2] - 1e-6:\n        return True\n    else:\n        return False\n\n\ndef iou_matrix_3d(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n    assert objs.shape[1] == 7\n    assert hyps.shape[1] == 7\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            base_area = find_area(clip_polygon(objs[o], hyps[h]))\n            height = max(objs[o][1], hyps[h][1]) - min(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4])\n            intersect = base_area*height\n            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect\n            if union != 0:\n                C[o, h] = 1. - intersect / union\n            else:\n                C[o, h] = np.nan\n    C[C > max_iou] = np.nan\n    return C\n"
  },
  {
    "path": "src/evaluation/distances.py",
    "content": "\"\"\"py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.\n\nChristoph Heindl, 2017\nhttps://github.com/cheind/py-motmetrics\n\"\"\"\n\nimport numpy as np\nimport pdb\n\ndef norm2squared_matrix(objs, hyps, max_d2=float('inf')):\n    \"\"\"Computes the squared Euclidean distance matrix between object and hypothesis points.\n\n    Params\n    ------\n    objs : NxM array\n        Object points of dim M in rows\n    hyps : KxM array\n        Hypothesis points of dim M in rows\n\n    Kwargs\n    ------\n    max_d2 : float\n        Maximum tolerable squared Euclidean distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to +inf\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert hyps.shape[1] == objs.shape[1], \"Dimension mismatch\"\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            e = objs[o] - hyps[h]\n            C[o, h] = e.dot(e)\n\n    C[C > max_d2] = np.nan\n    return C\n\n\ndef iou_matrix(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n\n    assert objs.shape[1] == 4\n    assert hyps.shape[1] == 4\n\n    br_objs = objs[:, :2] + objs[:, 2:]\n    br_hyps = hyps[:, :2] + hyps[:, 2:]\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            isect_xy = np.maximum(objs[o, :2], hyps[h, :2])\n            isect_wh = np.maximum(np.minimum(br_objs[o], br_hyps[h]) - isect_xy, 0)\n            isect_a = isect_wh[0]*isect_wh[1]\n            union_a = objs[o, 2]*objs[o, 3] + hyps[h, 2]*hyps[h, 3] - isect_a\n            if union_a != 0:\n                C[o, h] = 1. - isect_a / union_a\n            else:\n                C[o, h] = np.nan\n\n    C[C > max_iou] = np.nan\n    return C\n\n\ndef find_area(vertices):\n    area = 0\n    for i in range(len(vertices)):\n        area += vertices[i][0]*(vertices[(i+1)%len(vertices)][1] - vertices[i-1][1])\n    return 0.5*abs(area)\n\ndef get_angle(p):\n    x, y = p\n    angle = np.arctan2(y,x)\n    if angle < 0:\n        angle += np.pi*2\n    return angle\n\ndef clip_polygon(box1, box2):\n    #clips box 1 by the edges in box2\n    x,y,z,l,h,w,theta = box2\n    theta = -theta\n\n    box2_edges = np.asarray([(-np.cos(theta), -np.sin(theta), l/2-x*np.cos(theta)-z*np.sin(theta)),\n                    (-np.sin(theta), np.cos(theta), w/2-x*np.sin(theta)+z*np.cos(theta)),\n                    (np.cos(theta), np.sin(theta), l/2+x*np.cos(theta)+z*np.sin(theta)),\n                    (np.sin(theta), -np.cos(theta), w/2+x*np.sin(theta)-z*np.cos(theta))])\n    x,y,z,l,h,w,theta = box1\n    theta = -theta\n\n    box1_vertices = [(x+l/2*np.cos(theta)-w/2*np.sin(theta), z+l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x+l/2*np.cos(theta)+w/2*np.sin(theta), z+l/2*np.sin(theta)-w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)-w/2*np.sin(theta), z-l/2*np.sin(theta)+w/2*np.cos(theta)),\n                        (x-l/2*np.cos(theta)+w/2*np.sin(theta), z-l/2*np.sin(theta)-w/2*np.cos(theta))]\n    out_vertices = sort_points(box1_vertices, (x, z))\n    for edge in box2_edges:\n        vertex_list = out_vertices.copy()\n        out_vertices = []\n        for idx, current_vertex in enumerate(vertex_list):\n            previous_vertex = vertex_list[idx-1]\n            if point_inside_edge(current_vertex, edge):\n                if not point_inside_edge(previous_vertex, edge):\n                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n                out_vertices.append(current_vertex)\n            elif point_inside_edge(previous_vertex, edge):\n                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))\n    to_remove = []\n    for i in range(len(out_vertices)):\n        if i in to_remove:\n            continue\n        for j in range(i+1, len(out_vertices)):\n            if abs(out_vertices[i][0] - out_vertices[j][0]) < 1e-6 and abs(out_vertices[i][1] - out_vertices[j][1]) < 1e-6:\n                to_remove.append(j)\n    out_vertices = sorted([(v[0]-x, v[1]-z) for i,v in enumerate(out_vertices) if i not in to_remove], key = lambda p: get_angle((p[0],p[1])))\n    return out_vertices\n\ndef sort_points(pts, center):\n    x, z = center\n    sorted_pts = sorted([(i, (v[0]-x, v[1]-z)) for i,v in enumerate(pts)], key = lambda p: get_angle((p[1][0],p[1][1])))\n    idx, _ = zip(*sorted_pts)\n    return [pts[i] for i in idx]\n\ndef compute_intersection_point(pt1, pt2, line1):\n    if pt1[0] == pt2[0]:\n        slope = np.inf\n    else:\n        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])\n    if np.isinf(slope):\n        line2 = (1, 0, pt1[0])\n    else:\n        line2 = (slope, -1, pt1[0]*slope-pt1[1])\n    # print(\"Line1:\", line1)\n    # print(\"Line2:\", line2)\n    if line1[1] == 0:\n        x = line1[2]/line1[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    elif line1[0] == 0:\n        y = line1[2]/line1[1]\n        x = (line2[2] - line2[1]*y)/line2[0]\n    elif line2[1] == 0:\n        x = pt1[0]\n        y = (line1[2]-x*line1[0])/line1[1]\n    else:\n        tmp_line = (line2 - line1*(line2[1]/line1[1]))\n        x = tmp_line[2]/tmp_line[0]\n        y = (line2[2] - line2[0]*x)/line2[1]\n    return (x,y)\n\ndef point_inside_edge(pt, edge):\n    lhs = pt[0]*edge[0] + pt[1]*edge[1]\n    if lhs < edge[2] - 1e-6:\n        return True\n    else:\n        return False\n\n\ndef iou_matrix_3d(objs, hyps, max_iou=1.):\n    \"\"\"Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles.\n\n    The IoU is computed as\n\n        IoU(a,b) = 1. - isect(a, b) / union(a, b)\n\n    where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The\n    IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is\n    zero.\n\n    Params\n    ------\n    objs : Nx4 array\n        Object rectangles (x,y,w,h) in rows\n    hyps : Kx4 array\n        Hypothesis rectangles (x,y,w,h) in rows\n\n    Kwargs\n    ------\n    max_iou : float\n        Maximum tolerable overlap distance. Object / hypothesis points\n        with larger distance are set to np.nan signalling do-not-pair. Defaults\n        to 0.5\n\n    Returns\n    -------\n    C : NxK array\n        Distance matrix containing pairwise distances or np.nan.\n    \"\"\"\n\n    objs = np.atleast_2d(objs).astype(float)\n    hyps = np.atleast_2d(hyps).astype(float)\n\n    if objs.size == 0 or hyps.size == 0:\n        return np.empty((0,0))\n    assert objs.shape[1] == 7\n    assert hyps.shape[1] == 7\n\n    C = np.empty((objs.shape[0], hyps.shape[0]))\n    for o in range(objs.shape[0]):\n        for h in range(hyps.shape[0]):\n            base_area = find_area(clip_polygon(objs[o], hyps[h]))\n            height = max(objs[o][1], hyps[h][1]) - min(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4])\n            intersect = base_area*height\n            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect\n            if union != 0:\n                C[o, h] = 1. - intersect / union\n            else:\n                C[o, h] = np.nan\n    C[C > max_iou] = np.nan\n    return C\n"
  },
  {
    "path": "src/featurepointnet_model.py",
    "content": "import os, pdb\nimport numpy as np\nimport tensorflow as tf\ntf.logging.set_verbosity(tf.logging.ERROR)\nimport configparser\n\nimport featurepointnet_tf_util as tf_util\nimport featurepointnet_model_util as model_util\nfrom calibration import Calibration, OmniCalibration\n\nbatch_size = 6 #TODO: Update if needed?\n\nclass FPointNet():\n    def __init__(self, config_path):\n        parser = configparser.SafeConfigParser()\n        parser.read(config_path)\n        self.num_point = parser.getint('general', 'num_point')\n        self.model_path = parser.get('general', 'model_path')\n\n        with tf.device('/gpu:'+str('0')):\n            pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \\\n            heading_class_label_pl, heading_residual_label_pl, \\\n            size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point)\n            is_training_pl = tf.placeholder(tf.bool, shape=())\n            end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)\n            self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))\n            #depth_feature = self.get_depth_feature_op(is_training_pl)\n            loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points)\n            self.saver = tf.train.Saver()\n\n        # Create a session\n        config = tf.ConfigProto()\n        config.gpu_options.allow_growth = True\n        config.allow_soft_placement = True\n        self.sess = tf.Session(config=config)\n\n        #Initialize variables\n        self.sess.run(tf.global_variables_initializer())\n        # Restore variables from disk.\n        self.saver.restore(self.sess, self.model_path)\n        self.ops = {'pointclouds_pl': pointclouds_pl,\n               'one_hot_vec_pl': one_hot_vec_pl,\n               'labels_pl': labels_pl,\n               'centers_pl': centers_pl,\n               'heading_class_label_pl': heading_class_label_pl,\n               'heading_residual_label_pl': heading_residual_label_pl,\n               'size_class_label_pl': size_class_label_pl,\n               'size_residual_label_pl': size_residual_label_pl,\n               'is_training_pl': is_training_pl,\n               'logits': end_points['mask_logits'],\n               'center': end_points['center'],\n               'end_points': end_points,\n               'depth_feature':depth_feature,\n               'loss': loss}\n\n    # @profile\n    def __call__(self, input_point_cloud, rot_angle, peds=False):\n        '''\n        one_hot_vec = np.zeros((batch_size, 3))\n        feed_dict = {self.pointclouds_pl: input_point_cloud,\n                     self.one_hot_vec_pl: one_hot_vec,\n                     self.is_training_pl: False}\n        features = self.sess.run(self.feature,feed_dict=feed_dict)\n        return features '''\n\n        ''' Run inference for frustum pointnets in batch mode '''\n        \n        one_hot_vec = np.zeros((batch_size,3))\n        if peds:\n            one_hot_vec[:, 1] = 1\n        num_batches = input_point_cloud.shape[0]//batch_size + 1\n        num_inputs = input_point_cloud.shape[0]\n        if input_point_cloud.shape[0]%batch_size !=0:\n            input_point_cloud = np.vstack([input_point_cloud, np.zeros((batch_size - input_point_cloud.shape[0]%batch_size, self.num_point, 4))])\n        else:\n            num_batches -= 1\n        logits = np.zeros((input_point_cloud.shape[0], input_point_cloud.shape[1], 2))\n        centers = np.zeros((input_point_cloud.shape[0], 3))\n        heading_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))\n        heading_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))\n        size_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER))\n        size_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER, 3))\n        scores = np.zeros((input_point_cloud.shape[0],)) # 3D box score \n        features = np.zeros((input_point_cloud.shape[0], 512))\n        \n        for i in range(num_batches):    \n            ep = self.ops['end_points'] \n            feed_dict = {\\\n                self.ops['pointclouds_pl']: input_point_cloud[i*batch_size: (i+1)*batch_size],\n                self.ops['one_hot_vec_pl']: one_hot_vec,\n                self.ops['is_training_pl']: False}\n\n            batch_centers, \\\n            batch_heading_scores, batch_heading_residuals, \\\n            batch_size_scores, batch_size_residuals, batch_features = \\\n                self.sess.run([self.ops['center'],\n                    ep['heading_scores'], ep['heading_residuals'],\n                    ep['size_scores'], ep['size_residuals'], self.ops['depth_feature']],\n                    feed_dict=feed_dict)\n\n            # logits[i*batch_size: (i+1)*batch_size] = batch_logits\n            centers[i*batch_size: (i+1)*batch_size] = batch_centers\n            heading_logits[i*batch_size: (i+1)*batch_size] = batch_heading_scores\n            heading_residuals[i*batch_size: (i+1)*batch_size] = batch_heading_residuals\n            size_logits[i*batch_size: (i+1)*batch_size] = batch_size_scores\n            size_residuals[i*batch_size: (i+1)*batch_size] = batch_size_residuals\n            features[i*batch_size: (i+1)*batch_size] = batch_features[:,0,:]\n        heading_cls = np.argmax(heading_logits, 1) # B\n        size_cls = np.argmax(size_logits, 1) # B\n        heading_res = np.vstack([heading_residuals[i, heading_cls[i]] for i in range(heading_cls.shape[0])])\n        size_res = np.vstack([size_residuals[i, size_cls[i], :] for i in range(size_cls.shape[0])])\n\n        #TODO: Make this accept batches if wanted\n        boxes = []\n        for i in range(num_inputs):\n            box = np.array(model_util.from_prediction_to_label_format(centers[i], heading_cls[i], heading_res[i], size_cls[i], size_res[i], rot_angle[i]))\n            box[6] = np.squeeze(box[6])\n            swp = box[5]\n            box[5] = box[4]\n            box[4] = swp\n            boxes.append(box)       \n        boxes = np.vstack(boxes)\n        return boxes, scores[:num_inputs], features[:num_inputs]\n\n\n    def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):\n        ''' 3D instance segmentation PointNet v1 network.\n        Input:\n            point_cloud: TF tensor in shape (B,N,4)\n                frustum point clouds with XYZ and intensity in point channels\n                XYZs are in frustum coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n            is_training: TF boolean scalar\n            bn_decay: TF float scalar\n            end_points: dict\n        Output:\n            logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object\n            end_points: dict\n        '''\n        num_point = point_cloud.get_shape()[1].value\n\n        net = tf.expand_dims(point_cloud, 2)\n\n        net = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv1', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv2', bn_decay=bn_decay)\n        point_feat = tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv3', bn_decay=bn_decay)\n        net = tf_util.conv2d(point_feat, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv4', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 1024, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv5', bn_decay=bn_decay)\n\n        global_feat = tf_util.max_pool2d(net, [num_point,1],\n                                         padding='VALID', scope='maxpool')\n\n        global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3)\n        global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])\n        concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])\n\n        net = tf_util.conv2d(concat_feat, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv6', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv7', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv8', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv9', bn_decay=bn_decay)\n        net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)\n\n        logits = tf_util.conv2d(net, 2, [1,1],\n                             padding='VALID', stride=[1,1], activation_fn=None,\n                             scope='conv10')\n        logits = tf.squeeze(logits, [2]) # BxNxC\n        return logits, end_points\n     \n    def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):\n        ''' 3D Box Estimation PointNet v1 network.\n        Input:\n            object_point_cloud: TF tensor in shape (B,M,C)\n                point clouds in object coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n        Output:\n            output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)\n                including box centers, heading bin class scores and residuals,\n                and size cluster scores and residuals\n        ''' \n        num_point = object_point_cloud.get_shape()[1].value\n        net = tf.expand_dims(object_point_cloud, 2)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg1', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg2', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg3', bn_decay=bn_decay)\n        net = tf_util.conv2d(net, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg4', bn_decay=bn_decay)\n\n        features = tf.reduce_max(net, axis = 1)\n\n        net = tf_util.max_pool2d(net, [num_point,1],\n            padding='VALID', scope='maxpool2')\n        net = tf.squeeze(net, axis=[1,2])\n        net = tf.concat([net, one_hot_vec], axis=1)\n        net = tf_util.fully_connected(net, 512, scope='fc1', bn=True,\n            is_training=is_training, bn_decay=bn_decay)\n        net = tf_util.fully_connected(net, 256, scope='fc2', bn=True,\n            is_training=is_training, bn_decay=bn_decay)\n\n        # The first 3 numbers: box center coordinates (cx,cy,cz),\n        # the next NUM_HEADING_BIN*2:  heading bin class scores and bin residuals\n        # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals\n        output = tf_util.fully_connected(net,\n            3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4, activation_fn=None, scope='fc3')\n        return output, end_points, features\n\n    def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):\n        ''' Frustum PointNets model. The model predict 3D object masks and\n        amodel bounding boxes for objects in frustum point clouds.\n        Input:\n            point_cloud: TF tensor in shape (B,N,4)\n                frustum point clouds with XYZ and intensity in point channels\n                XYZs are in frustum coordinate\n            one_hot_vec: TF tensor in shape (B,3)\n                length-3 vectors indicating predicted object type\n            is_training: TF boolean scalar\n            bn_decay: TF float scalar\n        Output:\n            end_points: dict (map from name strings to TF tensors)\n        '''\n        end_points = {}\n        \n        # 3D Instance Segmentation PointNet\n        logits, end_points = self.get_instance_seg_v1_net(\\\n            point_cloud, one_hot_vec,\n            is_training, bn_decay, end_points)\n        end_points['mask_logits'] = logits\n\n        # Masking\n        # select masked points and translate to masked points' centroid\n        object_point_cloud_xyz, mask_xyz_mean, end_points = \\\n            model_util.point_cloud_masking(point_cloud, logits, end_points)\n\n        # T-Net and coordinate translation\n        center_delta, end_points = model_util.get_center_regression_net(\\\n            object_point_cloud_xyz, one_hot_vec,\n            is_training, bn_decay, end_points)\n        stage1_center = center_delta + mask_xyz_mean # Bx3\n        end_points['stage1_center'] = stage1_center\n        # Get object point cloud in object coordinate\n        object_point_cloud_xyz_new = \\\n            object_point_cloud_xyz - tf.expand_dims(center_delta, 1)\n\n        # Amodel Box Estimation PointNet\n        output, end_points, features = self.get_3d_box_estimation_v1_net(\\\n            object_point_cloud_xyz_new, one_hot_vec,\n            is_training, bn_decay, end_points)\n\n        # Parse output to 3D box parameters\n        end_points = model_util.parse_output_to_tensors(output, end_points)\n        end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3\n\n        return end_points, features\n    \n    def get_depth_feature_op(self, is_training):\n\n        net = tf.expand_dims(self.object_pointcloud, 2)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg1', bn_decay=None)\n        net = tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg2', bn_decay=None)\n        net = tf_util.conv2d(net, 256, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg3', bn_decay=None)\n        net = tf_util.conv2d(net, 512, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv-reg4', bn_decay=None)\n        net = tf.reduce_max(net, axis = 1)\n        \n        return net\n\n    def get_depth_feature(self, object_pointcloud):\n        \n        feed_dict = {self.object_pointcloud:object_pointcloud, self.ops['is_training_pl']:False}\n        depth_feature = self.sess.run([self.ops['depth_feature']], feed_dict = feed_dict)\n        return depth_feature\n\n    def softmax(self, x):\n        ''' Numpy function for softmax'''\n        shape = x.shape\n        probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True))\n        probs /= np.sum(probs, axis=len(shape)-1, keepdims=True)\n        return probs\n\ndef create_depth_model(model, config_path):\n    #Note that folder path must be the folder containing the config.yaml file if omni_camera is True\n    if model == 'FPointNet':\n        return FPointNet(config_path)\n    elif model == 'PointNet':\n        return PointNet(config_path)\n"
  },
  {
    "path": "src/featurepointnet_model_util.py",
    "content": "# import open3d as o3d\nimport numpy as np\nimport tensorflow as tf\nimport os\nimport sys\nimport torch\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(BASE_DIR)\nimport featurepointnet_tf_util as tf_util\n\n# -----------------\n# Global Constants\n# -----------------\n\nNUM_HEADING_BIN = 12\nNUM_SIZE_CLUSTER = 8 # one cluster for each type\nNUM_OBJECT_POINT = 512\ng_type2class={'Car':0, 'Van':1, 'Truck':2, 'Pedestrian':3,\n              'Person_sitting':4, 'Cyclist':5, 'Tram':6, 'Misc':7}\ng_class2type = {g_type2class[t]:t for t in g_type2class}\ng_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}\n#Added 0.5m and 0.2m for car and pedestrian to make boxes slightly bigger\ng_type_mean_size = {'Car': np.array([3.88311640418,1.62856739989,1.52563191462]),\n                    'Van': np.array([5.06763659,1.9007158,2.20532825]),\n                    'Truck': np.array([10.13586957,2.58549199,3.2520595]),\n                    'Pedestrian': np.array([0.84422524,0.66068622,1.76255119]),\n                    'Person_sitting': np.array([0.80057803,0.5983815,1.27450867]),\n                    'Cyclist': np.array([1.76282397,0.59706367,1.73698127]),\n                    'Tram': np.array([16.17150617,2.53246914,3.53079012]),\n                    'Misc': np.array([3.64300781,1.54298177,1.92320313])}\ng_mean_size_arr = np.zeros((NUM_SIZE_CLUSTER, 3)) # size clustrs\nfor i in range(NUM_SIZE_CLUSTER):\n    g_mean_size_arr[i,:] = g_type_mean_size[g_class2type[i]]\n\n# -----------------\n# TF Functions Helpers\n# -----------------\n\ndef tf_gather_object_pc(point_cloud, mask, npoints=512):\n    ''' Gather object point clouds according to predicted masks.\n    Input:\n        point_cloud: TF tensor in shape (B,N,C)\n        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)\n        npoints: int scalar, maximum number of points to keep (default: 512)\n    Output:\n        object_pc: TF tensor in shape (B,npoint,C)\n        indices: TF int tensor in shape (B,npoint,2)\n    '''\n    def mask_to_indices(mask):\n        indices = np.zeros((mask.shape[0], npoints, 2), dtype=np.int32)\n        for i in range(mask.shape[0]):\n            pos_indices = np.where(mask[i,:]>0.5)[0]\n            # skip cases when pos_indices is empty\n            if len(pos_indices) > 0: \n                if len(pos_indices) > npoints:\n                    choice = np.random.choice(len(pos_indices),\n                        npoints, replace=False)\n                else:\n                    choice = np.random.choice(len(pos_indices),\n                        npoints-len(pos_indices), replace=True)\n                    choice = np.concatenate((np.arange(len(pos_indices)), choice))\n                np.random.shuffle(choice)\n                indices[i,:,1] = pos_indices[choice]\n            indices[i,:,0] = i\n        return indices\n\n    indices = tf.py_func(mask_to_indices, [mask], tf.int32)  \n    object_pc = tf.gather_nd(point_cloud, indices)\n    return object_pc, indices\n\n\ndef get_box3d_corners_helper(centers, headings, sizes):\n    \"\"\" TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3) \"\"\"\n    #print '-----', centers\n    N = centers.get_shape()[0].value\n    l = tf.slice(sizes, [0,0], [-1,1]) # (N,1)\n    w = tf.slice(sizes, [0,1], [-1,1]) # (N,1)\n    h = tf.slice(sizes, [0,2], [-1,1]) # (N,1)\n    #print l,w,h\n    x_corners = tf.concat([l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2], axis=1) # (N,8)\n    y_corners = tf.concat([h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2], axis=1) # (N,8)\n    z_corners = tf.concat([w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2], axis=1) # (N,8)\n    corners = tf.concat([tf.expand_dims(x_corners,1), tf.expand_dims(y_corners,1), tf.expand_dims(z_corners,1)], axis=1) # (N,3,8)\n    #print x_corners, y_corners, z_corners\n    c = tf.cos(headings)\n    s = tf.sin(headings)\n    ones = tf.ones([N], dtype=tf.float32)\n    zeros = tf.zeros([N], dtype=tf.float32)\n    row1 = tf.stack([c,zeros,s], axis=1) # (N,3)\n    row2 = tf.stack([zeros,ones,zeros], axis=1)\n    row3 = tf.stack([-s,zeros,c], axis=1)\n    R = tf.concat([tf.expand_dims(row1,1), tf.expand_dims(row2,1), tf.expand_dims(row3,1)], axis=1) # (N,3,3)\n    #print row1, row2, row3, R, N\n    corners_3d = tf.matmul(R, corners) # (N,3,8)\n    corners_3d += tf.tile(tf.expand_dims(centers,2), [1,1,8]) # (N,3,8)\n    corners_3d = tf.transpose(corners_3d, perm=[0,2,1]) # (N,8,3)\n    return corners_3d\n\ndef get_box3d_corners(center, heading_residuals, size_residuals):\n    \"\"\" TF layer.\n    Inputs:\n        center: (B,3)\n        heading_residuals: (B,NH)\n        size_residuals: (B,NS,3)\n    Outputs:\n        box3d_corners: (B,NH,NS,8,3) tensor\n    \"\"\"\n    batch_size = center.get_shape()[0].value\n    heading_bin_centers = tf.constant(np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)\n    headings = heading_residuals + tf.expand_dims(heading_bin_centers, 0) # (B,NH)\n    \n    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0) + size_residuals # (B,NS,1)\n    sizes = mean_sizes + size_residuals # (B,NS,3)\n    sizes = tf.tile(tf.expand_dims(sizes,1), [1,NUM_HEADING_BIN,1,1]) # (B,NH,NS,3)\n    headings = tf.tile(tf.expand_dims(headings,-1), [1,1,NUM_SIZE_CLUSTER]) # (B,NH,NS)\n    centers = tf.tile(tf.expand_dims(tf.expand_dims(center,1),1), [1,NUM_HEADING_BIN, NUM_SIZE_CLUSTER,1]) # (B,NH,NS,3)\n\n    N = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER\n    corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N,3]), tf.reshape(headings, [N]), tf.reshape(sizes, [N,3]))\n\n    return tf.reshape(corners_3d, [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])\n\n\ndef huber_loss(error, delta):\n    abs_error = tf.abs(error)\n    quadratic = tf.minimum(abs_error, delta)\n    linear = (abs_error - quadratic)\n    losses = 0.5 * quadratic**2 + delta * linear\n    return tf.reduce_mean(losses)\n\n\ndef parse_output_to_tensors(output, end_points):\n    ''' Parse batch output to separate tensors (added to end_points)\n    Input:\n        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)\n        end_points: dict\n    Output:\n        end_points: dict (updated)\n    '''\n    batch_size = output.get_shape()[0].value\n    center = tf.slice(output, [0,0], [-1,3])\n    end_points['center_boxnet'] = center\n\n    heading_scores = tf.slice(output, [0,3], [-1,NUM_HEADING_BIN])\n    heading_residuals_normalized = tf.slice(output, [0,3+NUM_HEADING_BIN],\n        [-1,NUM_HEADING_BIN])\n    end_points['heading_scores'] = heading_scores # BxNUM_HEADING_BIN\n    end_points['heading_residuals_normalized'] = \\\n        heading_residuals_normalized # BxNUM_HEADING_BIN (-1 to 1)\n    end_points['heading_residuals'] = \\\n        heading_residuals_normalized * (np.pi/NUM_HEADING_BIN) # BxNUM_HEADING_BIN\n    \n    size_scores = tf.slice(output, [0,3+NUM_HEADING_BIN*2],\n        [-1,NUM_SIZE_CLUSTER]) # BxNUM_SIZE_CLUSTER\n    size_residuals_normalized = tf.slice(output,\n        [0,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER], [-1,NUM_SIZE_CLUSTER*3])\n    size_residuals_normalized = tf.reshape(size_residuals_normalized,\n        [batch_size, NUM_SIZE_CLUSTER, 3]) # BxNUM_SIZE_CLUSTERx3\n    end_points['size_scores'] = size_scores\n    end_points['size_residuals_normalized'] = size_residuals_normalized\n    end_points['size_residuals'] = size_residuals_normalized * \\\n        tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0)\n\n    return end_points\n\n# -----------------\n# Box Parsing Helpers\n# -----------------\n\ndef from_prediction_to_label_format(center, angle_class, angle_res,\\\n                                    size_class, size_res, rot_angle):\n    ''' Convert predicted box parameters to label format. '''\n    l,w,h = class2size(size_class, size_res)\n    ry = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle\n    tx,ty,tz = rotate_pc_along_y(np.expand_dims(center,0),-rot_angle).squeeze()\n    ty += h/2.0\n    return tx,ty,tz,l,w,h,ry\n\ndef size2class(size, type_name):\n    ''' Convert 3D bounding box size to template class and residuals.\n    todo (rqi): support multiple size clusters per type.\n \n    Input:\n        size: numpy array of shape (3,) for (l,w,h)\n        type_name: string\n    Output:\n        size_class: int scalar\n        size_residual: numpy array of shape (3,)\n    '''\n    size_class = g_type2class[type_name]\n    size_residual = size - g_type_mean_size[type_name]\n    return size_class, size_residual\n\ndef class2size(pred_cls, residual):\n    ''' Inverse function to size2class. '''\n    mean_size = g_type_mean_size[g_class2type[pred_cls]]\n    return mean_size + residual\n\ndef angle2class(angle, num_class):\n    ''' Convert continuous angle to discrete class and residual.\n    Input:\n        angle: rad scalar, from 0-2pi (or -pi~pi), class center at\n            0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N)\n        num_class: int scalar, number of classes N\n    Output:\n        class_id, int, among 0,1,...,N-1\n        residual_angle: float, a number such that\n            class*(2pi/N) + residual_angle = angle\n    '''\n    angle = angle%(2*np.pi)\n    assert(angle>=0 and angle<=2*np.pi)\n    angle_per_class = 2*np.pi/float(num_class)\n    shifted_angle = (angle+angle_per_class/2)%(2*np.pi)\n    class_id = int(shifted_angle/angle_per_class)\n    residual_angle = shifted_angle - \\\n        (class_id * angle_per_class + angle_per_class/2)\n    return class_id, residual_angle\n\ndef class2angle(pred_cls, residual, num_class, to_label_format=True):\n    ''' Inverse function to angle2class.\n    If to_label_format, adjust angle to the range as in labels.\n    '''\n    angle_per_class = 2*np.pi/float(num_class)\n    angle_center = pred_cls * angle_per_class\n    angle = angle_center + residual\n    if to_label_format and angle>np.pi:\n        angle = angle - 2*np.pi\n    return angle\n\ndef rotate_pc_along_y(pc, rot_angle):\n    '''\n    Input:\n        pc: numpy array (N,C), first 3 channels are XYZ\n            z is facing forward, x is left ward, y is downward\n        rot_angle: rad scalar\n    Output:\n        pc: updated pc with XYZ rotated\n    '''\n    cosval = np.cos(rot_angle)\n    sinval = np.sin(rot_angle)\n    rotmat = np.array([[cosval, -sinval],[sinval, cosval]])\n    pc[:,[0,2]] = np.dot(pc[:,[0,2]], np.transpose(rotmat))\n    return pc\n\n\n\ndef rotate_pc_along_y_torch(pc, rot_angle):\n    '''\n    Input:\n        pc: numpy array (N,C), first 3 channels are XYZ\n            z is facing forward, x is left ward, y is downward\n        rot_angle: rad scalar\n    Output:\n        pc: updated pc with XYZ rotated\n    '''\n    rotmats = []\n    for angle in rot_angle:\n        cosval = np.cos(angle)\n        sinval = np.sin(angle)\n        rotmat = torch.Tensor([[cosval, sinval],[-sinval, cosval]]).type(pc.type())\n        rotmats.append(rotmat)\n    rotmats = torch.stack(rotmats, dim=0)\n    pc[:, :,[0,2]] = torch.bmm(pc[:, :,[0,2]], rotmats)\n    return pc\n\n# --------------------------------------\n# Shared subgraphs for v1 and v2 models\n# --------------------------------------\n\ndef placeholder_inputs(batch_size, num_point):\n    ''' Get useful placeholder tensors.\n    Input:\n        batch_size: scalar int\n        num_point: scalar int\n    Output:\n        TF placeholders for inputs and ground truths\n    '''\n    pointclouds_pl = tf.placeholder(tf.float32,\n        shape=(batch_size, num_point, 4))\n    one_hot_vec_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))\n\n    # labels_pl is for segmentation label\n    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point))\n    centers_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))\n    heading_class_label_pl = tf.placeholder(tf.int32, shape=(batch_size,))\n    heading_residual_label_pl = tf.placeholder(tf.float32, shape=(batch_size,))\n    size_class_label_pl = tf.placeholder(tf.int32, shape=(batch_size,))\n    size_residual_label_pl = tf.placeholder(tf.float32, shape=(batch_size,3))\n\n    return pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \\\n        heading_class_label_pl, heading_residual_label_pl, \\\n        size_class_label_pl, size_residual_label_pl\n\n\ndef point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):\n    ''' Select point cloud with predicted 3D mask,\n    translate coordinates to the masked points centroid.\n    \n    Input:\n        point_cloud: TF tensor in shape (B,N,C)\n        logits: TF tensor in shape (B,N,2)\n        end_points: dict\n        xyz_only: boolean, if True only return XYZ channels\n    Output:\n        object_point_cloud: TF tensor in shape (B,M,3)\n            for simplicity we only keep XYZ here\n            M = NUM_OBJECT_POINT as a hyper-parameter\n        mask_xyz_mean: TF tensor in shape (B,3)\n    '''\n    batch_size = point_cloud.get_shape()[0].value\n    num_point = point_cloud.get_shape()[1].value\n    mask = tf.slice(logits,[0,0,0],[-1,-1,1]) < \\\n        tf.slice(logits,[0,0,1],[-1,-1,1])\n    mask = tf.to_float(mask) # BxNx1\n    mask_count = tf.tile(tf.reduce_sum(mask,axis=1,keep_dims=True),\n        [1,1,3]) # Bx1x3\n    point_cloud_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3]) # BxNx3\n    mask_xyz_mean = tf.reduce_sum(tf.tile(mask, [1,1,3])*point_cloud_xyz,\n        axis=1, keep_dims=True) # Bx1x3\n    mask = tf.squeeze(mask, axis=[2]) # BxN\n    end_points['mask'] = mask\n    mask_xyz_mean = mask_xyz_mean/tf.maximum(mask_count,1) # Bx1x3\n\n    # Translate to masked points' centroid\n    point_cloud_xyz_stage1 = point_cloud_xyz - \\\n        tf.tile(mask_xyz_mean, [1,num_point,1])\n\n    if xyz_only:\n        point_cloud_stage1 = point_cloud_xyz_stage1\n    else:\n        point_cloud_features = tf.slice(point_cloud, [0,0,3], [-1,-1,-1])\n        point_cloud_stage1 = tf.concat(\\\n            [point_cloud_xyz_stage1, point_cloud_features], axis=-1)\n    num_channels = point_cloud_stage1.get_shape()[2].value\n\n    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,\n        mask, NUM_OBJECT_POINT)\n    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])\n\n    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points\n\n\ndef get_center_regression_net(object_point_cloud, one_hot_vec,\n                              is_training, bn_decay, end_points):\n    ''' Regression network for center delta. a.k.a. T-Net.\n    Input:\n        object_point_cloud: TF tensor in shape (B,M,C)\n            point clouds in 3D mask coordinate\n        one_hot_vec: TF tensor in shape (B,3)\n            length-3 vectors indicating predicted object type\n    Output:\n        predicted_center: TF tensor in shape (B,3)\n    ''' \n    num_point = object_point_cloud.get_shape()[1].value\n    net = tf.expand_dims(object_point_cloud, 2)\n    net = tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg1-stage1', bn_decay=bn_decay)\n    net = tf_util.conv2d(net, 128, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg2-stage1', bn_decay=bn_decay)\n    net = tf_util.conv2d(net, 256, [1,1],\n                         padding='VALID', stride=[1,1],\n                         bn=True, is_training=is_training,\n                         scope='conv-reg3-stage1', bn_decay=bn_decay)\n    net = tf_util.max_pool2d(net, [num_point,1],\n        padding='VALID', scope='maxpool-stage1')\n    net = tf.squeeze(net, axis=[1,2])\n    net = tf.concat([net, one_hot_vec], axis=1)\n    net = tf_util.fully_connected(net, 256, scope='fc1-stage1', bn=True,\n        is_training=is_training, bn_decay=bn_decay)\n    net = tf_util.fully_connected(net, 128, scope='fc2-stage1', bn=True,\n        is_training=is_training, bn_decay=bn_decay)\n    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,\n        scope='fc3-stage1')\n    return predicted_center, end_points\n\ndef softmax(x):\n    ''' Numpy function for softmax'''\n    shape = x.shape\n    probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True))\n    probs /= np.sum(probs, axis=len(shape)-1, keepdims=True)\n    return probs\n\n\ndef get_loss(mask_label, center_label, \\\n             heading_class_label, heading_residual_label, \\\n             size_class_label, size_residual_label, \\\n             end_points, \\\n             corner_loss_weight=10.0, \\\n             box_loss_weight=1.0):\n    ''' Loss functions for 3D object detection.\n    Input:\n        mask_label: TF int32 tensor in shape (B,N)\n        center_label: TF tensor in shape (B,3)\n        heading_class_label: TF int32 tensor in shape (B,) \n        heading_residual_label: TF tensor in shape (B,) \n        size_class_label: TF tensor int32 in shape (B,)\n        size_residual_label: TF tensor tensor in shape (B,)\n        end_points: dict, outputs from our model\n        corner_loss_weight: float scalar\n        box_loss_weight: float scalar\n    Output:\n        total_loss: TF scalar tensor\n            the total_loss is also added to the losses collection\n    '''\n    # 3D Segmentation loss\n    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\\\n        logits=end_points['mask_logits'], labels=mask_label))\n    tf.summary.scalar('3d mask loss', mask_loss)\n\n    # Center regression losses\n    center_dist = tf.norm(center_label - end_points['center'], axis=-1)\n    center_loss = huber_loss(center_dist, delta=2.0)\n    tf.summary.scalar('center loss', center_loss)\n    stage1_center_dist = tf.norm(center_label - \\\n        end_points['stage1_center'], axis=-1)\n    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)\n    tf.summary.scalar('stage1 center loss', stage1_center_loss)\n\n    # Heading loss\n    heading_class_loss = tf.reduce_mean( \\\n        tf.nn.sparse_softmax_cross_entropy_with_logits( \\\n        logits=end_points['heading_scores'], labels=heading_class_label))\n    tf.summary.scalar('heading class loss', heading_class_loss)\n\n    hcls_onehot = tf.one_hot(heading_class_label,\n        depth=NUM_HEADING_BIN,\n        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN\n    heading_residual_normalized_label = \\\n        heading_residual_label / (np.pi/NUM_HEADING_BIN)\n    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \\\n        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \\\n        heading_residual_normalized_label, delta=1.0)\n    tf.summary.scalar('heading residual normalized loss',\n        heading_residual_normalized_loss)\n\n    # Size loss\n    size_class_loss = tf.reduce_mean( \\\n        tf.nn.sparse_softmax_cross_entropy_with_logits( \\\n        logits=end_points['size_scores'], labels=size_class_label))\n    tf.summary.scalar('size class loss', size_class_loss)\n\n    scls_onehot = tf.one_hot(size_class_label,\n        depth=NUM_SIZE_CLUSTER,\n        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER\n    scls_onehot_tiled = tf.tile(tf.expand_dims( \\\n        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3\n    predicted_size_residual_normalized = tf.reduce_sum( \\\n        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3\n\n    mean_size_arr_expand = tf.expand_dims( \\\n        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3\n    mean_size_label = tf.reduce_sum( \\\n        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3\n    size_residual_label_normalized = size_residual_label / mean_size_label\n    size_normalized_dist = tf.norm( \\\n        size_residual_label_normalized - predicted_size_residual_normalized,\n        axis=-1)\n    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)\n    tf.summary.scalar('size residual normalized loss',\n        size_residual_normalized_loss)\n\n    # Corner loss\n    # We select the predicted corners corresponding to the \n    # GT heading bin and size cluster.\n    corners_3d = get_box3d_corners(end_points['center'],\n        end_points['heading_residuals'],\n        end_points['size_residuals']) # (B,NH,NS,8,3)\n    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \\\n        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)\n    corners_3d_pred = tf.reduce_sum( \\\n        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,\n        axis=[1,2]) # (B,8,3)\n\n    heading_bin_centers = tf.constant( \\\n        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)\n    heading_label = tf.expand_dims(heading_residual_label,1) + \\\n        tf.expand_dims(heading_bin_centers, 0) # (B,NH)\n    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)\n    mean_sizes = tf.expand_dims( \\\n        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)\n    size_label = mean_sizes + \\\n        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)\n    size_label = tf.reduce_sum( \\\n        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)\n    corners_3d_gt = get_box3d_corners_helper( \\\n        center_label, heading_label, size_label) # (B,8,3)\n    corners_3d_gt_flip = get_box3d_corners_helper( \\\n        center_label, heading_label+np.pi, size_label) # (B,8,3)\n\n    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),\n        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))\n    corners_loss = huber_loss(corners_dist, delta=1.0) \n    tf.summary.scalar('corners loss', corners_loss)\n\n    # Weighted sum of all losses\n    total_loss = mask_loss + box_loss_weight * (center_loss + \\\n        heading_class_loss + size_class_loss + \\\n        heading_residual_normalized_loss*20 + \\\n        size_residual_normalized_loss*20 + \\\n        stage1_center_loss + \\\n        corner_loss_weight*corners_loss)\n    tf.add_to_collection('losses', total_loss)\n\n    return total_loss\n\ndef get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,\n                           clip_distance=40.0):\n    ''' Filter lidar points, keep those in image FOV '''\n    #pts_2d = calib.project_rect_to_image(calib.project_ref_to_rect(pc_velo))\n    #pts_2d = calib.project_rect_to_image_torch(calib.project_ref_to_rect_torch(torch.from_numpy(pc_velo).cuda()))\n    pts_2d = calib.project_ref_to_image_torch(pc_velo)\n\n    fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \\\n        (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)\n    # fov_inds = fov_inds & (pc_velo[:,2]<clip_distance) #filter out far z pts\n    # imgfov_pc_velo = pc_velo[fov_inds,:].cpu().numpy()\n    return pts_2d, fov_inds\n\n# @profile\ndef preprocess_pointcloud(detections, point_cloud, pc_image_coord,\n                            calib, num_point = 1024, \n                            lidar_point_threshold=5,\n                            omni=False):\n    ''' Extract point clouds in frustums extruded from 2D detection boxes.\n        Update: Lidar points and 3d boxes are in *rect camera* coord system\n            (as that in 3d box label files)\n        \n    Input:\n        lidar_point_threshold: int, neglect frustum with too few points.\n    Output:\n\n    '''\n    \n    point_clouds = [] # channel number = 4, xyz,intensity in rect camera coord\n    rot_angles = []\n    ids_3d = np.zeros((len(detections)))\n    for i, detection in enumerate(detections):\n\n        xmin,ymin,xmax,ymax,_,_,_ = detection\n        box_fov_inds = (pc_image_coord[:,0]<xmax) & \\\n            (pc_image_coord[:,0]>=xmin) & \\\n            (pc_image_coord[:,1]<ymax) & \\\n            (pc_image_coord[:,1]>=ymin)\n        pc_in_box_fov = point_cloud[box_fov_inds,:]\n        if omni:\n            frustum_angle = ((xmin+xmax)/2.0) /calib.img_shape[2] * (2 * np.pi)  - np.pi/2\n        else:\n            box_center = np.array([xmax+xmin, ymin+ymax])/2\n            uvdepth = np.zeros((1,3))\n            uvdepth[0,0:2] = box_center\n            uvdepth[0,2] = 20 # some random depth\n            box2d_center_rect = calib.project_image_to_rect(uvdepth)\n            frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],\n                box2d_center_rect[0,0])\n        rot_angles.append(frustum_angle)\n        if len(pc_in_box_fov)<lidar_point_threshold:\n            ids_3d[i] = -1\n            point_clouds.append(torch.zeros((1, num_point, 4)).type(pc_in_box_fov.type()))\n        else:\n            if pc_in_box_fov.shape[0] > num_point:\n                idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point), replace=False)\n                pc_in_box_fov = pc_in_box_fov[idx].unsqueeze(0)\n            else:\n                idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point-pc_in_box_fov.shape[0]), replace=True)\n                pc_in_box_fov = torch.cat([pc_in_box_fov, pc_in_box_fov[idx]], dim=0).unsqueeze(0)\n            point_clouds.append(pc_in_box_fov)\n    point_clouds = torch.cat(point_clouds, dim=0)\n    point_clouds = rotate_pc_along_y_torch(point_clouds, rot_angles)\n\n    return point_clouds, rot_angles, ids_3d\n# @profile\ndef generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False, omni=False):\n    _, img_height, img_width = img_shape\n    pc_image_coord, img_fov_inds = get_lidar_in_image_fov(point_cloud[:,:3], calib, 0, 0, img_width, img_height)\n    pc_image_coord = pc_image_coord[img_fov_inds,:]\n    point_cloud = point_cloud[img_fov_inds,:]\n    point_cloud_frustrums, rot_angles, ids_3d = preprocess_pointcloud(detections_2d, point_cloud, pc_image_coord, calib, num_point = detector.num_point, omni=omni)\n    point_cloud_frustrums = point_cloud_frustrums.cpu().numpy()\n    boxes_3d, scores_3d, depth_features = detector(point_cloud_frustrums, np.asarray(rot_angles), peds)\n    for i in range(len(ids_3d)):\n        if ids_3d[i] == -1 or np.isnan(scores_3d[i]):\n            boxes_3d[i] = None\n            ids_3d[i] = -1\n    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features, point_cloud_frustrums\n\ndef convert_depth_features(depth_features_orig, ids_3d):\n    depth_features = []\n    for i, depth_feature_orig in enumerate(depth_features_orig):\n        if depth_feature_orig is None or ids_3d[i] == -1:\n            depth_features.append(None)\n        else:\n            depth_features.append(depth_feature_orig)\n    return depth_features\n"
  },
  {
    "path": "src/featurepointnet_tf_util.py",
    "content": "\"\"\" Wrapper functions for TensorFlow layers.\n\nAuthor: Charles R. Qi\nDate: November 2017\n\"\"\"\n\nimport numpy as np\nimport tensorflow as tf\n\ndef _variable_on_cpu(name, shape, initializer, use_fp16=False):\n  \"\"\"Helper to create a Variable stored on CPU memory.\n  Args:\n    name: name of the variable\n    shape: list of ints\n    initializer: initializer for Variable\n  Returns:\n    Variable Tensor\n  \"\"\"\n  with tf.device(\"/cpu:0\"):\n    dtype = tf.float16 if use_fp16 else tf.float32\n    var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)\n  return var\n\ndef _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):\n  \"\"\"Helper to create an initialized Variable with weight decay.\n\n  Note that the Variable is initialized with a truncated normal distribution.\n  A weight decay is added only if one is specified.\n\n  Args:\n    name: name of the variable\n    shape: list of ints\n    stddev: standard deviation of a truncated Gaussian\n    wd: add L2Loss weight decay multiplied by this float. If None, weight\n        decay is not added for this Variable.\n    use_xavier: bool, whether to use xavier initializer\n\n  Returns:\n    Variable Tensor\n  \"\"\"\n  if use_xavier:\n    initializer = tf.contrib.layers.xavier_initializer()\n  else:\n    initializer = tf.truncated_normal_initializer(stddev=stddev)\n  var = _variable_on_cpu(name, shape, initializer)\n  if wd is not None:\n    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')\n    tf.add_to_collection('losses', weight_decay)\n  return var\n\n\ndef conv1d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=1,\n           padding='SAME',\n           data_format='NHWC',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 1D convolution with non-linear operation.\n\n  Args:\n    inputs: 3-D tensor variable BxLxC\n    num_output_channels: int\n    kernel_size: int\n    scope: string\n    stride: int\n    padding: 'SAME' or 'VALID'\n    data_format: 'NHWC' or 'NCHW'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    assert(data_format=='NHWC' or data_format=='NCHW')\n    if data_format == 'NHWC':\n      num_in_channels = inputs.get_shape()[-1].value\n    elif data_format=='NCHW':\n      num_in_channels = inputs.get_shape()[1].value\n    kernel_shape = [kernel_size,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    outputs = tf.nn.conv1d(inputs, kernel,\n                           stride=stride,\n                           padding=padding,\n                           data_format=data_format)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)\n\n    if bn:\n      outputs = batch_norm_for_conv1d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn',\n                                      data_format=data_format)\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\n\n\ndef conv2d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1],\n           padding='SAME',\n           data_format='NHWC',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 2D convolution with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    data_format: 'NHWC' or 'NCHW'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      assert(data_format=='NHWC' or data_format=='NCHW')\n      if data_format == 'NHWC':\n        num_in_channels = inputs.get_shape()[-1].value\n      elif data_format=='NCHW':\n        num_in_channels = inputs.get_shape()[1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_in_channels, num_output_channels]\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      outputs = tf.nn.conv2d(inputs, kernel,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding,\n                             data_format=data_format)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn',\n                                        data_format=data_format)\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n\ndef conv2d_transpose(inputs,\n                     num_output_channels,\n                     kernel_size,\n                     scope,\n                     stride=[1, 1],\n                     padding='SAME',\n                     use_xavier=True,\n                     stddev=1e-3,\n                     weight_decay=None,\n                     activation_fn=tf.nn.relu,\n                     bn=False,\n                     bn_decay=None,\n                     is_training=None):\n  \"\"\" 2D convolution transpose with non-linear operation.\n\n  Args:\n    inputs: 4-D tensor variable BxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 2 ints\n    scope: string\n    stride: a list of 2 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n\n  Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n      kernel_h, kernel_w = kernel_size\n      num_in_channels = inputs.get_shape()[-1].value\n      kernel_shape = [kernel_h, kernel_w,\n                      num_output_channels, num_in_channels] # reversed to conv2d\n      kernel = _variable_with_weight_decay('weights',\n                                           shape=kernel_shape,\n                                           use_xavier=use_xavier,\n                                           stddev=stddev,\n                                           wd=weight_decay)\n      stride_h, stride_w = stride\n      \n      # from slim.convolution2d_transpose\n      def get_deconv_dim(dim_size, stride_size, kernel_size, padding):\n          dim_size *= stride_size\n\n          if padding == 'VALID' and dim_size is not None:\n            dim_size += max(kernel_size - stride_size, 0)\n          return dim_size\n\n      # caculate output shape\n      batch_size = inputs.get_shape()[0].value\n      height = inputs.get_shape()[1].value\n      width = inputs.get_shape()[2].value\n      out_height = get_deconv_dim(height, stride_h, kernel_h, padding)\n      out_width = get_deconv_dim(width, stride_w, kernel_w, padding)\n      output_shape = [batch_size, out_height, out_width, num_output_channels]\n\n      outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,\n                             [1, stride_h, stride_w, 1],\n                             padding=padding)\n      biases = _variable_on_cpu('biases', [num_output_channels],\n                                tf.constant_initializer(0.0))\n      outputs = tf.nn.bias_add(outputs, biases)\n\n      if bn:\n        outputs = batch_norm_for_conv2d(outputs, is_training,\n                                        bn_decay=bn_decay, scope='bn')\n\n      if activation_fn is not None:\n        outputs = activation_fn(outputs)\n      return outputs\n\n   \n\ndef conv3d(inputs,\n           num_output_channels,\n           kernel_size,\n           scope,\n           stride=[1, 1, 1],\n           padding='SAME',\n           use_xavier=True,\n           stddev=1e-3,\n           weight_decay=None,\n           activation_fn=tf.nn.relu,\n           bn=False,\n           bn_decay=None,\n           is_training=None):\n  \"\"\" 3D convolution with non-linear operation.\n\n  Args:\n    inputs: 5-D tensor variable BxDxHxWxC\n    num_output_channels: int\n    kernel_size: a list of 3 ints\n    scope: string\n    stride: a list of 3 ints\n    padding: 'SAME' or 'VALID'\n    use_xavier: bool, use xavier_initializer if true\n    stddev: float, stddev for truncated_normal init\n    weight_decay: float\n    activation_fn: function\n    bn: bool, whether to use batch norm\n    bn_decay: float or float tensor variable in [0,1]\n    is_training: bool Tensor variable\n\n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    num_in_channels = inputs.get_shape()[-1].value\n    kernel_shape = [kernel_d, kernel_h, kernel_w,\n                    num_in_channels, num_output_channels]\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=kernel_shape,\n                                         use_xavier=use_xavier,\n                                         stddev=stddev,\n                                         wd=weight_decay)\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.conv3d(inputs, kernel,\n                           [1, stride_d, stride_h, stride_w, 1],\n                           padding=padding)\n    biases = _variable_on_cpu('biases', [num_output_channels],\n                              tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n    \n    if bn:\n      outputs = batch_norm_for_conv3d(outputs, is_training,\n                                      bn_decay=bn_decay, scope='bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\ndef fully_connected(inputs,\n                    num_outputs,\n                    scope,\n                    use_xavier=True,\n                    stddev=1e-3,\n                    weight_decay=None,\n                    activation_fn=tf.nn.relu,\n                    bn=False,\n                    bn_decay=None,\n                    is_training=None):\n  \"\"\" Fully connected layer with non-linear operation.\n  \n  Args:\n    inputs: 2-D tensor BxN\n    num_outputs: int\n  \n  Returns:\n    Variable tensor of size B x num_outputs.\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_input_units = inputs.get_shape()[-1].value\n    weights = _variable_with_weight_decay('weights',\n                                          shape=[num_input_units, num_outputs],\n                                          use_xavier=use_xavier,\n                                          stddev=stddev,\n                                          wd=weight_decay)\n    outputs = tf.matmul(inputs, weights)\n    biases = _variable_on_cpu('biases', [num_outputs],\n                             tf.constant_initializer(0.0))\n    outputs = tf.nn.bias_add(outputs, biases)\n     \n    if bn:\n      outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')\n\n    if activation_fn is not None:\n      outputs = activation_fn(outputs)\n    return outputs\n\n\ndef max_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D max pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.max_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\ndef avg_pool2d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2],\n               padding='VALID'):\n  \"\"\" 2D avg pooling.\n\n  Args:\n    inputs: 4-D tensor BxHxWxC\n    kernel_size: a list of 2 ints\n    stride: a list of 2 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool(inputs,\n                             ksize=[1, kernel_h, kernel_w, 1],\n                             strides=[1, stride_h, stride_w, 1],\n                             padding=padding,\n                             name=sc.name)\n    return outputs\n\n\ndef max_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D max pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.max_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\ndef avg_pool3d(inputs,\n               kernel_size,\n               scope,\n               stride=[2, 2, 2],\n               padding='VALID'):\n  \"\"\" 3D avg pooling.\n\n  Args:\n    inputs: 5-D tensor BxDxHxWxC\n    kernel_size: a list of 3 ints\n    stride: a list of 3 ints\n  \n  Returns:\n    Variable tensor\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    kernel_d, kernel_h, kernel_w = kernel_size\n    stride_d, stride_h, stride_w = stride\n    outputs = tf.nn.avg_pool3d(inputs,\n                               ksize=[1, kernel_d, kernel_h, kernel_w, 1],\n                               strides=[1, stride_d, stride_h, stride_w, 1],\n                               padding=padding,\n                               name=sc.name)\n    return outputs\n\n\ndef batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):\n  \"\"\" NOTE: this is older version of the util func. it is deprecated.\n  Batch normalization on convolutional maps and beyond...\n  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow\n  \n  Args:\n      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC\n      is_training:   boolean tf.Varialbe, true indicates training phase\n      scope:         string, variable scope\n      moments_dims:  a list of ints, indicating dimensions for moments calculation\n      bn_decay:      float or float tensor variable, controling moving average weight\n  Return:\n      normed:        batch-normalized maps\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    num_channels = inputs.get_shape()[-1].value\n    beta = _variable_on_cpu(name='beta',shape=[num_channels],\n                            initializer=tf.constant_initializer(0))\n    gamma = _variable_on_cpu(name='gamma',shape=[num_channels],\n                            initializer=tf.constant_initializer(1.0))\n    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')\n    decay = bn_decay if bn_decay is not None else 0.9\n    ema = tf.train.ExponentialMovingAverage(decay=decay)\n    # Operator that maintains moving averages of variables.\n    # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist\n    # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3\n    with tf.variable_scope(tf.get_variable_scope(), reuse=False):\n        ema_apply_op = tf.cond(is_training,\n                               lambda: ema.apply([batch_mean, batch_var]),\n                               lambda: tf.no_op())\n    \n    # Update moving average and return current batch's avg and var.\n    def mean_var_with_update():\n      with tf.control_dependencies([ema_apply_op]):\n        return tf.identity(batch_mean), tf.identity(batch_var)\n    \n    # ema.average returns the Variable holding the average of var.\n    mean, var = tf.cond(is_training,\n                        mean_var_with_update,\n                        lambda: (ema.average(batch_mean), ema.average(batch_var)))\n    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)\n  return normed\n\n\ndef batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):\n  \"\"\" Batch normalization on convolutional maps and beyond...\n  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow\n  \n  Args:\n      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC\n      is_training:   boolean tf.Varialbe, true indicates training phase\n      scope:         string, variable scope\n      moments_dims:  a list of ints, indicating dimensions for moments calculation\n      bn_decay:      float or float tensor variable, controling moving average weight\n      data_format:   'NHWC' or 'NCHW'\n  Return:\n      normed:        batch-normalized maps\n  \"\"\"\n  bn_decay = bn_decay if bn_decay is not None else 0.9\n  return tf.contrib.layers.batch_norm(inputs, \n                                      center=True, scale=True,\n                                      is_training=is_training, decay=bn_decay,updates_collections=None,\n                                      scope=scope,\n                                      data_format=data_format)\n\n\ndef batch_norm_for_fc(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on FC data.\n  \n  Args:\n      inputs:      Tensor, 2D BxC input\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,], bn_decay)\n\n\ndef batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):\n  \"\"\" Batch normalization on 1D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 3D BLC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n      data_format: 'NHWC' or 'NCHW'\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1], bn_decay, data_format)\n\n\n\n  \ndef batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format):\n  \"\"\" Batch normalization on 2D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 4D BHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n      data_format: 'NHWC' or 'NCHW'\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)\n\n\ndef batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):\n  \"\"\" Batch normalization on 3D convolutional maps.\n  \n  Args:\n      inputs:      Tensor, 5D BDHWC input maps\n      is_training: boolean tf.Varialbe, true indicates training phase\n      bn_decay:    float or float tensor variable, controling moving average weight\n      scope:       string, variable scope\n  Return:\n      normed:      batch-normalized maps\n  \"\"\"\n  return batch_norm_template(inputs, is_training, scope, [0,1,2,3], bn_decay)\n\n\ndef dropout(inputs,\n            is_training,\n            scope,\n            keep_prob=0.5,\n            noise_shape=None):\n  \"\"\" Dropout layer.\n\n  Args:\n    inputs: tensor\n    is_training: boolean tf.Variable\n    scope: string\n    keep_prob: float in [0,1]\n    noise_shape: list of ints\n\n  Returns:\n    tensor variable\n  \"\"\"\n  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:\n    outputs = tf.cond(is_training,\n                      lambda: tf.nn.dropout(inputs, keep_prob, noise_shape),\n                      lambda: inputs)\n    return outputs\n"
  },
  {
    "path": "src/iou_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nimport linear_assignment\nimport pdb\n\n\ndef iou(bbox, candidates):\n    \"\"\"Computer intersection over union.\n\n    Parameters\n    ----------\n    bbox : ndarray\n        A bounding box in format `(top left x, top left y, width, height)`.\n    candidates : ndarray\n        A matrix of candidate bounding boxes (one per row) in the same format\n        as `bbox`.\n\n    Returns\n    -------\n    ndarray\n        The intersection over union in [0, 1] between the `bbox` and each\n        candidate. A higher score means a larger fraction of the `bbox` is\n        occluded by the candidate.\n\n    \"\"\"\n    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]\n    candidates_tl = candidates[:, :2]\n    candidates_br = candidates[:, :2] + candidates[:, 2:]\n\n    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],\n               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]\n    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],\n               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]\n    wh = np.maximum(0., br - tl)\n\n    area_intersection = wh.prod(axis=1)\n    area_bbox = bbox[2:].prod()\n    area_candidates = candidates[:, 2:].prod(axis=1)\n    return area_intersection / (area_bbox + area_candidates - area_intersection)\n\n\ndef iou_cost(tracks, detections, track_indices=None,\n             detection_indices=None, use3d=False, kf=None):\n    \"\"\"An intersection over union distance metric.\n\n    Parameters\n    ----------\n    tracks : List[deep_sort.track.Track]\n        A list of tracks.\n    detections : List[deep_sort.detection.Detection]\n        A list of detections.\n    track_indices : Optional[List[int]]\n        A list of indices to tracks that should be matched. Defaults to\n        all `tracks`.\n    detection_indices : Optional[List[int]]\n        A list of indices to detections that should be matched. Defaults\n        to all `detections`.\n    box_expansion_factor:\n        Multiplier for box size to bias towards higher recall\n\n    Returns\n    -------\n    ndarray\n        Returns a cost matrix of shape\n        len(track_indices), len(detection_indices) where entry (i, j) is\n        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))\n\n    if cost_matrix.shape[0] == 0 or cost_matrix.shape[1] == 0:\n        return cost_matrix\n    if use3d:\n        # Convert 3d detctions to tlwh format\n        # @TODO: Should use a Detection3D class to do this\n        candidates = np.array([detections[i].box_3d for i in detection_indices])\n        candidates[:,:2] -= candidates[:,3:5] / 2\n        candidates = candidates[:, [0,2,3,5]]\n    else:\n        candidates = np.asarray([detections[i].tlwh for i in detection_indices])\n\n    for row, track_idx in enumerate(track_indices):\n        if use3d:\n            bbox = tracks[track_idx].to_tlwh3d()\n            bbox[:2] -= bbox[3:5] / 2\n            bbox = bbox[[0,2,3,5]]\n        else:\n            bbox = tracks[track_idx].to_tlwh(kf)\n        cost_matrix[row, :] = 1. - iou(bbox, candidates)\n    return cost_matrix\n"
  },
  {
    "path": "src/kf_2d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\nimport EKF\nimport pdb\nnp.set_printoptions(precision=4, suppress=True)\n\nclass KalmanFilter2D(EKF.EKF):\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, w, h, vx, vy, vw, vh\n\n    contains the bounding box center position (x, y), width w, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, w, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n\n    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, gate_limit):\n        ndim, dt = 4, 1.\n        self.ndim = ndim\n        self.img_center = 1242\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_process = std_process\n        self._std_weight_measurement = std_measurement\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._initial_uncertainty = initial_uncertainty\n        self.LIMIT = gate_limit\n\n    def initiate(self, measurement, flow):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, a, h) with center position (x, y),\n            aspect ratio a, and height h.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (8 dimensional) and covariance matrix (8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        if flow is not None:\n            vel = np.mean(np.reshape(flow[int(mean_pos[1]):int(mean_pos[1]+mean_pos[3]), \n                    int(mean_pos[0]):int(mean_pos[0]+mean_pos[2]), :], (-1, 2)), axis=0)\n            mean_vel[:2] = vel\n        mean = np.r_[mean_pos, mean_vel]\n\n        # Initialize covariance based on w, h and configured std\n        std = [\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],\n            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],\n\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],\n            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3]]\n\n        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2\n        return mean, covariance\n\n    def predict_mean(self, mean):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        return np.dot(self._motion_mat, mean)\n    \n    def predict_covariance(self, mean, covariance):\n        # Updates predicted state from previous state (function g)\n        # Calculates motion update Jacobian (Gt)\n        # Returns (g(mean), Gt)\n        process_noise = self.get_process_noise(mean)\n        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) \n                     + process_noise)\n    def get_process_noise(self, mean):\n        # Returns Rt the motion noise covariance\n\n        # Motion uncertainty scaled by estimated height\n        std_pos = [\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_pos * mean[2],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_pos * mean[3],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_pos * mean[2],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_pos * mean[3]]\n        std_vel = [\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_vel * mean[2],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_vel * mean[3],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_vel * mean[2],\n            (1 + abs(mean[0]/self.img_center - 1)) * self._std_weight_vel * mean[3]]\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2\n\n        return motion_cov\n\n    def project_mean(self, mean):\n        # Measurement prediction from state (function h)\n        # Calculations sensor update Jacobian (Ht)\n        # Returns (h(mean), Ht)\n        return np.dot(self._observation_mat, mean)\n\n\n\n    def get_measurement_noise(self, measurement):\n        # Returns Qt the sensor noise covariance\n                \n        # Measurement uncertainty scaled by estimated height\n        std = [\n                self._std_weight_pos*measurement[2],\n                self._std_weight_pos*measurement[3],\n                self._std_weight_pos*measurement[2],\n                self._std_weight_pos*measurement[3]]\n        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement**2\n        return innovation_cov\n    \n    def project_cov(self, mean, covariance):\n        # Returns S the innovation covariance (projected covariance)\n                \n        measurement_noise = self.get_measurement_noise(mean)\n        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,\n                                          self._observation_mat.T))\n                     + measurement_noise)\n        return innovation_cov\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False, use_3d=False):\n        \"\"\"Compute gating distance between state distribution and measurements.\n\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n\n        \"\"\"\n        projected_mean, projected_covariance = self.project(mean, covariance)\n        if only_position:\n            projected_mean, projected_covariance = projected_mean[:2], projected_covariance[:2, :2]\n            measurements = measurements[:, :2]\n        max_val = np.amax(projected_covariance)\n        # LIMIT = max(mean[2], mean[3]) #*(1 + abs(3*mean[0]/self.img_center - 1))\n        if max_val > self.LIMIT:\n            projected_covariance *= self.LIMIT / max_val\n        return EKF.squared_mahalanobis_distance(projected_mean, projected_covariance, measurements)\n\nclass RandomWalkKalmanFilter2D(KalmanFilter2D): #TODO UPDATE THIS DOCUMENTATION\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, w, h\n\n    contains the bounding box center position (x, y), aspect ratio a, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, a, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, img_center=1242):\n        ndim, dt = 4, 1.\n        self.ndim = ndim\n        self.img_center = img_center\n        # Create Kalman filter model matrices.\n        # Motion model is constant velocity, i.e. x = x + Vx*dt\n        self._motion_mat = np.eye(2*ndim, 2*ndim)\n        self._motion_mat[ndim:, ndim:] = 0\n        # Sensor model is direct observation, i.e. x = x\n        self._observation_mat = np.eye(ndim, 2*ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_process = std_process\n        self._std_weight_measurement = std_measurement\n        self._std_weight_pos = pos_weight\n        self._std_weight_vel = velocity_weight\n        self._initial_uncertainty = initial_uncertainty\n"
  },
  {
    "path": "src/linear_assignment.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom sklearn.utils.linear_assignment_ import linear_assignment\nimport EKF\nimport pdb\nfrom mbest_ilp import new_m_best_sol\nfrom multiprocessing import Pool\nfrom functools import partial\n#from mbest_ilp import m_best_sol as new_m_best_sol\n\nINFTY_COST = 1e+5\nAPP_COUNT = 0\nIOU_COUNT = 0\ndef min_marg_matching(marginalizations, track_indices=None, max_distance=1):\n    cost_matrix = 1 - marginalizations\n    num_tracks, num_detections = cost_matrix.shape\n    if track_indices is None:\n        track_indices = np.arange(num_tracks)\n\n    detection_indices = np.arange(num_detections-1)\n\n    if num_tracks == 0 or num_detections == 0:\n        return [], track_indices, detection_indices  # Nothing to match.\n\n    extra_dummy_cols = np.tile(cost_matrix[:,0,np.newaxis], (1, num_tracks-1))\n    expanded_cost_matrix = np.hstack((extra_dummy_cols, cost_matrix))\n    indices = linear_assignment(expanded_cost_matrix)\n\n    matches, unmatched_tracks, unmatched_detections = [], [], []\n\n    # gather unmatched detections (new track)\n    for col, detection_idx in enumerate(detection_indices):\n        if col+num_tracks not in indices[:, 1]:\n            unmatched_detections.append(detection_idx)\n\n    # gather unmatched tracks (no detection)\n    for row, track_idx in enumerate(track_indices):\n        if row not in indices[:, 0]:\n            unmatched_tracks.append(track_idx)\n\n    # thresholding and matches\n    for row, col in indices:\n\n        track_idx = track_indices[row]\n        detection_idx = col - num_tracks\n        if detection_idx < 0:\n            unmatched_tracks.append(track_idx)\n            continue\n\n        if expanded_cost_matrix[row, col] > max_distance:\n            # apply thresholding\n            unmatched_tracks.append(track_idx)\n            unmatched_detections.append(detection_idx)\n        else:\n            # associate matches\n            matches.append((track_idx, detection_idx))\n\n    return matches, unmatched_tracks, unmatched_detections\n\ndef min_cost_matching(\n        distance_metric, max_distance, tracks, detections, track_indices=None,\n        detection_indices=None, compare_2d = False, detections_3d=None):\n    \"\"\"Solve linear assignment problem.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection_indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    if len(detection_indices) == 0 or len(track_indices) == 0:\n        return [], track_indices, detection_indices  # Nothing to match.\n\n    cost_matrix = distance_metric(\n        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)\n    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5\n\n    #print(\"\\n\\nCascade Cost Matrix: \", cost_matrix)\n\n    indices = linear_assignment(cost_matrix)\n\n    matches, unmatched_tracks, unmatched_detections = [], [], []\n\n    # gather unmatched detections (new track)\n    for col, detection_idx in enumerate(detection_indices):\n        if col not in indices[:, 1]:\n            unmatched_detections.append(detection_idx)\n\n    # gather unmatched trackes (no detection)\n    for row, track_idx in enumerate(track_indices):\n        if row not in indices[:, 0]:\n            unmatched_tracks.append(track_idx)\n\n    # thresholding and matches\n    for row, col in indices:\n\n        track_idx = track_indices[row]\n        detection_idx = detection_indices[col]\n\n        if cost_matrix[row, col] > max_distance:\n            # apply thresholding\n            unmatched_tracks.append(track_idx)\n            unmatched_detections.append(detection_idx)\n        else:\n            # associate matches\n            matches.append((track_idx, detection_idx))\n\n    return matches, unmatched_tracks, unmatched_detections\n\n# @profile\ndef JPDA(\n        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,\n        detection_indices=None, m=1, compare_2d = False, windowing = False):\n    \"\"\"Solve linear assignment problem.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection_indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    if len(detection_indices) == 0 or len(track_indices) == 0:\n        return np.zeros((0, len(detections) + 1))  # Nothing to match.\n    cost_matrix, gate_mask = distance_metric(\n        tracks, detections, track_indices, detection_indices, compare_2d)\n\n    num_tracks, num_detections = cost_matrix.shape[0], cost_matrix.shape[1]\n    cost_matrix[gate_mask] = INFTY_COST\n\n    clusters = find_clusters(cost_matrix[:,:,0], INFTY_COST - 0.0001)\n\n    jpda_output = []\n    for cluster in clusters:\n        jpda_output.append(get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, INFTY_COST - 0.0001, m))\n    if not jpda_output:\n        mc = np.zeros((num_tracks, num_detections + 1))\n        mc[:, 0] = 1\n        return mc\n    assignments, assignment_cost = zip(*jpda_output)\n    assignments = np.vstack([item for sublist in assignments for item in sublist])\n    assignment_cost = np.array([item for sublist in assignment_cost for item in sublist])\n\n    marginalised_cost = np.sum(assignments*np.exp(-np.expand_dims(assignment_cost, 1)), axis = 0)\n    marginalised_cost = np.reshape(marginalised_cost, (num_tracks, num_detections+1))\n    return marginalised_cost\n\ndef calculate_entropy(matrix, idx, idy):\n    mask = np.ones(matrix.shape)\n    mask[idx, idy] = 0\n    entropy = matrix/np.sum(mask*matrix, axis=1, keepdims=True)\n    entropy = (-entropy*np.log(entropy)) * mask\n    entropy = np.mean(np.sum(entropy, axis=1))\n    return entropy\n\ndef get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):\n    if len(cluster[1]) == 0:\n        assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        assignment[cluster[0], 0] = 1\n        assignment = assignment.reshape(1,-1)\n        return [assignment], np.array([0])\n\n    new_cost_matrix_appearance = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])),\n                                        np.tile(cluster[1] - 1, len(cluster[0])),\n                                        [0]*(len(cluster[1])*len(cluster[0]))],\n                                        (len(cluster[0]), len(cluster[1])))\n    new_cost_matrix_iou = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), np.tile(cluster[1] - 1, len(cluster[0])), 1],\n                (len(cluster[0]), len(cluster[1])))\n    idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)\n    appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)\n    iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)\n    if appearance_entropy < iou_entropy:\n        new_cost_matrix = new_cost_matrix_appearance\n        # new_cost_matrix = 2*np.ones(new_cost_matrix.shape)/(new_cost_matrix+1) - 1\n        global APP_COUNT\n        APP_COUNT += 1\n        dummy_node_cost = dummy_node_cost_app\n    else:\n        global IOU_COUNT\n        IOU_COUNT += 1\n        new_cost_matrix = new_cost_matrix_iou\n        new_cost_matrix[new_cost_matrix==1] -= 1e-3\n        new_cost_matrix = 1 - new_cost_matrix\n        dummy_node_cost = -np.log(1-dummy_node_cost_iou)\n        new_cost_matrix = -np.log(new_cost_matrix)\n    new_cost_matrix[idx_x, idx_y] = cutoff\n    if len(cluster[0]) == 1:\n        new_cost_matrix = np.concatenate([np.ones((new_cost_matrix.shape[0], 1))*dummy_node_cost, new_cost_matrix], axis = 1)\n        total_cost = np.sum(np.exp(-new_cost_matrix))\n        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(\n                        np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = np.exp(-new_cost_matrix)/total_cost\n        new_assignment = new_assignment.reshape(1, -1)\n        return  [new_assignment], np.array([0])\n    if new_cost_matrix.ndim <= 1:\n        new_cost_matrix = np.expand_dims(new_cost_matrix, 1)\n\n    # print(new_cost_matrix)\n    assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)\n    offset = np.amin(assignment_cost)\n    assignment_cost -= offset\n    new_assignments = []\n    total_cost = np.sum(np.exp(-assignment_cost))\n    for assignment in assignments:\n        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))\n        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(\n                    np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = \\\n                                                assignment/total_cost\n        new_assignments.append(new_assignment.reshape(1, -1))\n    return new_assignments, assignment_cost\n\n\ndef matching_cascade(\n        distance_metric, max_distance, cascade_depth, tracks, detections,\n        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):\n    \"\"\"Run matching cascade.\n\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    cascade_depth: int\n        The cascade depth, should be se to the maximum track age.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : Optional[List[int]]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above). Defaults to all tracks.\n    detection_indices : Optional[List[int]]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above). Defaults to all\n        detections.\n\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = list(range(len(tracks)))\n    if detection_indices is None:\n        detection_indices = list(range(len(detections)))\n\n    unmatched_detections = detection_indices\n    matches = []\n    for level in range(cascade_depth):\n        if len(unmatched_detections) == 0:  # No detections left\n            break\n\n        track_indices_l = [\n            k for k in track_indices\n            if tracks[k].time_since_update == 1 + level\n        ]\n        if len(track_indices_l) == 0:  # Nothing to match at this level\n            continue\n\n        matches_l, _, unmatched_detections = \\\n            min_cost_matching(\n                distance_metric, max_distance, tracks, detections,\n                track_indices_l, unmatched_detections, compare_2d, detections_3d=detections_3d)\n        matches += matches_l\n    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))\n    return matches, unmatched_tracks, unmatched_detections\n\n# @profile\ndef gate_cost_matrix(\n        kf, tracks, detections, track_indices, detection_indices,\n        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):\n    \"\"\"Invalidate infeasible entries in cost matrix based on the state\n    distributions obtained by Kalman filtering.\n\n    Parameters\n    ----------\n    kf : The Kalman filter.\n    cost_matrix : ndarray\n        The NxM dimensional cost matrix, where N is the number of track indices\n        and M is the number of detection indices, such that entry (i, j) is the\n        association cost between `tracks[track_indices[i]]` and\n        `detections[detection_indices[j]]`.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n    gated_cost : Optional[float]\n        Entries in the cost matrix corresponding to infeasible associations are\n        set this value. Defaults to a very large value.\n    only_position : Optional[bool]\n        If True, only the x, y position of the state distribution is considered\n        during gating. Defaults to False.\n\n    Returns\n    -------\n    ndarray\n        Returns the modified cost matrix.\n\n    \"\"\"\n\n    # assert (len(track_indices) == cost_matrix.shape[0]), \"Cost matrix shape does not match track indices\"\n    # assert (len(detection_indices) == cost_matrix.shape[1]), \"Cost matrix shape does match detection indices\"\n\n    if len(track_indices) == 0 or len(detection_indices) == 0:\n        return None\n\n    if use3d:\n        measurements = np.array([det.box_3d for i, det in enumerate(detections) if i in detection_indices])\n    else:\n        measurements = np.asarray(\n            [detections[i].to_xywh() for i in detection_indices])\n    if only_position:\n        gating_dim = 2\n    else:\n        gating_dim =  measurements.shape[1]\n    gating_threshold = EKF.chi2inv975[gating_dim]\n    gate_mask = []\n    for track_idx in track_indices:\n        track = tracks[track_idx]\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position, use3d)\n        gate_mask.append(gating_distance > gating_threshold)\n    return np.vstack(gate_mask)\n\ndef find_clusters(cost_matrix, cutoff):\n    num_tracks, _ = cost_matrix.shape\n    clusters = []\n    total_tracks = 0\n    total_detections = 0\n    all_tracks = set(range(num_tracks))\n    all_visited_tracks = set()\n    while total_tracks < num_tracks:\n        visited_detections = set()\n        visited_tracks = set()\n        potential_track = next(iter(all_tracks - all_visited_tracks))\n        potential_tracks = set()\n        potential_tracks.add(potential_track)\n        while potential_tracks:\n            current_track = potential_tracks.pop()\n            visited_detections.update((np.where(cost_matrix[current_track] < cutoff)[0])+1)\n            visited_tracks.add(current_track)\n            for detection in visited_detections:\n                connected_tracks = np.where(cost_matrix[:, detection - 1] < cutoff)[0]\n                for track in connected_tracks:\n                    if track in visited_tracks or track in potential_tracks:\n                        continue\n                    potential_tracks.add(track)\n        total_tracks += len(visited_tracks)\n        total_detections += len(visited_detections)\n        all_visited_tracks.update(visited_tracks)\n        clusters.append((np.array(list(visited_tracks), dtype = np.int32), np.array(list(visited_detections), dtype = np.int32)))\n    return clusters\n"
  },
  {
    "path": "src/mbest_ilp.py",
    "content": "from gurobipy import Model, quicksum, LinExpr, GRB\nimport numpy as np\nimport copy\nimport time\nfrom sklearn.utils.linear_assignment_ import linear_assignment\nimport pickle\nimport itertools\nimport pdb\nfrom copy import deepcopy\nimport math\n\"\"\"\nFn: ilp_assignment\n------------------\nSolves ILP problem using gurobi\n\"\"\"\ndef ilp_assignment(model):\n    \n    model.optimize()\n    if(model.status == 3):\n        return -1\n    return\n\n\"\"\"\nFn: initialize_model\n--------------------\nInitializes gurobi ILP model by setting the base objective\n\"\"\"\n# @profile\ndef initialize_model(cost_matrix, cutoff, model = None):\n    #Add dummy detection\n    cost_matrix = np.insert(cost_matrix,0, np.ones(cost_matrix.shape[0])*cutoff, axis=1)\n    M,N = cost_matrix.shape\n    if model is None:\n        model = Model()\n    else:\n        model.remove(model.getVars())\n        model.remove(model.getConstrs())\n    model.setParam('OutputFlag', False)\n    # y = []\n    # for i in range(M):\n    #     y.append([])\n    #     for j in range(N):\n    #         y[i].append(m.addVar(vtype=GRB.BINARY, name = 'y_%d%d'%(i,j)))\n    y = model.addVars(M,N, vtype=GRB.BINARY, name = 'y')\n    model.setObjective(quicksum(quicksum([y[i,j]*cost_matrix[i][j] for j in range(N)]) for i in range(M)), GRB.MINIMIZE)\n    # for i in range(M):\n    model.addConstrs((quicksum(y[i,j] for j in range(N))==1 for i in range(M)), name='constraint for track')\n    # for j in range(1,N):\n    model.addConstrs((quicksum(y[i,j] for i in range(M))<=1 for j in range(1, N)), name='constraint for detection')\n    y = list(y.values())\n    return model, M, N, y\n\n\"\"\"\nFn: m_best_sol\n--------------\nFinds m_best solutions for object/track association givent the\ninput cost matrix. Solves constrained ILP problems using gurobi solver.\n\"\"\"\ndef cache(func):\n    cache = {}\n    def cached_function(*args):\n        cost_matrix = args[0]\n        cost_matrix = np.hstack((np.ones((cost_matrix.shape[0], 1))*args[1], cost_matrix))\n        if (cost_matrix.shape[0], cost_matrix.shape[1]) in cache:\n            solution_list = cache[(cost_matrix.shape[0], cost_matrix.shape[1])]\n            solution_vals = np.sum(solution_list*cost_matrix.reshape(1, -1), axis = 1)\n            return solution_list, solution_vals\n        else: \n            solution_list, solution_vals = func(*args)\n            cache[(cost_matrix.shape[0], cost_matrix.shape[1])] = solution_list\n            return solution_list, solution_vals\n    return cached_function\n# @profile\ndef num_solutions(cost_matrix):\n    M,N = cost_matrix.shape\n    N += 1\n    count = 0\n    for i in range(min(M+1, N)):\n        count += np.prod(range(M-i+1, M+1))*np.prod(range(N-i, N))//math.factorial(i)\n        if count > 2000:\n            break\n    return int(count)\n\n@cache\ndef enumerate_solutions(cost_matrix, cutoff, num_solutions):\n    # num_solutions = [[2, 3, 4, 5, 6, 7],[3, 7, 13, 21, 31],[4, 13, 34, 73, 136],[5, 21, 73, 209, 501],[6, 31, 136, 501, 1546], [7]]\n    cost_matrix = np.hstack((np.ones((cost_matrix.shape[0], 1))*cutoff, cost_matrix))\n    M,N = cost_matrix.shape\n    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)\n    solution_list[:, :, 0] = 1\n    count = 0\n    for i in range(min(M+1, N)):\n        for chosen in itertools.combinations(range(M), i):\n            for perm in itertools.permutations(range(1,N), i):\n                if chosen:\n                    solution_list[[count]*len(chosen), chosen, perm] = 1\n                    solution_list[[count]*len(chosen), chosen, [0]*len(chosen)] = 0\n                count += 1\n    solution_vals = np.sum(np.sum(solution_list*np.expand_dims(cost_matrix, 0), axis = 1), axis = 1)\n    solution_list = np.reshape(solution_list, (num_solutions, -1))\n    return solution_list, solution_vals\n\n\ndef new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):\n    sols = num_solutions(cost_matrix)\n    if sols <= 2000:\n        return enumerate_solutions(cost_matrix, cutoff, sols)\n    model, M, N, y = initialize_model(cost_matrix, cutoff, model)\n    X = np.zeros((m_sol, M*N))\n    xv = []\n    if (ilp_assignment(model) == -1):\n        xv.append(0)\n    else:\n        x = model.getAttr(\"X\", y)\n        X[0] = x\n        xv.append(model.objVal)\n    if m_sol > 1:\n        model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')\n        if (ilp_assignment(model) == -1):\n            xv.append(0)\n        else:\n            x = model.getAttr(\"X\", y)\n            X[1] = x\n            xv.append(model.objVal)\n    if m_sol > 2:\n        model.remove(model.getConstrByName('constraint_0'))\n        second_best_solutions = []\n        second_best_solution_vals = []\n        partitions = []\n        j = np.argmax(np.logical_xor(X[0], X[1]))\n        partitions.append([j])\n        partitions.append([j])\n        model.addConstr(y[j]==X[0][j], name = 'partition_constraint')\n        model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')\n        ilp_assignment(model)\n        second_best_solutions.append(model.getAttr(\"X\", y))\n        second_best_solution_vals.append(model.objVal)\n        model.remove(model.getConstrByName('non_equality_constraint'))\n        model.remove(model.getConstrByName('partition_constraint'))\n        model.addConstr(y[j]==X[1][j], name = 'partition_constraint')\n        model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')\n        ilp_assignment(model)\n        second_best_solution_vals.append(model.objVal)\n        second_best_solutions.append(model.getAttr(\"X\", y))\n        model.remove(model.getConstrByName('non_equality_constraint'))\n        model.remove(model.getConstrByName('partition_constraint'))\n        \n        for m in range(2, m_sol):\n            l_k = np.argmin(second_best_solution_vals)\n            X[m] = second_best_solutions[l_k]\n            xv.append(second_best_solution_vals[l_k])\n            if m==m_sol-1:\n                break\n            j = np.argmax(np.logical_xor(X[m], X[l_k]))\n            parent_partition = partitions[l_k]\n            constrs = []\n            for idx in parent_partition:\n                constrs.append(model.addConstr(y[idx]==X[l_k, idx]))\n            model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')\n            model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')\n            if(ilp_assignment(model) == -1):\n                second_best_solutions.append(np.ones((M,N)))\n                second_best_solution_vals.append(np.inf)\n            else:\n                second_best_solutions.append(model.getAttr(\"X\", y))\n                second_best_solution_vals.append(model.objVal)\n            model.remove(model.getConstrByName('partition_constraint_new'))\n            model.remove(model.getConstrByName('non_equality_constraint'))\n            model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')\n            model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')\n            if(ilp_assignment(model) == -1):\n                second_best_solution_vals[l_k] = np.inf\n                second_best_solutions[l_k] = np.ones((M,N))\n            else:\n                second_best_solution_vals[l_k] = model.objVal\n                second_best_solutions[l_k] = model.getAttr(\"X\", y)\n            model.remove(model.getConstrByName('partition_constraint_new'))\n            model.remove(model.getConstrByName('non_equality_constraint'))\n            partitions[l_k].append(j)\n            partitions.append(copy.deepcopy(partitions[l_k]))\n            for constr in constrs:\n                model.remove(constr)\n\n\n\n    # X = np.asarray(X)\n    xv = np.asarray(xv)\n    return X, xv\ndef linear_assignment_wrapper(a):\n    return linear_assignment(a)\n\nif __name__=='__main__':\n    # a = np.random.randn(100,100)\n    # # cProfile.run('m_best_sol(a,1,10)', 'mbest.profile')\n    # # cProfile.run('linear_assignment(a)', 'hungarian.profile')\n    # total = 0\n    # for i in range(10):\n    #     start = time.time()\n    #     _, sol_cost = m_best_sol(a, 1, 10)\n    #     end = time.time()\n    #     total+= end-start\n    # print(\"Time for JPDA m=1, is %f\"%(total/10))\n    # total = 0\n    # for i in range(10):\n    #     start = time.time()\n    #     ass = linear_assignment(a)\n    #     end = time.time()\n    #     total+= end-start\n    # print(\"Time for Hungarian, is %f\"%(total/10))\n    \n    np.random.seed(14295)\n    # Check JPDA matches Hungarian\n    # while True:\n    #     print('*******')\n    #     a = np.random.randn(100,100)\n    #     X, _ = new_m_best_sol(a, 1, 10)\n    #     X = np.reshape(X[0], (100,101))[:,1:]\n    #     ass = linear_assignment(a)\n    #     output_hungarian = np.zeros(a.shape)\n    #     output_hungarian[ass[:,0], ass[:, 1]] = 1\n    #     assert(np.all(output_hungarian==X))\n    #\n    # Output to file to check\n\n    #  np.random.seed(14295)\n    # vals = []\n    # a = np.random.randn(5,5)\n    a = np.array([[0.1,0.6,0.2,0.3],[0.4,0.1,0.9,0.4],[0.3,0.5,0.1,0.7],[0.8,0.2,0.2,0.1]])\n    num_solutions(a)\n    # enumerate_solutions(a.shape[0], a.shape[1]+1)\n    # ass = linear_assignment_wrapper(a)\n    # m = Model()\n    sols, vals = new_m_best_sol(a, 100, 10)\n    for i, val in enumerate(vals):\n        print(np.reshape(sols[i], (4,5)), val)\n    # print(np.reshape(sols[1], (4,5)), vals[1])\n    # print(np.reshape(sols[2], (4,5)), vals[2])\n    # print(np.reshape(sols[3], (4,5)), vals[3])\n\n    # with open('test.pkl', 'wb') as f:\n    #     pickle.dump(vals, f)\n"
  },
  {
    "path": "src/nn_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\n\ndef _pdist(a, b):\n    \"\"\"Compute pair-wise squared distance between points in `a` and `b`.\n\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n\n    \"\"\"\n    a, b = np.asarray(a), np.asarray(b)\n    if len(a) == 0 or len(b) == 0:\n        return np.zeros((len(a), len(b)))\n    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)\n    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]\n    r2 = np.clip(r2, 0., float(np.inf))\n    return r2\n\n\ndef _cosine_distance(a, b, data_is_normalized=False):\n    \"\"\"Compute pair-wise cosine distance between points in `a` and `b`.\n\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n    data_is_normalized : Optional[bool]\n        If True, assumes rows in a and b are unit length vectors.\n        Otherwise, a and b are explicitly normalized to lenght 1.\n\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n\n    \"\"\"\n    if not data_is_normalized:\n        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)\n        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)\n    return 1. - np.dot(a, b.T)\n\ndef _cosine_distance_torch(a, b, data_is_normalized=False):\n    '''\n    _cosine_distance but torched\n    '''\n    if not data_is_normalized:\n        a = a / torch.norm(a, dim=1, keepdim=True)\n        b = b / torch.norm(b, dim=1, keepdim=True)\n    return 1. - torch.matmul(a, torch.transpose(b,0,1))\n\ndef _nn_euclidean_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (Euclidean).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest Euclidean distance to a sample in `x`.\n\n    \"\"\"\n    distances = _pdist(x, y)\n    return np.maximum(0.0, distances.min(axis=0))\n\ndef _nn_euclidean_distance_torch(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (Euclidean).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest Euclidean distance to a sample in `x`.\n\n    \"\"\"\n    # x = x/((x*x).sum(1, keepdim = True)).sqrt()\n    # y = y/((y*y).sum(1, keepdim = True)).sqrt()\n    sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2).sqrt()\n    # sim = sim.exp()\n    # sim = (sim - 1)/(sim + 1)\n    sim = torch.min(sim, 0)[0]\n    return sim\n    \ndef _nn_cosine_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (cosine).\n\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest cosine distance to a sample in `x`.\n\n    \"\"\"\n    distances = _cosine_distance(x, y)\n    return distances.min(axis=0)\n\ndef _nn_cosine_distance_torch(x,y):\n    '''\n    Same as _nn_cosine_distance except torched\n    '''\n    distances = _cosine_distance_torch(x,y)\n    return torch.min(distances, 0)[0]\n\nclass NearestNeighborDistanceMetric(object):\n    \"\"\"\n    A nearest neighbor distance metric that, for each target, returns\n    the closest distance to any sample that has been observed so far.\n\n    Parameters\n    ----------\n    metric : str\n        Either \"euclidean\" or \"cosine\".\n    matching_threshold: float\n        The matching threshold. Samples with larger distance are considered an\n        invalid match.\n    budget : Optional[int]\n        If not None, fix samples per class to at most this number. Removes\n        the oldest samples when the budget is reached.\n\n    Attributes\n    ----------\n    samples : Dict[int -> List[ndarray]]\n        A dictionary that maps from target identities to the list of samples\n        that have been observed so far.\n\n    \"\"\"\n\n    def __init__(self, metric, budget=None):\n\n\n        if metric == \"euclidean\":\n            self._metric = _nn_euclidean_distance\n            self._metric_torch = _nn_euclidean_distance_torch\n        elif metric == \"cosine\":\n            self._metric = _nn_cosine_distance\n            self._metric_torch = _nn_cosine_distance_torch\n        else:\n            raise ValueError(\n                \"Invalid metric; must be either 'euclidean' or 'cosine'\")\n        self.budget = budget\n        self.samples = {}\n        self.samples_2d = {}\n\n    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):\n        \"\"\"Update the distance metric with new data.\n\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : ndarray\n            An integer array of associated target identities.\n        active_targets : List[int]\n            A list of targets that are currently present in the scene.\n\n        \"\"\"\n        for feature, target in zip(features, targets):\n            if feature is not None:\n                self.samples.setdefault(target, []).append(feature)\n            else:\n                self.samples.setdefault(target, [])\n            if self.budget is not None:\n                self.samples[target] = self.samples[target][-self.budget:]\n        self.samples = {k: self.samples[k] for k in active_targets if k in targets}\n        for target in active_targets:\n            self.samples.setdefault(target, [])\n        \n        for feature_2d, target in zip(features_2d, targets_2d):\n            self.samples_2d.setdefault(target, []).append(feature_2d)\n            if self.budget is not None:\n                self.samples_2d[target] = self.samples_2d[target][-self.budget:]\n\n        self.samples_2d = {k: self.samples_2d[k] for k in active_targets}\n\n    def distance(self, features, targets, compare_2d=False):\n        \"\"\"Compute distance between features and targets.\n\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : List[int]\n            A list of targets to match the given `features` against.\n\n        Returns\n        -------\n        ndarray\n            Returns a cost matrix of shape len(targets), len(features), where\n            element (i, j) contains the closest squared distance between\n            `targets[i]` and `features[j]`.\n\n        \"\"\"\n        cost_matrix = np.zeros((len(targets), len(features)))\n        for i, target in enumerate(targets):\n            if compare_2d:            \n                cost_matrix[i, :] = self._metric(self.samples_2d[target], features)\n            else:\n                cost_matrix[i, :] = self._metric(self.samples[target], features)\n        return cost_matrix\n\n    def distance_torch(self, features, targets, compare_2d=False):\n        '''\n        Same as distance except torched.\n        '''\n        # features = torch.from_numpy(features).cuda()\n        cost_matrix = torch.zeros(len(targets), len(features)).to('cuda:0')\n        for i, target in enumerate(targets):\n            if compare_2d:\n                cost_matrix[i, :] = self._metric_torch(torch.stack(self.samples_2d[target], dim=0), features)\n            else:\n                cost_matrix[i, :] = self._metric_torch(torch.stack(self.samples[target], dim=0), features)\n        return cost_matrix.cpu().numpy()\n\n    def check_samples(self, targets):\n        for target in targets:\n            if len(self.samples[target]) == 0:\n                return True\n        return False\n"
  },
  {
    "path": "src/pointnet_model.py",
    "content": "import os, pdb\nimport tensorflow as tf\ntf.logging.set_verbosity(tf.logging.ERROR)\nimport configparser\nfrom utils.pointnet_transform_nets import input_transform_net, feature_transform_net\nimport utils.pointnet_tf_util as pointnet_tf_util\n\n\nclass PointNet():\n    def __init__(self, config_path):\n        parser = configparser.SafeConfigParser()\n        parser.read(config_path)\n        num_points = parser.getint('general', 'num_point')\n        depth_model_path = parser.get('general', 'depth_model_path')\n\n        with tf.device('/gpu:'+str(0)):\n            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)\n            self.is_training_pl = tf.placeholder(tf.bool, shape=())\n\n            # simple model\n            feature = self.get_model(self.pointclouds_pl, self.is_training_pl)\n            self.feature = feature\n            # Add ops to save and restore all the variables.\n        \n        self.saver = tf.train.Saver()\n        #Create session\n        config = tf.ConfigProto()\n        config.gpu_options.allow_growth = True\n        config.allow_soft_placement = True\n        config.log_device_placement = False\n        self.sess = tf.Session(config=config)\n        #Initialize variables\n        self.sess.run(tf.global_variables_initializer())\n        #Restore model weights\n        self.saver.restore(self.sess, depth_model_path)\n\n    def __call__(self, input_point_cloud):\n        feed_dict = {self.pointclouds_pl: input_point_cloud,\n                     self.is_training_pl: False}\n        features = self.sess.run(self.feature,feed_dict=feed_dict)\n        return features\n\n    def placeholder_inputs(self, batch_size, num_point):\n        pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, None, 3))\n        labels_pl = tf.placeholder(tf.int32, shape=(batch_size))\n        return pointclouds_pl, labels_pl\n\n\n    def get_model(self, point_cloud, is_training, bn_decay=None):\n        \"\"\" Classification PointNet, input is BxNx3, output Bx40 \"\"\"\n        batch_size = point_cloud.get_shape()[0].value\n        end_points = {}\n\n        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE) as sc:\n            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)\n        point_cloud_transformed = tf.matmul(point_cloud, transform)\n        input_image = tf.expand_dims(point_cloud_transformed, -1)\n\n        net = pointnet_tf_util.conv2d(input_image, 64, [1,3],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv1', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv2', bn_decay=bn_decay)\n\n        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE) as sc:\n            transform = feature_transform_net(net, is_training, bn_decay, K=64)\n        end_points['transform'] = transform\n        net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)\n        net_transformed = tf.expand_dims(net_transformed, [2])\n\n        net = pointnet_tf_util.conv2d(net_transformed, 64, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv3', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 128, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv4', bn_decay=bn_decay)\n        net = pointnet_tf_util.conv2d(net, 1024, [1,1],\n                             padding='VALID', stride=[1,1],\n                             bn=True, is_training=is_training,\n                             scope='conv5', bn_decay=bn_decay)\n\n        # Symmetric function: max pooling\n        net = tf.reduce_max(net, axis = 1)\n\n        net = tf.reshape(net, [batch_size, -1])\n        feature = net\n\n        return feature\n\n\n    def get_loss(self, pred, label, end_points, reg_weight=0.001):\n        \"\"\" pred: B*NUM_CLASSES,\n            label: B, \"\"\"\n        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)\n        classify_loss = tf.reduce_mean(loss)\n        tf.summary.scalar('classify loss', classify_loss)\n\n        # Enforce the transformation as orthogonal matrix\n        transform = end_points['transform'] # BxKxK\n        K = transform.get_shape()[1].value\n        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))\n        mat_diff = mat_diff - tf.constant(np.eye(K), dtype=tf.float32)\n        mat_diff_loss = tf.nn.l2_loss(mat_diff) \n        tf.summary.scalar('mat loss', mat_diff_loss)\n\n        return classify_loss + mat_diff_loss * reg_weight\n    "
  },
  {
    "path": "src/template 2.py",
    "content": "#!/home/sibot/anaconda2/bin/python\n\"\"\" yolo_bbox_to_sort.py\n    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification\n\"\"\"\nimport time\nimport rospy\nimport sys\nimport torch\nimport numpy as np\nimport os\nfrom std_msgs.msg import Int8\nimport message_filters\nfrom sensor_msgs.msg import Image\nfrom darknet_ros_msgs.msg import BoundingBoxes, BoundingBox\nfrom cv_bridge import CvBridge, CvBridgeError\nfrom aligned_reid_utils import get_image_patches, generate_features, create_appearance_model\nfrom jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature\n\nclass Appearance_Features:\n    def __init__(self):\n        self.node_name = \"aligned_reid_feature_generator\"\n        \n        rospy.init_node(self.node_name)\n        rospy.on_shutdown(self.cleanup)\n        apperance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')\n        self.appearance_model = create_appearance_model(apperance_model_ckpt)\n        \n        self.image_sub = message_filters.Subscriber(\"/ros_indigosdk_node/stitched_image0\", Image, queue_size=2)\n        self.yolo_bbox_sub = message_filters.Subscriber(\"/omni_yolo_bboxes\", BoundingBoxes, queue_size=2)\n        \n        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)\n        self.time_sync.registerCallback(self.get_2d_feature)\n    \n        self.cv_bridge = CvBridge()\n        self.feature_2d_pub = rospy.Publisher(\"detection2d_with_feature\", detection2d_with_feature_array, queue_size=1)\n        self.debug_pub = rospy.Publisher(\"/test\", Int8, queue_size=1)\n        rospy.loginfo(\"Ready.\")\n        \n    def get_2d_feature(self, y1_bboxes, ros_image):\n#        rospy.loginfo('Processing Image with AlignedReID')\n        start = time.time()\n        try:\n            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, \"bgr8\")\n        except CvBridgeError as e:\n            print(e)\n        input_img = torch.from_numpy(input_image).float()\n        input_img = input_img.to('cuda:1')\n        input_img = input_img.permute(2, 0, 1)/255\n        # Generate 2D image feaures for each bounding box\n        detections = []\n        frame_det_ids = []\n        count = 0\n        for y1_bbox in y1_bboxes.bounding_boxes:\n            if y1_bbox.Class == 'person':\n                xmin = y1_bbox.xmin\n                xmax = y1_bbox.xmax\n                ymin = y1_bbox.ymin\n                ymax = y1_bbox.ymax\n                probability = y1_bbox.probability\n                frame_det_ids.append(count)\n                count += 1\n                detections.append([int(xmin), int(ymin), int(xmax), int(ymax), probability, -1, -1])\n        features_2d = detection2d_with_feature_array()\n        features_2d.header.stamp = y1_bboxes.header.stamp\n        features_2d.header.frame_id = 'occam'\n        if not detections:\n            self.feature_2d_pub.publish(features_2d)\n            return\n        image_patches = get_image_patches(input_img, detections)\n        features = generate_features(self.appearance_model, image_patches)\n        \n        for (det, feature, i) in zip(detections, features, frame_det_ids):\n            det_msg = detection2d_with_feature()\n            det_msg.header.stamp = features_2d.header.stamp\n            det_msg.x1 = det[0]\n            det_msg.y1 = det[1]\n            det_msg.x2 = det[2]\n            det_msg.y2 = det[3]\n            det_msg.feature = feature\n            det_msg.valid = True\n            det_msg.frame_det_id = i\n            features_2d.detection2d_with_features.append(det_msg)\n        self.feature_2d_pub.publish(features_2d)\n        # rospy.loginfo(\"Aligned_ReID time: {}\".format(time.time() - start))\n                \n    def cleanup(self):\n        print(\"Shutting down 2D-Appearance node.\")\n    \ndef main(args):       \n    try:\n        Appearance_Features()\n        rospy.spin()\n    except KeyboardInterrupt:\n        print(\"Shutting down 2D-Appearance node.\")\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "src/template.py",
    "content": "#!/home/sibot/anaconda2/bin/python\n\"\"\" yolo_bbox_to_sort.py\n    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification\n\"\"\"\nimport time\nimport rospy\nimport sys\nimport torch\nimport numpy as np\nimport os\nfrom std_msgs.msg import Int8\nimport message_filters\nfrom sensor_msgs.msg import Image\nfrom darknet_ros_msgs.msg import BoundingBoxes, BoundingBox\nfrom cv_bridge import CvBridge, CvBridgeError\nfrom aligned_reid_utils import get_image_patches, generate_features, create_appearance_model\nfrom jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature\n\nclass Appearance_Features:\n    def __init__(self):\n        self.node_name = \"aligned_reid_feature_generator\"\n        \n        rospy.init_node(self.node_name)\n        rospy.on_shutdown(self.cleanup)\n        apperance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')\n        self.appearance_model = create_appearance_model(apperance_model_ckpt)\n        \n        self.image_sub = message_filters.Subscriber(\"/ros_indigosdk_node/stitched_image0\", Image, queue_size=2)\n        self.yolo_bbox_sub = message_filters.Subscriber(\"/omni_yolo_bboxes\", BoundingBoxes, queue_size=2)\n        \n        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)\n        self.time_sync.registerCallback(self.get_2d_feature)\n    \n        self.cv_bridge = CvBridge()\n        self.feature_2d_pub = rospy.Publisher(\"detection2d_with_feature\", detection2d_with_feature_array, queue_size=1)\n        self.debug_pub = rospy.Publisher(\"/test\", Int8, queue_size=1)\n        rospy.loginfo(\"Ready.\")\n        \n    def get_2d_feature(self, y1_bboxes, ros_image):\n#        rospy.loginfo('Processing Image with AlignedReID')\n        start = time.time()\n        try:\n            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, \"bgr8\")\n        except CvBridgeError as e:\n            print(e)\n        input_img = torch.from_numpy(input_image).float()\n        input_img = input_img.to('cuda:1')\n        input_img = input_img.permute(2, 0, 1)/255\n        # Generate 2D image feaures for each bounding box\n        detections = []\n        frame_det_ids = []\n        count = 0\n        for y1_bbox in y1_bboxes.bounding_boxes:\n            if y1_bbox.Class == 'person':\n                xmin = y1_bbox.xmin\n                xmax = y1_bbox.xmax\n                ymin = y1_bbox.ymin\n                ymax = y1_bbox.ymax\n                probability = y1_bbox.probability\n                frame_det_ids.append(count)\n                count += 1\n                detections.append([int(xmin), int(ymin), int(xmax), int(ymax), probability, -1, -1])\n        features_2d = detection2d_with_feature_array()\n        features_2d.header.stamp = y1_bboxes.header.stamp\n        features_2d.header.frame_id = 'occam'\n        if not detections:\n            self.feature_2d_pub.publish(features_2d)\n            return\n        image_patches = get_image_patches(input_img, detections)\n        features = generate_features(self.appearance_model, image_patches)\n        \n        for (det, feature, i) in zip(detections, features, frame_det_ids):\n            det_msg = detection2d_with_feature()\n            det_msg.header.stamp = features_2d.header.stamp\n            det_msg.x1 = det[0]\n            det_msg.y1 = det[1]\n            det_msg.x2 = det[2]\n            det_msg.y2 = det[3]\n            det_msg.feature = feature\n            det_msg.valid = True\n            det_msg.frame_det_id = i\n            features_2d.detection2d_with_features.append(det_msg)\n        self.feature_2d_pub.publish(features_2d)\n        # rospy.loginfo(\"Aligned_ReID time: {}\".format(time.time() - start))\n                \n    def cleanup(self):\n        print(\"Shutting down 2D-Appearance node.\")\n    \ndef main(args):       \n    try:\n        Appearance_Features()\n        rospy.spin()\n    except KeyboardInterrupt:\n        print(\"Shutting down 2D-Appearance node.\")\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "src/track_3d 2.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\n\nclass TrackState:\n    \"\"\"\n    Enumeration type for the single target track state. Newly created tracks are\n    classified as `tentative` until enough evidence has been collected. Then,\n    the track state is changed to `confirmed`. Tracks that are no longer alive\n    are classified as `deleted` to mark them for removal from the set of active\n    tracks.\n\n    \"\"\"\n\n    Tentative = 1\n    Confirmed = 2\n    Deleted = 3\n\n\nclass Track_3d:\n    \"\"\"\n    A single target track with state space `(x, y, a, h)` and associated\n    velocities, where `(x, y)` is the center of the bounding box, `a` is the\n    aspect ratio and `h` is the height.\n\n    Parameters\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    max_age : int\n        The maximum number of consecutive misses before the track state is\n        set to `Deleted`.\n    feature : Optional[ndarray]\n        Feature vector of the detection this track originates from. If not None,\n        this feature is added to the `features` cache.\n\n    Attributes\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    hits : int\n        Total number of measurement updates.\n    age : int\n        Total number of frames since first occurance.\n    time_since_update : int\n        Total number of frames since last measurement update.\n    state : TrackState\n        The current track state.\n    features : List[ndarray]\n        A cache of features. On each measurement update, the associated feature\n        vector is added to this list.\n\n    \"\"\"\n    def __init__(self, mean, covariance, track_id, n_init, max_age,\n                 feature=None, appearance_feature = None, cuda = False, lstm = None):\n\n        self.mean = mean\n        self.covariance = covariance\n        self.track_id = track_id\n        self.hits = 1\n        self.age = 1\n        self.time_since_update = 0\n        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n        self.cuda = cuda\n        self.state = TrackState.Tentative\n        self.features = []\n        self.features_2d = []\n        self.hidden = None\n        if lstm is None:\n            self.features.append(feature)\n            self.features_2d.append(appearance_feature)\n        else:\n            self.feature_update(feature, appearance_feature, lstm)\n        self.first_detection = mean[:7]\n        self._n_init = n_init\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n        self._max_age = max_age\n        self.matched = True\n        self.exiting = False\n        self.last_box = None\n\n\n    def to_tlwh3d(self):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n\n        if self.last_box is not None:\n            return self.last_box.box_3d\n        else:\n            return self.mean[[0,1,2,3,4,5,6]].copy()\n\n    def to_tlwh(self, kf):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        corner_points, _ = kf.calculate_corners(self.mean)\n        min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n        max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n        return ret\n\n    def predict(self, kf):\n        \"\"\"Propagate the state distribution to the current time step using a\n        Kalman filter prediction step.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n\n        \"\"\"\n        self.mean, self.covariance = kf.predict(self.mean, self.covariance)\n        self.age += 1\n        self.time_since_update += 1\n\n    # @profile\n    def update(self, kf, detection, compare_2d=False,\n                marginalization=None, detection_idx=None, JPDA=False, lstm = None):\n        \"\"\"Perform Kalman filter measurement update step and update the feature\n        cache.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n        detection : Detection\n            The associated detection.\n\n        \"\"\"\n        if JPDA:\n\n            detections_2d = [det.tlwh for det in detection]\n            if compare_2d:\n                detections_3d = None\n            else:\n                detections_3d = [np.copy(det.box_3d) for det in detection]\n                for det in detections_3d:\n                    if det[6] - self.mean[6] > np.pi:\n                        det[6] -= 2 * np.pi\n                    elif det[6] - self.mean[6] < -np.pi:\n                        det[6] += 2*np.pi\n            self.mean, self.covariance, self.mean_post_3d = kf.update(\n                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)\n            self.mean[6] = self.mean[6] % (2 * np.pi)\n            self.feature_update(detection, detection_idx, lstm)\n            if np.argmax(marginalization) != 0:\n                self.matched=True\n            else:\n                self.matched=False\n            if detection_idx < 0:\n                self.last_box = None\n                return\n            self.hits += 1\n            self.time_since_update = 0\n            detection = detection[detection_idx]\n            self.last_box = detection\n        else:\n            detection = detection[detection_idx]\n            detections_3d = detections_3d[detection_idx]\n            self.mean, self.covariance = kf.update(\n                self.mean, self.covariance, detection.tlwh, detections_3d)\n\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n\n    def mark_missed(self):\n        \"\"\"Mark this track as missed (no association at the current time step).\n        \"\"\"\n        if self.state == TrackState.Tentative:\n            self.state = TrackState.Deleted\n        elif self.time_since_update > self._max_age:\n            self.state = TrackState.Deleted\n\n    def is_tentative(self):\n        \"\"\"Returns True if this track is tentative (unconfirmed).\n        \"\"\"\n        return self.state == TrackState.Tentative\n\n    def is_confirmed(self):\n        \"\"\"Returns True if this track is confirmed.\"\"\"\n        return self.state == TrackState.Confirmed\n\n    def is_deleted(self):\n        \"\"\"Returns True if this track is dead and should be deleted.\"\"\"\n        return self.state == TrackState.Deleted\n\n    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):\n        if JPDA:\n            features=[d.feature for d in detections]\n            appearance_features=[d.appearance_feature for d in detections]\n            if len([i for i in features if i is None])==0:\n                combined_feature=np.sum(np.array(features).reshape(len(features), -1)\n                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features.append(combined_feature)\n            if len([i for i in appearance_features if i is None])==0:\n                combined_feature=np.sum(\n                                np.array(appearance_features).reshape(len(appearance_features), -1)\n                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features_2d.append(combined_feature)\n        else:\n            feature = detections[detection_idx].feature\n            appearance_feature = detections[detection_idx].appearance_feature\n            if feature is not None:\n                if lstm is not None:\n                    input_feature = torch.Tensor(feature).type(self.tensor)\n                    input_feature = input_feature.unsqueeze(0)\n                    with torch.no_grad():\n                        if self.hidden is None:\n                            output_feature, self.hidden = lstm(input_feature)\n                        else:\n                            output_feature, self.hidden = lstm(input_feature, self.hidden)\n                    output_feature = output_feature.cpu().numpy().squeeze(0)\n                else:\n                    output_feature = feature\n                self.features.append(output_feature)\n            if appearance_feature is not None:\n                self.features_2d.append(appearance_feature)\n\n    def get_cov(self):\n        xyz_cov = self.covariance[:3, :3]\n        theta_cov_1 = self.covariance[7, :3]\n        theta_cov_2 = self.covariance[7, 7]\n        out_cov = np.zeros((6, 6))\n        out_cov[:3,:3] = xyz_cov\n        out_cov[5, :3] = theta_cov_1\n        out_cov[:3, 5] = theta_cov_1\n        out_cov[5, 5] = theta_cov_2\n        return out_cov"
  },
  {
    "path": "src/track_3d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport pdb\nimport torch\n\nclass TrackState:\n    \"\"\"\n    Enumeration type for the single target track state. Newly created tracks are\n    classified as `tentative` until enough evidence has been collected. Then,\n    the track state is changed to `confirmed`. Tracks that are no longer alive\n    are classified as `deleted` to mark them for removal from the set of active\n    tracks.\n\n    \"\"\"\n\n    Tentative = 1\n    Confirmed = 2\n    Deleted = 3\n\n\nclass Track_3d:\n    \"\"\"\n    A single target track with state space `(x, y, a, h)` and associated\n    velocities, where `(x, y)` is the center of the bounding box, `a` is the\n    aspect ratio and `h` is the height.\n\n    Parameters\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    max_age : int\n        The maximum number of consecutive misses before the track state is\n        set to `Deleted`.\n    feature : Optional[ndarray]\n        Feature vector of the detection this track originates from. If not None,\n        this feature is added to the `features` cache.\n\n    Attributes\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    hits : int\n        Total number of measurement updates.\n    age : int\n        Total number of frames since first occurance.\n    time_since_update : int\n        Total number of frames since last measurement update.\n    state : TrackState\n        The current track state.\n    features : List[ndarray]\n        A cache of features. On each measurement update, the associated feature\n        vector is added to this list.\n\n    \"\"\"\n    def __init__(self, mean, covariance, track_id, n_init, max_age,\n                 feature=None, appearance_feature = None, cuda = False, lstm = None):\n\n        self.mean = mean\n        self.covariance = covariance\n        self.track_id = track_id\n        self.hits = 1\n        self.age = 1\n        self.time_since_update = 0\n        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor\n        self.cuda = cuda\n        self.state = TrackState.Tentative\n        self.features = []\n        self.features_2d = []\n        self.hidden = None\n        if lstm is None:\n            self.features.append(feature)\n            self.features_2d.append(appearance_feature)\n        else:\n            self.feature_update(feature, appearance_feature, lstm)\n        self.first_detection = mean[:7]\n        self._n_init = n_init\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n        self._max_age = max_age\n        self.matched = True\n        self.exiting = False\n        self.last_box = None\n\n\n    def to_tlwh3d(self):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n\n        if self.last_box is not None:\n            return self.last_box.box_3d\n        else:\n            return self.mean[[0,1,2,3,4,5,6]].copy()\n\n    def to_tlwh(self, kf):\n        \"\"\"Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        corner_points, _ = kf.calculate_corners(self.mean)\n        min_x, min_y = np.amin(corner_points, axis = 0)[:2]\n        max_x, max_y = np.amax(corner_points, axis = 0)[:2]\n        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])\n        return ret\n\n    def predict(self, kf):\n        \"\"\"Propagate the state distribution to the current time step using a\n        Kalman filter prediction step.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n\n        \"\"\"\n        self.mean, self.covariance = kf.predict(self.mean, self.covariance)\n        self.age += 1\n        self.time_since_update += 1\n\n    # @profile\n    def update(self, kf, detection, compare_2d=False,\n                marginalization=None, detection_idx=None, JPDA=False, lstm = None):\n        \"\"\"Perform Kalman filter measurement update step and update the feature\n        cache.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n        detection : Detection\n            The associated detection.\n\n        \"\"\"\n        if JPDA:\n\n            detections_2d = [det.tlwh for det in detection]\n            if compare_2d:\n                detections_3d = None\n            else:\n                detections_3d = [np.copy(det.box_3d) for det in detection]\n                for det in detections_3d:\n                    if det[6] - self.mean[6] > np.pi:\n                        det[6] -= 2 * np.pi\n                    elif det[6] - self.mean[6] < -np.pi:\n                        det[6] += 2*np.pi\n            self.mean, self.covariance, self.mean_post_3d = kf.update(\n                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)\n            self.mean[6] = self.mean[6] % (2 * np.pi)\n            self.feature_update(detection, detection_idx, lstm)\n            if np.argmax(marginalization) != 0:\n                self.matched=True\n            else:\n                self.matched=False\n            if detection_idx < 0:\n                self.last_box = None\n                return\n            self.hits += 1\n            self.time_since_update = 0\n            detection = detection[detection_idx]\n            self.last_box = detection\n        else:\n            detection = detection[detection_idx]\n            detections_3d = detections_3d[detection_idx]\n            self.mean, self.covariance = kf.update(\n                self.mean, self.covariance, detection.tlwh, detections_3d)\n\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n\n    def mark_missed(self):\n        \"\"\"Mark this track as missed (no association at the current time step).\n        \"\"\"\n        if self.state == TrackState.Tentative:\n            self.state = TrackState.Deleted\n        elif self.time_since_update > self._max_age:\n            self.state = TrackState.Deleted\n\n    def is_tentative(self):\n        \"\"\"Returns True if this track is tentative (unconfirmed).\n        \"\"\"\n        return self.state == TrackState.Tentative\n\n    def is_confirmed(self):\n        \"\"\"Returns True if this track is confirmed.\"\"\"\n        return self.state == TrackState.Confirmed\n\n    def is_deleted(self):\n        \"\"\"Returns True if this track is dead and should be deleted.\"\"\"\n        return self.state == TrackState.Deleted\n\n    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):\n        if JPDA:\n            features=[d.feature for d in detections]\n            appearance_features=[d.appearance_feature for d in detections]\n            if len([i for i in features if i is None])==0:\n                combined_feature=np.sum(np.array(features).reshape(len(features), -1)\n                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features.append(combined_feature)\n            if len([i for i in appearance_features if i is None])==0:\n                combined_feature=np.sum(\n                                np.array(appearance_features).reshape(len(appearance_features), -1)\n                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)\n                self.features_2d.append(combined_feature)\n        else:\n            feature = detections[detection_idx].feature\n            appearance_feature = detections[detection_idx].appearance_feature\n            if feature is not None:\n                if lstm is not None:\n                    input_feature = torch.Tensor(feature).type(self.tensor)\n                    input_feature = input_feature.unsqueeze(0)\n                    with torch.no_grad():\n                        if self.hidden is None:\n                            output_feature, self.hidden = lstm(input_feature)\n                        else:\n                            output_feature, self.hidden = lstm(input_feature, self.hidden)\n                    output_feature = output_feature.cpu().numpy().squeeze(0)\n                else:\n                    output_feature = feature\n                self.features.append(output_feature)\n            if appearance_feature is not None:\n                self.features_2d.append(appearance_feature)\n\n    def get_cov(self):\n        xyz_cov = self.covariance[:3, :3]\n        theta_cov_1 = self.covariance[7, :3]\n        theta_cov_2 = self.covariance[7, 7]\n        out_cov = np.zeros((6, 6))\n        out_cov[:3,:3] = xyz_cov\n        out_cov[5, :3] = theta_cov_1\n        out_cov[:3, 5] = theta_cov_1\n        out_cov[5, 5] = theta_cov_2\n        return out_cov"
  },
  {
    "path": "src/tracker_3d 2.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nimport pdb\nimport double_measurement_kf\nimport linear_assignment\nimport iou_matching\nfrom track_3d import Track_3d\nimport JPDA_matching\nimport tracking_utils\nimport math\nimport torch\nfrom nn_matching import NearestNeighborDistanceMetric\n\nclass Tracker_3d:\n    \"\"\"\n    This is the multi-target tracker.\n\n    Parameters\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        A distance metric for measurement-to-track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n\n    Attributes\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        The distance metric used for measurement to track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of frames that a track remains in initialization phase.\n    kf : EKF.KalmanFilter\n        A Kalman filter to filter target trajectories in image space.\n    tracks : List[Track]\n        The list of active tracks at the current time step.\n\n    \"\"\"\n\n    def __init__(self, max_age=30, n_init=3,\n                 JPDA=False, m_best_sol=1, assn_thresh=0.0,\n                 matching_strategy=None, appearance_model = None,\n                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,\n                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,\n                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):\n\n        self.metric = NearestNeighborDistanceMetric(\"euclidean\", nn_budget)\n        self.max_age = max_age\n        self.n_init = n_init\n        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)\n        self.tracks = []\n        self._next_id = 1\n        self.JPDA = JPDA\n        self.m_best_sol = m_best_sol\n        self.assn_thresh = assn_thresh\n        self.matching_strategy = matching_strategy\n        self.gate_only_position = not gate_full_state\n        self.lstm = lstm\n        self.cuda = cuda\n        self.dummy_node_cost_app = dummy_node_cost_app\n        self.dummy_node_cost_iou = dummy_node_cost_iou\n        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d\n        self.appearance_model = appearance_model\n\n    # @profile\n    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):\n        targets = np.array([tracks[i].track_id for i in track_indices])\n        if not compare_2d and self.metric.check_samples(targets):\n            compare_2d = True\n        if compare_2d:\n            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)\n        else:\n            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)\n        #cost_matrix = self.metric.distance(features, targets, compare_2d)\n        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)\n        use_3d = not compare_2d\n        # for i in detection_indices:\n        #     if dets[i].box_3d is None:\n        #         use_3d = False\n        #         break\n        if use_3d:\n            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)\n        else:\n            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)\n        dets_for_gating = dets\n\n        gate_mask = linear_assignment.gate_cost_matrix(\n            self.kf, tracks, dets_for_gating, track_indices,\n            detection_indices, only_position=self.gate_only_position, use3d=use_3d)\n        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))\n\n        return cost_matrix, gate_mask\n\n    def predict(self):\n        \"\"\"Propagate track state distributions one time step forward.\n\n        This function should be called once every time step, before `update`.\n        \"\"\"\n        for track in self.tracks:\n            track.predict(self.kf)\n\n    # @profile\n    def update(self, input_img, detections):\n        \"\"\"Perform measurement update and track management.\n\n        Parameters\n        ----------\n        detections : List[deep_sort.detection.Detection]\n            A list of detections at the current time step.\n\n        \"\"\"\n\n        matches, unmatched_tracks, unmatched_detections = \\\n            self._match(detections)\n\n        # update filter for each assigned track\n        # Only do this for non-JPDA because in JPDA the kf states are updated\n        # during the matching process\n        # update track state for unmatched tracks\n        for track_idx in unmatched_tracks:\n            self.tracks[track_idx].mark_missed()\n        \n        self.prune_tracks()\n        # create new tracks\n        for detection_idx in unmatched_detections:\n            self._initiate_track(detections[detection_idx])\n\n         # Update distance metric.\n        active_targets = [t.track_id for t in self.tracks]\n        features, features_2d, targets, targets_2d = [], [], [], []\n        for track in self.tracks:\n            features += track.features\n            features_2d += track.features_2d\n            targets += [track.track_id for _ in track.features]\n            targets_2d += [track.track_id for _ in track.features_2d]\n            track.features = []\n            track.features_2d = []\n\n        self.metric.partial_fit(\n            features, features_2d, targets, targets_2d, active_targets)\n\n    # @profile\n    def _match(self, detections):\n\n        # Associate confirmed tracks using appearance features.\n        if self.JPDA:\n            # Only run JPDA on confirmed tracks\n            det_3d_idx = []\n            det_2d_idx = []\n            for idx, det in enumerate(detections):\n                if det.box_3d is not None:\n                    det_3d_idx.append(idx)\n                else:\n                    det_2d_idx.append(idx)\n            marginalizations = \\\n                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,\n                                       self.dummy_node_cost_iou, self.tracks, \\\n                                       detections, compare_2d=False,\n                                       detection_indices=det_3d_idx)\n            #print(marginalizations) \n            dets_matching_3d = [d for i, d in enumerate(detections) if i in det_3d_idx]\n            jpda_matcher = JPDA_matching.Matcher(\n                detections, marginalizations, range(len(self.tracks)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()\n\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches_a}\n\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks_a:\n                track_detection_map[t] = -1\n            if det_2d_idx:\n                marginalizations_2d = \\\n                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks, \\\n                    detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)\n                dets_matching_2d = [d for i, d in enumerate(detections) if i in det_2d_idx]\n                jpda_matcher = JPDA_matching.Matcher(\n                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n                matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()\n\n                track_detection_map_2d = {unmatched_tracks_a[t]:d for (t,d) in matches_a}\n\n                # Map unmatched tracks to -1 for no detection\n                for t in unmatched_tracks_2d:\n                    track_detection_map_2d[unmatched_tracks_a[t]] = -1\n            # udpate Kalman state\n            if marginalizations.shape[0] > 0:\n                for i in range(len(self.tracks)):\n                    if det_2d_idx and i in unmatched_tracks_a:\n                        self.tracks[i].update(self.kf, dets_matching_2d,\n                            marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i], \n                            JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)\n                    else:\n                        self.tracks[i].update(self.kf, dets_matching_3d,\n                            marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], \n                            JPDA=self.JPDA, lstm = self.lstm)\n\n        else:\n            matches_a, unmatched_tracks_a, unmatched_detections = \\\n                linear_assignment.matching_cascade(\n                    self.gated_metric, self.metric.matching_threshold, self.max_age,\n                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)\n\n        return matches_a, unmatched_tracks_a, unmatched_detections\n\n    def _initiate_track(self, detection):\n        if detection.box_3d is None:\n            return\n        mean, covariance = self.kf.initiate(detection.box_3d)\n        self.tracks.append(Track_3d(\n            mean, covariance, self._next_id, self.n_init, self.max_age,\n            feature=detection.feature, appearance_feature = detection.appearance_feature,\n            cuda = self.cuda, lstm = self.lstm))\n        self._next_id += 1\n    \n    def prune_tracks(self):\n\n        # for track in self.tracks:\n        #     # Check if track is leaving\n        #     predicted_mean = self.kf.predict_mean(track.mean)\n        #     predicted_cov = track.covariance\n        #     predicted_pos = predicted_mean[:2]\n        #     predicted_vel = predicted_mean[4:6]\n        #     predicted_pos[0] -= w/2\n        #     predicted_pos[1] -= h/2\n\n        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*\n        #                                             np.linalg.norm(predicted_vel) + 1e-6)\n        #     predicted_pos[0] += w/2\n        #     predicted_pos[1] += h/2\n        #     # Thresholds for deciding whether track is outside image\n        #     BORDER_VALUE = 0\n        #     if (cos_theta > 0 and\n        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or\n        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):\n        #         if track.is_exiting() and not track.matched:\n        #             track.delete_track()\n        #         else:\n        #             track.mark_exiting()\n            # Check if track is too uncertain\n            # cov_axis,_ = np.linalg.eigh(predicted_cov)\n            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):\n            #    track.delete_track()\n        self.tracks = [t for t in self.tracks if not t.is_deleted()]\n"
  },
  {
    "path": "src/tracker_3d.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nimport pdb\nimport double_measurement_kf\nimport linear_assignment\nimport iou_matching\nfrom track_3d import Track_3d\nimport JPDA_matching\nimport tracking_utils\nimport math\nimport torch\nfrom nn_matching import NearestNeighborDistanceMetric\n\nclass Tracker_3d:\n    \"\"\"\n    This is the multi-target tracker.\n\n    Parameters\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        A distance metric for measurement-to-track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n\n    Attributes\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        The distance metric used for measurement to track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of frames that a track remains in initialization phase.\n    kf : EKF.KalmanFilter\n        A Kalman filter to filter target trajectories in image space.\n    tracks : List[Track]\n        The list of active tracks at the current time step.\n\n    \"\"\"\n\n    def __init__(self, max_age=30, n_init=3,\n                 JPDA=False, m_best_sol=1, assn_thresh=0.0,\n                 matching_strategy=None, appearance_model = None,\n                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,\n                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,\n                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):\n\n        self.metric = NearestNeighborDistanceMetric(\"euclidean\", nn_budget)\n        self.max_age = max_age\n        self.n_init = n_init\n        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)\n        self.tracks = []\n        self._next_id = 1\n        self.JPDA = JPDA\n        self.m_best_sol = m_best_sol\n        self.assn_thresh = assn_thresh\n        self.matching_strategy = matching_strategy\n        self.gate_only_position = not gate_full_state\n        self.lstm = lstm\n        self.cuda = cuda\n        self.dummy_node_cost_app = dummy_node_cost_app\n        self.dummy_node_cost_iou = dummy_node_cost_iou\n        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d\n        self.appearance_model = appearance_model\n\n    # @profile\n    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):\n        targets = np.array([tracks[i].track_id for i in track_indices])\n        if not compare_2d and self.metric.check_samples(targets):\n            compare_2d = True\n        if compare_2d:\n            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)\n        else:\n            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)\n        #cost_matrix = self.metric.distance(features, targets, compare_2d)\n        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)\n        use_3d = not compare_2d\n        # for i in detection_indices:\n        #     if dets[i].box_3d is None:\n        #         use_3d = False\n        #         break\n        if use_3d:\n            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)\n        else:\n            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)\n        dets_for_gating = dets\n\n        gate_mask = linear_assignment.gate_cost_matrix(\n            self.kf, tracks, dets_for_gating, track_indices,\n            detection_indices, only_position=self.gate_only_position, use3d=use_3d)\n        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))\n\n        return cost_matrix, gate_mask\n\n    def predict(self):\n        \"\"\"Propagate track state distributions one time step forward.\n\n        This function should be called once every time step, before `update`.\n        \"\"\"\n        for track in self.tracks:\n            track.predict(self.kf)\n\n    # @profile\n    def update(self, input_img, detections):\n        \"\"\"Perform measurement update and track management.\n\n        Parameters\n        ----------\n        detections : List[deep_sort.detection.Detection]\n            A list of detections at the current time step.\n\n        \"\"\"\n\n        matches, unmatched_tracks, unmatched_detections = \\\n            self._match(detections)\n\n        # update filter for each assigned track\n        # Only do this for non-JPDA because in JPDA the kf states are updated\n        # during the matching process\n        # update track state for unmatched tracks\n        for track_idx in unmatched_tracks:\n            self.tracks[track_idx].mark_missed()\n        \n        self.prune_tracks()\n        # create new tracks\n        for detection_idx in unmatched_detections:\n            self._initiate_track(detections[detection_idx])\n\n         # Update distance metric.\n        active_targets = [t.track_id for t in self.tracks]\n        features, features_2d, targets, targets_2d = [], [], [], []\n        for track in self.tracks:\n            features += track.features\n            features_2d += track.features_2d\n            targets += [track.track_id for _ in track.features]\n            targets_2d += [track.track_id for _ in track.features_2d]\n            track.features = []\n            track.features_2d = []\n\n        self.metric.partial_fit(\n            features, features_2d, targets, targets_2d, active_targets)\n\n    # @profile\n    def _match(self, detections):\n\n        # Associate confirmed tracks using appearance features.\n        if self.JPDA:\n            # Only run JPDA on confirmed tracks\n            det_3d_idx = []\n            det_2d_idx = []\n            for idx, det in enumerate(detections):\n                if det.box_3d is not None:\n                    det_3d_idx.append(idx)\n                else:\n                    det_2d_idx.append(idx)\n            marginalizations = \\\n                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,\n                                       self.dummy_node_cost_iou, self.tracks, \\\n                                       detections, compare_2d=False,\n                                       detection_indices=det_3d_idx)\n            #print(marginalizations) \n            dets_matching_3d = [d for i, d in enumerate(detections) if i in det_3d_idx]\n            jpda_matcher = JPDA_matching.Matcher(\n                detections, marginalizations, range(len(self.tracks)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()\n\n            # Map matched tracks to detections\n            track_detection_map = {t:d for (t,d) in matches_a}\n\n            # Map unmatched tracks to -1 for no detection\n            for t in unmatched_tracks_a:\n                track_detection_map[t] = -1\n            if det_2d_idx:\n                marginalizations_2d = \\\n                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks, \\\n                    detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)\n                dets_matching_2d = [d for i, d in enumerate(detections) if i in det_2d_idx]\n                jpda_matcher = JPDA_matching.Matcher(\n                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),\n                self.matching_strategy, assignment_threshold=self.assn_thresh)\n                matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()\n\n                track_detection_map_2d = {unmatched_tracks_a[t]:d for (t,d) in matches_a}\n\n                # Map unmatched tracks to -1 for no detection\n                for t in unmatched_tracks_2d:\n                    track_detection_map_2d[unmatched_tracks_a[t]] = -1\n            # udpate Kalman state\n            if marginalizations.shape[0] > 0:\n                for i in range(len(self.tracks)):\n                    if det_2d_idx and i in unmatched_tracks_a:\n                        self.tracks[i].update(self.kf, dets_matching_2d,\n                            marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i], \n                            JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)\n                    else:\n                        self.tracks[i].update(self.kf, dets_matching_3d,\n                            marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], \n                            JPDA=self.JPDA, lstm = self.lstm)\n\n        else:\n            matches_a, unmatched_tracks_a, unmatched_detections = \\\n                linear_assignment.matching_cascade(\n                    self.gated_metric, self.metric.matching_threshold, self.max_age,\n                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)\n\n        return matches_a, unmatched_tracks_a, unmatched_detections\n\n    def _initiate_track(self, detection):\n        if detection.box_3d is None:\n            return\n        mean, covariance = self.kf.initiate(detection.box_3d)\n        self.tracks.append(Track_3d(\n            mean, covariance, self._next_id, self.n_init, self.max_age,\n            feature=detection.feature, appearance_feature = detection.appearance_feature,\n            cuda = self.cuda, lstm = self.lstm))\n        self._next_id += 1\n    \n    def prune_tracks(self):\n\n        # for track in self.tracks:\n        #     # Check if track is leaving\n        #     predicted_mean = self.kf.predict_mean(track.mean)\n        #     predicted_cov = track.covariance\n        #     predicted_pos = predicted_mean[:2]\n        #     predicted_vel = predicted_mean[4:6]\n        #     predicted_pos[0] -= w/2\n        #     predicted_pos[1] -= h/2\n\n        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*\n        #                                             np.linalg.norm(predicted_vel) + 1e-6)\n        #     predicted_pos[0] += w/2\n        #     predicted_pos[1] += h/2\n        #     # Thresholds for deciding whether track is outside image\n        #     BORDER_VALUE = 0\n        #     if (cos_theta > 0 and\n        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or\n        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):\n        #         if track.is_exiting() and not track.matched:\n        #             track.delete_track()\n        #         else:\n        #             track.mark_exiting()\n            # Check if track is too uncertain\n            # cov_axis,_ = np.linalg.eigh(predicted_cov)\n            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):\n            #    track.delete_track()\n        self.tracks = [t for t in self.tracks if not t.is_deleted()]\n"
  },
  {
    "path": "src/tracker_3d_node 2.py",
    "content": "#!/home/sibot/anaconda2/bin/python\n\"\"\" yolo_bbox_to_sort.py\n    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification\n\"\"\"\nimport time\nimport rospy\nimport ros_numpy\nimport sys\nimport numpy as np\nimport torch\nimport os\nimport message_filters\nfrom featurepointnet_model_util import generate_detections_3d, \\\n    convert_depth_features\nfrom featurepointnet_model import create_depth_model\nfrom calibration import OmniCalibration\nfrom jpda_rospack.msg import detection3d_with_feature_array, \\\n    detection3d_with_feature, detection2d_with_feature_array\nfrom tracking_utils import convert_detections, combine_features\nfrom combination_model import CombiNet\nfrom tracker_3d import Tracker_3d\nfrom visualization_msgs.msg import MarkerArray, Marker\nfrom std_msgs.msg import Int8\nfrom geometry_msgs.msg import Pose, PoseWithCovariance\nfrom spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons\n\nimport pdb\n\n\nclass Tracker_3D_node:\n    def __init__(self):\n        self.node_name = \"tracker_3d\"\n        \n        rospy.init_node(self.node_name)\n        rospy.on_shutdown(self.cleanup)\n\n        self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))\n        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')\n        calib = OmniCalibration(calibration_folder)\n        self.tracker = Tracker_3d(max_age=25, n_init=3,\n                                  JPDA=True, m_best_sol=10, assn_thresh=0.6,\n                                  matching_strategy='hungarian',\n                                  cuda=True, calib=calib, omni=True,\n                                  kf_vel_params=(0.08, 0.03, 0.01, 0.03,\n                                                 1.2, 3.9, 0.8, 1.6),\n                                  dummy_node_cost_iou=0.9, dummy_node_cost_app=6,\n                                  nn_budget=3, dummy_node_cost_iou_2d=0.5)\n\n        combination_model_path = rospy.get_param('~combination_model_path', False)\n        if combination_model_path:\n            self.combination_model = CombiNet()\n            checkpoint = torch.load(combination_model_path)\n            self.combination_model.load_state_dict(checkpoint['state_dict'])\n            try:\n                combination_model.cuda()\n            except:\n                pass\n            self.combination_model.eval()\n        else:\n            self.combination_model = None\n        \n        self.detection_2d_sub = \\\n            message_filters.Subscriber(\"detection2d_with_feature\",\n                                       detection2d_with_feature_array,\n                                       queue_size=5)\n        self.detection_3d_sub = \\\n            message_filters.Subscriber(\"detection3d_with_feature\",\n                                       detection3d_with_feature_array,\n                                       queue_size=5)\n        \n        # self.detection_2d_sub.registerCallback(self.find_time_diff_2d)\n        # self.detection_3d_sub.registerCallback(self.find_time_diff_3d)\n        # self.last_seen_2d = 0\n        # self.last_seen_3d = 0\n        self.time_sync = \\\n            message_filters.TimeSynchronizer([self.detection_2d_sub,\n                                                         self.detection_3d_sub],\n                                                        5)\n        self.time_sync.registerCallback(self.do_3d_tracking)\n    \n        self.tracker_output_pub = rospy.Publisher(\"/jpda_output\", TrackedPersons,\n                                                  queue_size=30)\n    \n        self.debug_pub = rospy.Publisher(\"/test\", Int8, queue_size=1)\n        rospy.loginfo(\"Ready.\")\n        \n    def do_3d_tracking(self, detections_2d, detections_3d):\n        start = time.time()\n        #rospy.loginfo(\"Tracking frame\")\n        # convert_detections\n        boxes_2d = []\n        boxes_3d = []\n        valid_3d = []\n        features_2d = []\n        features_3d = []\n        dets_2d = sorted(detections_2d.detection2d_with_features, key=lambda x:x.frame_det_id)\n        dets_3d = sorted(detections_3d.detection3d_with_features, key=lambda x:x.frame_det_id)\n        i, j = 0, 0\n        while i < len(dets_2d) and j < len(dets_3d):\n            det_2d = dets_2d[i]\n            det_3d = dets_3d[j]\n            if det_2d.frame_det_id == det_3d.frame_det_id:\n                i += 1\n                j += 1\n                valid_3d.append(det_3d.valid)\n                boxes_2d.append(np.array([det_2d.x1, det_2d.y1, det_2d.x2, det_2d.y2, 1, -1, -1]))\n                features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))\n                if det_3d.valid:\n                    boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z, det_3d.l, det_3d.h, det_3d.w, det_3d.theta]))\n                    features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))\n                else:\n                    boxes_3d.append(None)\n                    features_3d.append(None)\n            elif det_2d.frame_det_id < det_3d.frame_det_id:\n                i += 1\n            else:\n                j += 1\n        \n        if not boxes_3d:\n            boxes_3d = None\n        features_3d, features_2d = combine_features(features_2d, features_3d,\n                                                    valid_3d, self.combination_model,\n                                                    depth_weight=self.depth_weight)\n        detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)\n        self.tracker.predict()\n        self.tracker.update(None, detections)\n        tracked_array = TrackedPersons()\n        tracked_array.header.stamp = detections_3d.header.stamp\n        tracked_array.header.frame_id = 'occam'\n\n        for track in self.tracker.tracks:\n            if not track.is_confirmed():\n                continue\n            #print('Confirmed track!')\n            pose_msg = Pose()\n            tracked_person_msg = TrackedPerson()\n            tracked_person_msg.header.stamp = detections_3d.header.stamp\n            tracked_person_msg.header.frame_id = 'occam'\n            tracked_person_msg.track_id = track.track_id\n            if track.time_since_update < 2:\n                tracked_person_msg.is_matched = True\n            else:\n                tracked_person_msg.is_matched = False\n            bbox = track.to_tlwh3d()\n            covariance = track.get_cov().reshape(-1).tolist()\n            pose_msg.position.x = bbox[0]\n            pose_msg.position.y = bbox[1] - bbox[4]/2\n            pose_msg.position.z = bbox[2]\n            pose_msg = PoseWithCovariance(pose=pose_msg, covariance=covariance)\n            tracked_person_msg.pose = pose_msg\n            tracked_array.tracks.append(tracked_person_msg)\n\n        self.tracker_output_pub.publish(tracked_array)\n\n        #rospy.loginfo(\"tracker time: {}\".format(time.time() - start))\n\n    def find_time_diff_2d(self, a):\n        print(a.header.stamp - self.last_seen_3d)\n        self.last_seen_2d = a.header.stamp\n\n    def find_time_diff_3d(self, a):\n        print(a.header.stamp - self.last_seen_2d)\n        self.last_seen_3d = a.header.stamp\n\n    def cleanup(self):\n        print(\"Shutting down 3D tracking node.\")\n        del self.combination_model\n        del self.tracker\n        del self.detection_2d_sub\n        del self.detection_3d_sub\n        del self.time_sync\n        del self.tracker_output_pub\n    \ndef main(args):       \n    try:\n        Tracker_3D_node()\n        rospy.spin()\n    except KeyboardInterrupt:\n        print(\"Shutting down 3D tracking node.\")\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "src/tracker_3d_node.py",
    "content": "#!/home/sibot/anaconda2/bin/python\n\"\"\" yolo_bbox_to_sort.py\n    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification\n\"\"\"\nimport time\nimport rospy\nimport ros_numpy\nimport sys\nimport numpy as np\nimport torch\nimport os\nimport message_filters\nfrom featurepointnet_model_util import generate_detections_3d, \\\n    convert_depth_features\nfrom featurepointnet_model import create_depth_model\nfrom calibration import OmniCalibration\nfrom jpda_rospack.msg import detection3d_with_feature_array, \\\n    detection3d_with_feature, detection2d_with_feature_array\nfrom tracking_utils import convert_detections, combine_features\nfrom combination_model import CombiNet\nfrom tracker_3d import Tracker_3d\nfrom visualization_msgs.msg import MarkerArray, Marker\nfrom std_msgs.msg import Int8\nfrom geometry_msgs.msg import Pose, PoseWithCovariance\nfrom spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons\n\nimport pdb\n\n\nclass Tracker_3D_node:\n    def __init__(self):\n        self.node_name = \"tracker_3d\"\n        \n        rospy.init_node(self.node_name)\n        rospy.on_shutdown(self.cleanup)\n\n        self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))\n        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')\n        calib = OmniCalibration(calibration_folder)\n        self.tracker = Tracker_3d(max_age=25, n_init=3,\n                                  JPDA=True, m_best_sol=10, assn_thresh=0.6,\n                                  matching_strategy='hungarian',\n                                  cuda=True, calib=calib, omni=True,\n                                  kf_vel_params=(0.08, 0.03, 0.01, 0.03,\n                                                 1.2, 3.9, 0.8, 1.6),\n                                  dummy_node_cost_iou=0.9, dummy_node_cost_app=6,\n                                  nn_budget=3, dummy_node_cost_iou_2d=0.5)\n\n        combination_model_path = rospy.get_param('~combination_model_path', False)\n        if combination_model_path:\n            self.combination_model = CombiNet()\n            checkpoint = torch.load(combination_model_path)\n            self.combination_model.load_state_dict(checkpoint['state_dict'])\n            try:\n                combination_model.cuda()\n            except:\n                pass\n            self.combination_model.eval()\n        else:\n            self.combination_model = None\n        \n        self.detection_2d_sub = \\\n            message_filters.Subscriber(\"detection2d_with_feature\",\n                                       detection2d_with_feature_array,\n                                       queue_size=5)\n        self.detection_3d_sub = \\\n            message_filters.Subscriber(\"detection3d_with_feature\",\n                                       detection3d_with_feature_array,\n                                       queue_size=5)\n        \n        # self.detection_2d_sub.registerCallback(self.find_time_diff_2d)\n        # self.detection_3d_sub.registerCallback(self.find_time_diff_3d)\n        # self.last_seen_2d = 0\n        # self.last_seen_3d = 0\n        self.time_sync = \\\n            message_filters.TimeSynchronizer([self.detection_2d_sub,\n                                                         self.detection_3d_sub],\n                                                        5)\n        self.time_sync.registerCallback(self.do_3d_tracking)\n    \n        self.tracker_output_pub = rospy.Publisher(\"/jpda_output\", TrackedPersons,\n                                                  queue_size=30)\n    \n        self.debug_pub = rospy.Publisher(\"/test\", Int8, queue_size=1)\n        rospy.loginfo(\"Ready.\")\n        \n    def do_3d_tracking(self, detections_2d, detections_3d):\n        start = time.time()\n        #rospy.loginfo(\"Tracking frame\")\n        # convert_detections\n        boxes_2d = []\n        boxes_3d = []\n        valid_3d = []\n        features_2d = []\n        features_3d = []\n        dets_2d = sorted(detections_2d.detection2d_with_features, key=lambda x:x.frame_det_id)\n        dets_3d = sorted(detections_3d.detection3d_with_features, key=lambda x:x.frame_det_id)\n        i, j = 0, 0\n        while i < len(dets_2d) and j < len(dets_3d):\n            det_2d = dets_2d[i]\n            det_3d = dets_3d[j]\n            if det_2d.frame_det_id == det_3d.frame_det_id:\n                i += 1\n                j += 1\n                valid_3d.append(det_3d.valid)\n                boxes_2d.append(np.array([det_2d.x1, det_2d.y1, det_2d.x2, det_2d.y2, 1, -1, -1]))\n                features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))\n                if det_3d.valid:\n                    boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z, det_3d.l, det_3d.h, det_3d.w, det_3d.theta]))\n                    features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))\n                else:\n                    boxes_3d.append(None)\n                    features_3d.append(None)\n            elif det_2d.frame_det_id < det_3d.frame_det_id:\n                i += 1\n            else:\n                j += 1\n        \n        if not boxes_3d:\n            boxes_3d = None\n        features_3d, features_2d = combine_features(features_2d, features_3d,\n                                                    valid_3d, self.combination_model,\n                                                    depth_weight=self.depth_weight)\n        detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)\n        self.tracker.predict()\n        self.tracker.update(None, detections)\n        tracked_array = TrackedPersons()\n        tracked_array.header.stamp = detections_3d.header.stamp\n        tracked_array.header.frame_id = 'occam'\n\n        for track in self.tracker.tracks:\n            if not track.is_confirmed():\n                continue\n            #print('Confirmed track!')\n            pose_msg = Pose()\n            tracked_person_msg = TrackedPerson()\n            tracked_person_msg.header.stamp = detections_3d.header.stamp\n            tracked_person_msg.header.frame_id = 'occam'\n            tracked_person_msg.track_id = track.track_id\n            if track.time_since_update < 2:\n                tracked_person_msg.is_matched = True\n            else:\n                tracked_person_msg.is_matched = False\n            bbox = track.to_tlwh3d()\n            covariance = track.get_cov().reshape(-1).tolist()\n            pose_msg.position.x = bbox[0]\n            pose_msg.position.y = bbox[1] - bbox[4]/2\n            pose_msg.position.z = bbox[2]\n            pose_msg = PoseWithCovariance(pose=pose_msg, covariance=covariance)\n            tracked_person_msg.pose = pose_msg\n            tracked_array.tracks.append(tracked_person_msg)\n\n        self.tracker_output_pub.publish(tracked_array)\n\n        #rospy.loginfo(\"tracker time: {}\".format(time.time() - start))\n\n    def find_time_diff_2d(self, a):\n        print(a.header.stamp - self.last_seen_3d)\n        self.last_seen_2d = a.header.stamp\n\n    def find_time_diff_3d(self, a):\n        print(a.header.stamp - self.last_seen_2d)\n        self.last_seen_3d = a.header.stamp\n\n    def cleanup(self):\n        print(\"Shutting down 3D tracking node.\")\n        del self.combination_model\n        del self.tracker\n        del self.detection_2d_sub\n        del self.detection_3d_sub\n        del self.time_sync\n        del self.tracker_output_pub\n    \ndef main(args):       \n    try:\n        Tracker_3D_node()\n        rospy.spin()\n    except KeyboardInterrupt:\n        print(\"Shutting down 3D tracking node.\")\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "src/tracking_utils 2.py",
    "content": "import torch, sys, os, pdb\nimport numpy as np\nfrom PIL import Image\nfrom scipy.spatial import Delaunay\nsys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))\nfrom aligned_reid_utils import load_state_dict\nfrom featurepointnet_model_util import rotate_pc_along_y\nfrom deep_sort_utils import non_max_suppression as deepsort_nms\nimport math\nfrom detection import Detection\n\n\ndef create_detector(config_path, weight_path, cuda):\n\n    detector = Darknet(config_path)\n    detector.load_weights(weight_path)\n    if cuda:\n        detector.cuda()\n    detector.eval()\n    return detector\n\ndef get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):\n    #print(ids_3d)\n    depth_patches = []\n    for i, box in enumerate(box_3d):\n        if ids_3d[i] == -1:\n            depth_patches.append(None)\n            continue\n        box_center = np.asarray([ [box[0], box[1], box[2]] ])\n        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))\n        box_center = box_center[0]\n        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))\n        x = point_cloud[:, 0]\n        y = point_cloud[:, 1]\n        z = point_cloud[:, 2]\n        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))\n        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))\n        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))\n        idx = np.logical_and(idx_1, idx_2)\n        idx = np.logical_and(idx, idx_3)\n        depth_patch = point_cloud[idx, :]\n        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration\n        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))\n\n        if depth_patch.size == 0:\n            ids_3d[i] = -1\n            depth_patches.append(None)\n        else:\n            if depth_patch.shape[0] > num_point:\n                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)\n            else:\n\n                pc_in_box_fov = np.expand_dims(\n                            np.vstack([depth_patch,\n                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])\n                            , 0)\n            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])\n\n    return depth_patches, ids_3d\n\n\ndef non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    x = [boxes_3d[i][0] for i in range(len(boxes_3d))]\n    z = [boxes_3d[i][2] for i in range(len(boxes_3d))]\n    l = [boxes_3d[i][5] for i in range(len(boxes_3d))] #[3]\n    w = [boxes_3d[i][3] for i in range(len(boxes_3d))] #[5]\n    indices = deepsort_nms(boxes_3d, nms_thresh, np.squeeze(confidence))\n    for i in range(len(ids_3d)):\n        if i not in indices:\n            ids_3d[i] = -1\n            ids_2d[i] = -1\n            boxes_3d[i] = None\n            detections[i] = None\n    return detections, boxes_3d, ids_2d, ids_3d\n\ndef non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    #depth_patches list of patches\n\n    if len(depth_patches) == 0:\n        return []\n\n    pick = []\n\n    if confidence is not None:\n        idxs = np.argsort(confidence)\n    else:\n        idxs = list(range(len(depth_patches)))\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])\n        if np.any(overlap == -np.inf):\n            idxs = np.delete(idxs, [last])\n            continue\n        pick.append(i)\n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > nms_thresh)[0])))\n    for i in range(len(depth_patches)):\n        if i not in pick:\n            if ids_3d[i]!=-1:\n                ids_2d[i] = -1\n            ids_3d[i] = -1\n    return depth_patches, ids_3d, ids_2d\n\ndef iou_3d(patch_1, patch_2):\n    #Expecting patches of shape (N, 4) or (N,3) (numpy arrays)\n    if patch_2 is None:\n        return np.inf\n    elif patch_1 is None:\n        return -np.inf\n    # Unique points\n    patch_unique_1 = np.unique(patch_1, axis = 0)\n    patch_unique_2 = np.unique(patch_2, axis = 0)\n    intersection_points = 0\n    for point_1_idx in range(patch_unique_1.shape[0]):\n        point_distance = np.sqrt(np.sum((patch_unique_1[point_1_idx]-patch_unique_2)**2, axis = 1))\n        intersection_points += np.any(point_distance<0.3)\n\n    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points\n\n    iou = intersection_points/union_points\n\n    return iou\n\ndef convert_detections(detections, features, appearance_features, detections_3d):\n    detection_list = []\n    if detections_3d is None:\n        detections_3d = [None] * len(detections)\n    for detection, feature, appearance_feature, detection_3d in zip(detections, features, appearance_features, detections_3d):\n        x1, y1, x2, y2, conf, _, _ = detection\n        box_2d = [x1, y1, x2-x1, y2-y1]\n        if detection_3d is not None:\n            x, y, z, l, w, h, theta = detection_3d\n            box_3d = [x, y, z, l, w, h, theta]\n        else:\n            box_3d = None\n        if feature is None:\n            detection_list.append(Detection(box_2d, None, conf, appearance_feature, feature))\n        else:\n            detection_list.append(Detection(box_2d, box_3d, conf, appearance_feature, feature))\n\n    return detection_list\n\ndef combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):\n\n    combined_features = []\n    appearance_features = []\n    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):\n        if not ids_3d[i]:\n            depth_feature = torch.zeros(512, device=torch.device(\"cuda:0\"))\n        # appearance_feature = torch.zeros(512, device=torch.device(\"cuda:0\"))\n        combined_features.append(torch.cat([appearance_feature, depth_feature* depth_weight]))\n        appearance_features.append(appearance_feature)\n\n    if combination_model is not None and len(combined_features) > 0:\n        combination_model.eval()\n        combined_feature = torch.stack(combined_features)\n        combined_features = combination_model(combined_feature).detach()\n        combined_features = list(torch.unbind(combined_features))\n    return combined_features, appearance_features\n\ndef filter(detections):\n    for i, det in enumerate(detections): #Note image is 1242 x 375\n        left = det[0]\n        top = det[1]\n        right = det[2]\n        bottom = det[3]\n        if (left < 10 or right > 1232) and (top < 10 or bottom > 365):\n            detections[i] = None\n    return detections\n"
  },
  {
    "path": "src/tracking_utils.py",
    "content": "import torch, sys, os, pdb\nimport numpy as np\nfrom PIL import Image\nfrom scipy.spatial import Delaunay\nsys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))\nfrom aligned_reid_utils import load_state_dict\nfrom featurepointnet_model_util import rotate_pc_along_y\nfrom deep_sort_utils import non_max_suppression as deepsort_nms\nimport math\nfrom detection import Detection\n\n\ndef create_detector(config_path, weight_path, cuda):\n\n    detector = Darknet(config_path)\n    detector.load_weights(weight_path)\n    if cuda:\n        detector.cuda()\n    detector.eval()\n    return detector\n\ndef get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):\n    #print(ids_3d)\n    depth_patches = []\n    for i, box in enumerate(box_3d):\n        if ids_3d[i] == -1:\n            depth_patches.append(None)\n            continue\n        box_center = np.asarray([ [box[0], box[1], box[2]] ])\n        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))\n        box_center = box_center[0]\n        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))\n        x = point_cloud[:, 0]\n        y = point_cloud[:, 1]\n        z = point_cloud[:, 2]\n        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))\n        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))\n        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))\n        idx = np.logical_and(idx_1, idx_2)\n        idx = np.logical_and(idx, idx_3)\n        depth_patch = point_cloud[idx, :]\n        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration\n        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))\n\n        if depth_patch.size == 0:\n            ids_3d[i] = -1\n            depth_patches.append(None)\n        else:\n            if depth_patch.shape[0] > num_point:\n                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)\n            else:\n\n                pc_in_box_fov = np.expand_dims(\n                            np.vstack([depth_patch,\n                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])\n                            , 0)\n            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])\n\n    return depth_patches, ids_3d\n\n\ndef non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    x = [boxes_3d[i][0] for i in range(len(boxes_3d))]\n    z = [boxes_3d[i][2] for i in range(len(boxes_3d))]\n    l = [boxes_3d[i][5] for i in range(len(boxes_3d))] #[3]\n    w = [boxes_3d[i][3] for i in range(len(boxes_3d))] #[5]\n    indices = deepsort_nms(boxes_3d, nms_thresh, np.squeeze(confidence))\n    for i in range(len(ids_3d)):\n        if i not in indices:\n            ids_3d[i] = -1\n            ids_2d[i] = -1\n            boxes_3d[i] = None\n            detections[i] = None\n    return detections, boxes_3d, ids_2d, ids_3d\n\ndef non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):\n    #depth_patches list of patches\n\n    if len(depth_patches) == 0:\n        return []\n\n    pick = []\n\n    if confidence is not None:\n        idxs = np.argsort(confidence)\n    else:\n        idxs = list(range(len(depth_patches)))\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])\n        if np.any(overlap == -np.inf):\n            idxs = np.delete(idxs, [last])\n            continue\n        pick.append(i)\n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > nms_thresh)[0])))\n    for i in range(len(depth_patches)):\n        if i not in pick:\n            if ids_3d[i]!=-1:\n                ids_2d[i] = -1\n            ids_3d[i] = -1\n    return depth_patches, ids_3d, ids_2d\n\ndef iou_3d(patch_1, patch_2):\n    #Expecting patches of shape (N, 4) or (N,3) (numpy arrays)\n    if patch_2 is None:\n        return np.inf\n    elif patch_1 is None:\n        return -np.inf\n    # Unique points\n    patch_unique_1 = np.unique(patch_1, axis = 0)\n    patch_unique_2 = np.unique(patch_2, axis = 0)\n    intersection_points = 0\n    for point_1_idx in range(patch_unique_1.shape[0]):\n        point_distance = np.sqrt(np.sum((patch_unique_1[point_1_idx]-patch_unique_2)**2, axis = 1))\n        intersection_points += np.any(point_distance<0.3)\n\n    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points\n\n    iou = intersection_points/union_points\n\n    return iou\n\ndef convert_detections(detections, features, appearance_features, detections_3d):\n    detection_list = []\n    if detections_3d is None:\n        detections_3d = [None] * len(detections)\n    for detection, feature, appearance_feature, detection_3d in zip(detections, features, appearance_features, detections_3d):\n        x1, y1, x2, y2, conf, _, _ = detection\n        box_2d = [x1, y1, x2-x1, y2-y1]\n        if detection_3d is not None:\n            x, y, z, l, w, h, theta = detection_3d\n            box_3d = [x, y, z, l, w, h, theta]\n        else:\n            box_3d = None\n        if feature is None:\n            detection_list.append(Detection(box_2d, None, conf, appearance_feature, feature))\n        else:\n            detection_list.append(Detection(box_2d, box_3d, conf, appearance_feature, feature))\n\n    return detection_list\n\ndef combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):\n\n    combined_features = []\n    appearance_features = []\n    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):\n        if not ids_3d[i]:\n            depth_feature = torch.zeros(512, device=torch.device(\"cuda:0\"))\n        # appearance_feature = torch.zeros(512, device=torch.device(\"cuda:0\"))\n        combined_features.append(torch.cat([appearance_feature, depth_feature* depth_weight]))\n        appearance_features.append(appearance_feature)\n\n    if combination_model is not None and len(combined_features) > 0:\n        combination_model.eval()\n        combined_feature = torch.stack(combined_features)\n        combined_features = combination_model(combined_feature).detach()\n        combined_features = list(torch.unbind(combined_features))\n    return combined_features, appearance_features\n\ndef filter(detections):\n    for i, det in enumerate(detections): #Note image is 1242 x 375\n        left = det[0]\n        top = det[1]\n        right = det[2]\n        bottom = det[3]\n        if (left < 10 or right > 1232) and (top < 10 or bottom > 365):\n            detections[i] = None\n    return detections\n"
  }
]