Repository: StanfordVL/JRMOT_ROS
Branch: master
Commit: ca1e87e51ecf
Files: 100
Total size: 715.3 KB

Directory structure:
gitextract_mbelvxqp/

├── CMakeLists.txt
├── LICENSE
├── README.md
├── calib/
│   ├── cameras.yaml
│   └── defaults.yaml
├── config/
│   └── featurepointnet.cfg
├── launch/
│   └── jpda_tracker.launch
├── msg/
│   ├── __init__.py
│   ├── detection2d_with_feature.msg
│   ├── detection2d_with_feature_array.msg
│   ├── detection3d_with_feature.msg
│   └── detection3d_with_feature_array.msg
├── package.xml
├── paper_experiments/
│   ├── models/
│   │   ├── __init__.py
│   │   ├── aligned_reid_model.py
│   │   ├── combination_model.py
│   │   ├── deep_sort_model.py
│   │   ├── featurepointnet_model.py
│   │   ├── pointnet_model.py
│   │   ├── resnet_reid_models.py
│   │   └── yolo_models.py
│   ├── requirements.txt
│   ├── track.py
│   └── utils/
│       ├── EKF.py
│       ├── JPDA_matching.py
│       ├── aligned_reid_utils.py
│       ├── assign_ids_detections.py
│       ├── calibration.py
│       ├── combine_and_process_detections.py
│       ├── dataset.py
│       ├── deep_sort_utils.py
│       ├── detection.py
│       ├── double_measurement_kf.py
│       ├── evaluate_detections.py
│       ├── featurepointnet_model_util.py
│       ├── featurepointnet_tf_util.py
│       ├── imm.py
│       ├── iou_matching.py
│       ├── kf_2d.py
│       ├── kf_3d.py
│       ├── linear_assignment.py
│       ├── logger.py
│       ├── mbest_ilp.py
│       ├── nn_matching.py
│       ├── pointnet_tf_util.py
│       ├── pointnet_transform_nets.py
│       ├── read_detections.py
│       ├── resnet_reid_utils.py
│       ├── test_jpda.py
│       ├── test_kf/
│       │   ├── .gitignore
│       │   ├── run_kf_test.py
│       │   ├── single_track_4state_test.p.val
│       │   ├── single_track_6state_test.p.val
│       │   ├── two_track_4state_test.p.val
│       │   └── write_kf_test.py
│       ├── track.py
│       ├── track_3d.py
│       ├── tracker.py
│       ├── tracker_3d.py
│       ├── tracking_utils.py
│       ├── visualise.py
│       └── yolo_utils/
│           ├── __init__.py
│           ├── datasets.py
│           ├── parse_config.py
│           └── utils.py
├── requirements.txt
└── src/
    ├── 3d_detector.py
    ├── EKF.py
    ├── JPDA_matching.py
    ├── __init__.py
    ├── aligned_reid_model.py
    ├── aligned_reid_utils.py
    ├── calibration.py
    ├── combination_model.py
    ├── deep_sort_utils.py
    ├── detection.py
    ├── distances.py
    ├── double_measurement_kf.py
    ├── evaluation/
    │   ├── __init__.py
    │   ├── distances 2.py
    │   └── distances.py
    ├── featurepointnet_model.py
    ├── featurepointnet_model_util.py
    ├── featurepointnet_tf_util.py
    ├── iou_matching.py
    ├── kf_2d.py
    ├── linear_assignment.py
    ├── mbest_ilp.py
    ├── nn_matching.py
    ├── pointnet_model.py
    ├── template 2.py
    ├── template.py
    ├── track_3d 2.py
    ├── track_3d.py
    ├── tracker_3d 2.py
    ├── tracker_3d.py
    ├── tracker_3d_node 2.py
    ├── tracker_3d_node.py
    ├── tracking_utils 2.py
    └── tracking_utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: CMakeLists.txt
================================================
# Minimum CMake version accepted when configuring this package.
# This call has to stay ahead of project() so policy defaults are set first.
cmake_minimum_required(VERSION 2.8.3)
# Package/project name; it is the same name declared in <name> of package.xml.
project(jpda_rospack)

## Compile as C++11, supported in ROS Kinetic and newer
# add_compile_options(-std=c++11)

## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
# Locate catkin together with the ROS packages this package is configured
# against. message_generation is listed so that the add_message_files() and
# generate_messages() calls further down are available.
find_package(
  catkin REQUIRED
  COMPONENTS
    roscpp
    rospy
    std_msgs
    vision_msgs
    message_generation
)

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)


## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()

################################################
## Declare ROS messages, services and actions ##
################################################

## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
##   your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
##   * add a build_depend tag for "message_generation"
##   * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
##   * If MSG_DEP_SET isn't empty the following dependency has been pulled in
##     but can be declared for certainty nonetheless:
##     * add a exec_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
##   * add "message_generation" and every package in MSG_DEP_SET to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * add "message_runtime" and every package in MSG_DEP_SET to
##     catkin_package(CATKIN_DEPENDS ...)
##   * uncomment the add_*_files sections below as needed
##     and list every .msg/.srv/.action file to be processed
##   * uncomment the generate_messages entry below
##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)

## Generate messages in the 'msg' folder
# Register the package's custom message definitions (one entry per .msg file;
# the 2D/3D detection messages and their array wrappers).
add_message_files(
  FILES
    detection2d_with_feature.msg
    detection2d_with_feature_array.msg
    detection3d_with_feature.msg
    detection3d_with_feature_array.msg
)

## Generate services in the 'srv' folder
# add_service_files(
#   FILES
#   Service1.srv
#   Service2.srv
# )

## Generate actions in the 'action' folder
# add_action_files(
#   FILES
#   Action1.action
#   Action2.action
# )

## Generate added messages and services with any dependencies listed here
 # Generate code for the messages registered with add_message_files().
 # DEPENDENCIES names only the *other* packages whose message types are
 # referenced by our .msg files. The package's own messages (e.g. the
 # *_array messages embedding detection*_with_feature) are resolved by
 # generate_messages() itself, so jpda_rospack must not list itself here.
 generate_messages(
   DEPENDENCIES
     std_msgs
     vision_msgs
 )

################################################
## Declare ROS dynamic reconfigure parameters ##
################################################

## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
##   * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
##   * add "dynamic_reconfigure" to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * uncomment the "generate_dynamic_reconfigure_options" section below
##     and list every .cfg file to be processed

## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
#   cfg/DynReconf1.cfg
#   cfg/DynReconf2.cfg
# )

###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
# Export this package's configuration to dependent catkin packages.
# Only run-time catkin dependencies are exported; the package declares no
# include directories, libraries or system dependencies.
catkin_package(
  CATKIN_DEPENDS
    roscpp
    rospy
    std_msgs
    vision_msgs
    message_runtime
)

###########
## Build ##
###########

## Specify additional locations of header files
## Your package locations should be listed before other locations
# Header search path: only the include directories reported by catkin.
include_directories(${catkin_INCLUDE_DIRS})

## Declare a C++ library
# add_library(${PROJECT_NAME}
#   src/${PROJECT_NAME}/jpda_rospack.cpp
# )

## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/jpda_rospack_node.cpp)

## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")

## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
#   ${catkin_LIBRARIES}
# )

#############
## Install ##
#############

# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html

## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
#   scripts/my_python_script
#   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )

## Mark executables and/or libraries for installation
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
#   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )

## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
#   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
#   FILES_MATCHING PATTERN "*.h"
#   PATTERN ".svn" EXCLUDE
# )

## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
#   # myfile1
#   # myfile2
#   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )

#############
## Testing ##
#############

## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_jpda_rospack.cpp)
# if(TARGET ${PROJECT_NAME}-test)
#   target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()

## Add folders to be run by python nosetests
# catkin_add_nosetests(test)


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2020 Stanford Vision and Learning Group

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# JRMOT ROS package

The repository contains the code for the work "JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset".

Note that due to the global pandemic, this repository is still a work in progress. Updates will be made as soon as possible.

## Introduction

JRMOT is a 3D multi object tracking system that:
- Is real-time
- Is online
- Fuses 2D and 3D information
- Achieves State of the Art performance on KITTI

We also release JRDB:
- A dataset with over 2 million annotated boxes and 3500 time consistent trajectories in 2D and 3D
- Captured in social, human-centric settings
- Captured by our social mobile-manipulator JackRabbot
- Contains 360 degree cylindrical images, stereo camera images, 3D pointclouds and more sensing modalities

All information, including download links for JRDB can be found [here](https://jrdb.stanford.edu).

## JRMOT
![system overview](https://github.com/StanfordVL/JRMOT_ROS/blob/master/assets/framework.png)

- Our system is built on top of state of the art 2D and 3D detectors (mask-RCNN and F-PointNet respectively). These detections are associated with predicted track locations at every time step. 
- Association is done via a novel feature fusion, as well as a cost selection procedure, followed by Kalman state gating and JPDA. 
- Given the JPDA output, we use both 2D and 3D detections in a novel multi-modal Kalman filter to update the track locations.


## Using the code

There are 3 nodes forming parts of the ROS package:
+ 3d_detector.py: Runs F-PointNet, which performs 3D detection and 3D feature extraction
+ template.py: Runs Aligned-Re-ID, which performs 2D feature extraction
+ tracker_3d_node.py: Performs tracking while taking both 2D detections + features and 3D detections + features as input

The launch file in the folder "launch" launches all 3 nodes.

## Dependencies

The following are dependencies of the code:

+ 2D detector: The 2D detector is not included in this package. To interface with your own 2D detector, please modify the file template.py to subscribe to the correct topic, and also to handle the conversion from ROS message to numpy array.
+ Spencer People Tracking messages: The final tracker output is in a Spencer People Tracking message. Please install this package and include these message types.
+ Various python packages: These can be found in [requirements.txt](./requirements.txt). Please install all dependencies prior to running the code (including CUDA and cuDNN). Additionally, this code requires a solver called Gurobi. Instructions to install gurobipy can be found [here](https://www.gurobi.com/documentation/9.0/quickstart_mac/the_grb_python_interface_f.html).
+ Weight files: The trained weights (trained on JRDB) for F-PointNet and Aligned-ReID can be found [here](https://drive.google.com/open?id=1YQinMPVWEI44KezS9inXe0mvVnm4aL3s).

## Citation

If you find this work useful, please cite:
```
@INPROCEEDINGS{shenoi2020jrmot,
  author={A. {Shenoi} and M. {Patel} and J. {Gwak} and P. {Goebel} and A. {Sadeghian} and H. {Rezatofighi} and R. {Mart\'in-Mart\'in} and S. {Savarese}},
  booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 
  title={JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset}, 
  year={2020},
  volume={},
  number={},
  pages={10335-10342},
  doi={10.1109/IROS45743.2020.9341635}}
```

If you utilise our dataset, please also cite:

```
@article{martin2019jrdb,
  title={JRDB: A dataset and benchmark of egocentric visual perception for navigation in human environments},
  author={Mart{\'i}n-Mart{\'i}n, Roberto and Patel, Mihir and Rezatofighi, Hamid and Shenoi, Abhijeet and Gwak, JunYoung and Frankel, Eric and Sadeghian, Amir and Savarese, Silvio},
  journal={arXiv preprint arXiv:1910.11792},
  year={2019}
}
```
## 


================================================
FILE: calib/cameras.yaml
================================================
stitching:
  radius: 3360000
  rotation: 0
  scalewidth: 1831
  crop: 1
cameras:
  # camera order matters!
  sensor_0:
    width: 752
    height: 480
    D: -0.336591 0.159742 0.00012697 -7.22557e-05 -0.0461953
    # K = fx   0    cx
    #     0    fy   cy
    #     0    0    1
    K: >
      476.71 0 350.738
      0 479.505 209.532
      0 0 1
    R: >
      0.999994 0.000654539 0.00340293
      -0.000654519 1 -6.81963e-06
      -0.00340293 4.59231e-06 0.999994
    T: -0.0104242 -3.70974 -56.9177
  sensor_1:
    width: 752
    height: 480
    D: -0.335073 0.151959 -0.000232061 0.00032014 -0.0396825
    K: >
      483.254 0 365.33
      0 485.78 210.953
      0 0 1
    R: >
      0.305706 -0.00895443 -0.952084
      0.0110396 0.999922 -0.00585963
      0.952062 -0.0087193 0.305781
    T: 0.93957 -4.05131 -52.03
  sensor_2:
    width: 752
    height: 480
    D: -0.338469 0.156256 -0.000385467 0.000295485 -0.0401965
    K: >
      483.911 0 355.144
      0 486.466 223.026
      0 0 1
    R: >
      -0.806828 0.0136361 -0.590629
      0.00870468 0.999899 0.011194
      0.590723 0.00389039 -0.806865
    T: -0.25753 -6.54978 -47.7311
  sensor_3:
    width: 752
    height: 480
    D: -0.330848 0.14747 8.59247e-05 0.000262599 -0.0385311
    K: >
      475.807 0 339.53
      0 478.371 188.481
      0 0 1
    R: >
      -0.811334 0.0033829 0.584574
      0.00046071 0.999987 -0.00514746
      -0.584583 -0.00390699 -0.811324
    T: 2.72207 -6.82928 -45.9778
  sensor_4:
    width: 752
    height: 480
    D: -0.34064 0.168338 0.000147292 0.000229372 -0.0516133
    K: >
      485.046 0 368.864
      0 488.185 208.215
      0 0 1
    R: >
      0.310275 0.00160497 0.950645
      -0.00648686 0.999979 0.000428942
      -0.950625 -0.00629979 0.310279
    T: -0.333857 -5.12974 -56.0573
  sensor_5:
    width: 752
    height: 480
    D: -0.338422 0.163703 -0.000376267 7.73351e-06 -0.0479871
    K: >
      478.406 0 353.499
      0 481.322 190.225
      0 0 1
    R: >
      0.999995 0.00282205 0.00163291
      -0.00282345 0.999996 0.000852931
      -0.00163049 -0.000857537 0.999998
    T: -0.903588 -126.851 -56.6256
  sensor_6:
    width: 752
    height: 480
    D: -0.340676 0.165511 -0.00035978 0.000181532 -0.0493721
    K: >
      480.459 0 362.503
      0 482.924 197.949
      0 0 1
    R: >
      0.308288 -0.0110391 -0.951229
      -0.000933102 0.999929 -0.0119067
      0.951293 0.00455829 0.308256
    T: 1.74525 -127.214 -51.7722
  sensor_7:
    width: 752
    height: 480
    D: -0.344379 0.170343 -0.000137847 0.000141047 -0.0510536
    K: >
      486.491 0 361.559
      0 489.22 210.547
      0 0 1
    R: >
      -0.808201 0.0313998 -0.588068
      0.026057 0.999506 0.0175574
      0.588329 -0.00113337 -0.808621
    T: -2.56535 -129.191 -47.5803
  sensor_8:
    width: 752
    height: 480
    D: -0.331228 0.144696 0.000117553 0.000566449 -0.0343506
    K: >
      476.708 0 354.16
      0 479.424 209.383
      0 0 1
    R: >
      -0.807384 -0.00296577 0.590019
      -0.0122001 0.999857 -0.0116688
      -0.589901 -0.0166195 -0.807305
    T: 3.39727 -129.381 -45.2409
  sensor_9:
    width: 752
    height: 480
    D: -0.345189 0.180808 0.000276465 0.000131868 -0.062103
    K: >
      484.219 0 345.303
      0 487.312 192.371
      0 0 1
    R: >
      0.308505 0.00370159 0.951215
      -0.00403535 0.999988 -0.00258261
      -0.951214 -0.00304174 0.308517
    T: 0.354966 -128.218 -54.0617


================================================
FILE: calib/defaults.yaml
================================================
calibrated:
  # the lidar_to_rgb parameters allow tweaking of the transformation between lidar and rgb frames
  # the default transformation is taken from the TF Tree
  # NOTE: applied to the original (sensor/velodyne) frame [x forward, y left, z up]:
  lidar_upper_to_rgb:
    # in meters: [x,y,z]
    translation: [0, 0, -0.33529]
    # in radians: [x,y,z]
    rotation: [0, 0, 0.085]
 
  lidar_lower_to_rgb:

    translation: [0, 0, 0.13511]
    
    rotation: [0, 0, 0]
image:
  # all in pixels
  width: 3760
  height: 480
  # y-axis forward pixel offset (e.g. 3760/2 => 1880, b/c center of the cylindrical image is forward)
  #  TODO: move into calibrated params, when auto-calibration is possible
  stitched_image_offset: 1880

frames:
  # lookup for people transforms
  global: base_link
  # name of the rgb360 camera frame to which we wish to transform
  rgb360: occam


================================================
FILE: config/featurepointnet.cfg
================================================
[general]
num_point = 1024
model_path = /home/sibot/jr2_catkin_ws/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt


================================================
FILE: launch/jpda_tracker.launch
================================================
<?xml version="1.0" encoding="utf-8"?>

<launch>
  <!-- Where each node's stdout/stderr is sent: "screen" or "log" -->
  <arg name="output"        default="screen"/>

  <!-- Config and weights folder. -->
  <arg name="aligned_reid_model"         default="$(find jpda_rospack)/src/aligned_reid_JRDB_weights.pth"/>
  <arg name="fpointnet_config"           default="$(find jpda_rospack)/config/featurepointnet.cfg"/>
  <arg name="calib_3d"                   default="$(find jpda_rospack)/calib"/>
  <arg name="combination_depth_weight"   default="1"/>
  <arg name="combination_model_path"     default="0"/>


  <node pkg="jpda_rospack" type="template.py" name="jpda_aligned_reid" output="$(arg output)" respawn="false">
    <param name="aligned_reid_model"          value="$(arg aligned_reid_model)" />
  </node>

  <node pkg="jpda_rospack" type="3d_detector.py" name="jpda_3d_detector" output="$(arg output)" respawn="false">
    <param name="fpointnet_config"          value="$(arg fpointnet_config)" />
    <param name="calib_3d"                  value="$(arg calib_3d)" />
  </node>
  
  <node pkg="jpda_rospack" type="tracker_3d_node.py" name="jpda_tracker_3d" output="$(arg output)" respawn="false">
    <param name="combination_depth_weight"           value="$(arg combination_depth_weight)" />
    <param name="calib_3d"                           value="$(arg calib_3d)" />
    <param name="combination_model_path"             value="$(arg combination_model_path)" />
  </node>

</launch>


================================================
FILE: msg/__init__.py
================================================


================================================
FILE: msg/detection2d_with_feature.msg
================================================
# This message contains a 2D bounding box corresponding to the detection of a person
# Also contains the feature of this person used for re-ID

Header header #header timestamp is time of frame acquisition


uint64 x1 # x coordinate of the top left of the bounding box
uint64 y1 # y coordinate of the top left of the bounding box
uint64 x2 # x coordinate of the bottom right of the bounding box
uint64 y2 # y coordinate of the bottom right of the bounding box

float64[] feature # re-ID feature

uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)
bool valid # whether detection is valid (within the boundaries of the image and has minimum required size)

================================================
FILE: msg/detection2d_with_feature_array.msg
================================================
Header header
detection2d_with_feature[] detection2d_with_features

================================================
FILE: msg/detection3d_with_feature.msg
================================================
# This message contains a 3D bounding box corresponding to the detection of a person
# Also contains the feature of this person used for re-ID

Header header #header timestamp is time of frame acquisition


float32 x # x coordinate of the center of the bottom face of the bounding box
float32 y # y coordinate of the center of the bottom face of the bounding box
float32 z # z coordinate of the center of the bottom face of the bounding box
float32 l # size of bounding box along x dimension
float32 h # size of bounding box along y dimension
float32 w # size of bounding box along z dimension
float32 theta # rotation of bounding box with respect to the positive x axis


float64[] feature # re-ID feature

uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)
bool valid # whether detection is valid (enough lidar points)

================================================
FILE: msg/detection3d_with_feature_array.msg
================================================
Header header
detection3d_with_feature[] detection3d_with_features

================================================
FILE: package.xml
================================================
<?xml version="1.0"?>
<package format="2">
  <name>jpda_rospack</name>
  <version>0.0.1</version>
  <description>The jpda_rospack package</description>

  <!-- One maintainer tag required, multiple allowed, one person per tag -->
  <!-- Example:  -->
  <!-- <maintainer email="jane.doe@example.com">Jane Doe</maintainer> -->
  <maintainer email="ashenoi@cs.stanford.edu">ashenoi</maintainer>


  <!-- One license tag required, multiple allowed, one license per tag -->
  <!-- Commonly used license strings: -->
  <!--   BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
  <license>MIT</license>


  <!-- Url tags are optional, but multiple are allowed, one per tag -->
  <!-- Optional attribute type can be: website, bugtracker, or repository -->
  <!-- Example: -->
  <!-- <url type="website">http://wiki.ros.org/jpda_rospack</url> -->


  <!-- Author tags are optional, multiple are allowed, one per tag -->
  <!-- Authors do not have to be maintainers, but could be -->
  <!-- Example: -->
  <!-- <author email="jane.doe@example.com">Jane Doe</author> -->


  <!-- The *depend tags are used to specify dependencies -->
  <!-- Dependencies can be catkin packages or system dependencies -->
  <!-- Examples: -->
  <!-- Use depend as a shortcut for packages that are both build and exec dependencies -->
  <!--   <depend>roscpp</depend> -->
  <!--   Note that this is equivalent to the following: -->
  <!--   <build_depend>roscpp</build_depend> -->
  <!--   <exec_depend>roscpp</exec_depend> -->
  <!-- Use build_depend for packages you need at compile time: -->
    <build_depend>message_generation</build_depend>
  <!-- Use build_export_depend for packages you need in order to build against this package: -->
  <!--   <build_export_depend>message_generation</build_export_depend> -->
  <!-- Use buildtool_depend for build tool packages: -->
  <!--   <buildtool_depend>catkin</buildtool_depend> -->
  <!-- Use exec_depend for packages you need at runtime: -->
    <exec_depend>message_runtime</exec_depend>
  <!-- Use test_depend for packages you need only for testing: -->
  <!--   <test_depend>gtest</test_depend> -->
  <!-- Use doc_depend for packages you need only for building documentation: -->
  <!--   <doc_depend>doxygen</doc_depend> -->
  <buildtool_depend>catkin</buildtool_depend>
  <build_depend>roscpp</build_depend>
  <build_depend>rospy</build_depend>
  <build_depend>std_msgs</build_depend>
  <build_depend>vision_msgs</build_depend>
  <build_export_depend>roscpp</build_export_depend>
  <build_export_depend>rospy</build_export_depend>
  <build_export_depend>std_msgs</build_export_depend>
  <build_export_depend>vision_msgs</build_export_depend>
  <exec_depend>roscpp</exec_depend>
  <exec_depend>rospy</exec_depend>
  <exec_depend>std_msgs</exec_depend>
  <exec_depend>vision_msgs</exec_depend>


  <!-- The export tag contains other, unspecified, tags -->
  <export>
    <!-- Other tools can request additional information be placed here -->

  </export>
</package>


================================================
FILE: paper_experiments/models/__init__.py
================================================


================================================
FILE: paper_experiments/models/aligned_reid_model.py
================================================
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import os
import math


class Model(nn.Module):
  """Re-ID network: a ResNet-50 trunk with a global and a local feature head.

  The trunk is created with ``resnet50(pretrained=True)``. On top of its
  output feature map the model computes

  * a global descriptor (spatial average pooling), and
  * a local descriptor per feature-map row (mean over the width axis followed
    by a 1x1 convolution, batch norm and ReLU).

  Args:
    local_conv_out_channels: number of channels produced by the 1x1
      convolution of the local head.
    num_classes: when given, a linear classifier ``fc`` is attached to the
      global descriptor and ``forward`` additionally returns its logits.
  """

  def __init__(self, local_conv_out_channels=128, num_classes=None):
    super(Model, self).__init__()
    # Channel count of the trunk output that both heads consume.
    trunk_channels = 2048

    self.base = resnet50(pretrained=True)
    self.local_conv = nn.Conv2d(trunk_channels, local_conv_out_channels, 1)
    self.local_bn = nn.BatchNorm2d(local_conv_out_channels)
    self.local_relu = nn.ReLU(inplace=True)

    # The classifier only exists when a class count was requested; forward()
    # detects its presence with hasattr().
    if num_classes is None:
      return
    self.fc = nn.Linear(trunk_channels, num_classes)
    init.normal(self.fc.weight, std=0.001)
    init.constant(self.fc.bias, 0)

  def forward(self, x):
    """
    Returns:
      global_feat: shape [N, C]
      local_feat: shape [N, H, c]
      logits: classifier output, only returned when the model has an ``fc``
        layer (i.e. it was built with ``num_classes``)
    """
    # shape [N, C, H, W]
    feature_map = self.base(x)
    batch_size = feature_map.size(0)

    # Average over the full spatial extent, then flatten to [N, C].
    pooled = F.avg_pool2d(feature_map, feature_map.size()[2:])
    global_feat = pooled.view(batch_size, -1)

    # Collapse the width axis -> [N, C, H, 1], then project the channels.
    row_feat = feature_map.mean(-1, keepdim=True)
    row_feat = self.local_conv(row_feat)
    row_feat = self.local_bn(row_feat)
    row_feat = self.local_relu(row_feat)
    # Drop the singleton width and move channels last -> [N, H, c].
    local_feat = row_feat.squeeze(-1).permute(0, 2, 1)

    if not hasattr(self, 'fc'):
      return global_feat, local_feat

    logits = self.fc(global_feat)
    return global_feat, local_feat, logits


# Names exported by ``from <this module> import *``.
# NOTE(review): ``Model`` is defined above but not listed here — confirm
# whether leaving it out of the star-export is intentional.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

# Download URL of the pretrained weight file for each ResNet variant,
# keyed by the variant's name.
model_urls = {
  'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
  'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
  'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
  'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
  'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
# Import-time side effect: overrides TORCH_HOME for the whole process.
# Presumably this redirects where downloaded weights are cached to a
# directory relative to the current working directory — TODO confirm.
os.environ["TORCH_HOME"] = "./ResNet_Model"

def conv3x3(in_planes, out_planes, stride=1):
  """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
  conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
  return nn.Conv2d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  Args:
    inplanes: channels of the incoming tensor.
    planes: channels produced by both convolutions.
    stride: stride of the first convolution.
    downsample: optional module applied to the input to produce the shortcut
      branch; when ``None`` the input itself is used as the shortcut.
  """
  # Ratio of the block's output channels to ``planes``.
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(BasicBlock, self).__init__()
    # Only the first convolution may change the spatial resolution.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
    shortcut = x if self.downsample is None else self.downsample(x)

    out = self.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))

    # Accumulate in place, as the activation that follows is in place too.
    out += shortcut
    return self.relu(out)


class Bottleneck(nn.Module):
  """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with batch norm.

  The last 1x1 convolution widens the tensor to ``planes * 4`` channels.

  Args:
    inplanes: channels of the incoming tensor.
    planes: channels used by the first two convolutions.
    stride: stride of the 3x3 convolution.
    downsample: optional module applied to the input to produce the shortcut
      branch; when ``None`` the input itself is used as the shortcut.
  """
  # Ratio of the block's output channels to ``planes``.
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(Bottleneck, self).__init__()
    widened = planes * 4

    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    # The 3x3 convolution is the only one that applies the stride.
    self.conv2 = nn.Conv2d(
      planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(widened)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Run the three conv/bn stages, add the shortcut and apply ReLU."""
    shortcut = x if self.downsample is None else self.downsample(x)

    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    # Accumulate in place, as the activation that follows is in place too.
    out += shortcut
    return self.relu(out)


class ResNet(nn.Module):
  """ResNet trunk: a 7x7 stem, a max-pool and four residual stages.

  There is no pooling or classifier head; `forward` returns the output of the
  fourth stage.

  Args:
      block: residual block class exposing an `expansion` attribute and taking
          (inplanes, planes, stride, downsample) in its constructor.
      layers: sequence of four ints, the number of blocks in each stage.
  """

  def __init__(self, block, layers):
    super(ResNet, self).__init__()
    # Running input-channel count; _make_layer reads it and advances it.
    self.inplanes = 64
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # (base width, stride) of layer1..layer4, registered in that order.
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for idx, (width, stride) in enumerate(stage_specs):
      stage = self._make_layer(block, width, layers[idx], stride=stride)
      setattr(self, 'layer%d' % (idx + 1), stage)

    # Convolutions: zero-mean normal with std sqrt(2 / (kh * kw * out_channels)).
    # Batch norms: weight 1, bias 0.
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        k_h, k_w = module.kernel_size[0], module.kernel_size[1]
        fan = k_h * k_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack `blocks` instances of `block` into one nn.Sequential stage."""
    out_planes = planes * block.expansion

    # A 1x1 conv + batch norm shortcut is needed whenever the first block
    # changes the resolution or the channel count.
    needs_projection = stride != 1 or self.inplanes != out_planes
    downsample = None
    if needs_projection:
      downsample = nn.Sequential(
        nn.Conv2d(self.inplanes, out_planes,
                  kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_planes),
      )

    # Only the first block gets the stride and the shortcut projection.
    stage = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = out_planes
    for _ in range(1, blocks):
      stage.append(block(self.inplanes, planes))

    return nn.Sequential(*stage)

  def forward(self, x):
    pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                self.layer1, self.layer2, self.layer3, self.layer4)
    for step in pipeline:
      x = step(x)
    return x


def remove_fc(state_dict):
  """Return a shallow copy of state_dict without the keys that start with 'fc.'.

  The input mapping itself is left unmodified.
  """
  pruned = state_dict.copy()
  fc_keys = [name for name in state_dict if name.startswith('fc.')]
  for name in fc_keys:
    pruned.pop(name)
  return pruned


def resnet18(pretrained=False):
  """Build a ResNet-18 trunk (BasicBlock, stage depths [2, 2, 2, 2]).

  Args:
      pretrained (bool): If True, fetch the 'resnet18' checkpoint listed in
          model_urls and load it with its 'fc.' entries removed.
  """
  net = ResNet(BasicBlock, [2, 2, 2, 2])
  if not pretrained:
    return net
  checkpoint = model_zoo.load_url(model_urls['resnet18'])
  net.load_state_dict(remove_fc(checkpoint))
  return net


def resnet34(pretrained=False):
  """Build a ResNet-34 trunk (BasicBlock, stage depths [3, 4, 6, 3]).

  Args:
      pretrained (bool): If True, fetch the 'resnet34' checkpoint listed in
          model_urls and load it with its 'fc.' entries removed.
  """
  net = ResNet(BasicBlock, [3, 4, 6, 3])
  if not pretrained:
    return net
  checkpoint = model_zoo.load_url(model_urls['resnet34'])
  net.load_state_dict(remove_fc(checkpoint))
  return net


def resnet50(pretrained=False):
  """Build a ResNet-50 trunk (Bottleneck, stage depths [3, 4, 6, 3]).

  Args:
      pretrained (bool): If True, fetch the 'resnet50' checkpoint listed in
          model_urls (stored under "./ResNet_Model") and load it with its
          'fc.' entries removed.
  """
  net = ResNet(Bottleneck, [3, 4, 6, 3])
  if not pretrained:
    return net
  # Unlike the sibling constructors, the download directory is passed explicitly.
  checkpoint = model_zoo.load_url(model_urls['resnet50'], model_dir="./ResNet_Model")
  net.load_state_dict(remove_fc(checkpoint))
  return net


def resnet101(pretrained=False):
  """Build a ResNet-101 trunk (Bottleneck, stage depths [3, 4, 23, 3]).

  Args:
      pretrained (bool): If True, fetch the 'resnet101' checkpoint listed in
          model_urls and load it with its 'fc.' entries removed.
  """
  net = ResNet(Bottleneck, [3, 4, 23, 3])
  if not pretrained:
    return net
  checkpoint = model_zoo.load_url(model_urls['resnet101'])
  net.load_state_dict(remove_fc(checkpoint))
  return net


def resnet152(pretrained=False):
  """Build a ResNet-152 trunk (Bottleneck, stage depths [3, 8, 36, 3]).

  Args:
      pretrained (bool): If True, fetch the 'resnet152' checkpoint listed in
          model_urls and load it with its 'fc.' entries removed.
  """
  net = ResNet(Bottleneck, [3, 8, 36, 3])
  if not pretrained:
    return net
  checkpoint = model_zoo.load_url(model_urls['resnet152'])
  net.load_state_dict(remove_fc(checkpoint))
  return net


================================================
FILE: paper_experiments/models/combination_model.py
================================================
import pdb

import numpy as np
import torch.nn as nn

class CombiNet(nn.Module):
	"""Three fully connected layers with an additive skip from input to output.

	Both hidden layers are 2*hidden_units wide. Because the input is added to
	the output of the last layer, in_dim and out_dim have to be compatible for
	that addition. All layers are initialised through weight_init.
	"""
	def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
		super().__init__()
		width = 2*hidden_units
		self.fc1 = nn.Linear(in_dim, width)
		self.fc2 = nn.Linear(width, width)
		self.fc3 = nn.Linear(width, out_dim)
		self.relu = nn.ReLU()
		self.apply(weight_init)

	def forward(self, x):
		hidden = self.relu(self.fc1(x))
		hidden = self.relu(self.fc2(hidden))
		out = self.fc3(hidden)
		# Residual connection: the raw input is added in place to the fc3 output.
		out += x
		return out

class CombiLSTM(nn.Module):
	"""Linear encoder -> single-layer LSTM -> linear decoder with a skip connection.

	The input is normalised with nn.functional.normalize and that normalised
	input is added to the (also normalised) decoder output. All layers are
	initialised through weight_init.
	"""
	def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
		super().__init__()
		self.in_linear1 = nn.Linear(in_dim, hidden_units)
		self.in_linear2 = nn.Linear(hidden_units, hidden_units)
		self.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)
		self.out_linear1 = nn.Linear(hidden_units, hidden_units)
		self.out_linear2 = nn.Linear(hidden_units, out_dim)
		self.relu = nn.ReLU()
		self.apply(weight_init)

	def forward(self, x, hidden = None):
		"""Return (output, hidden); `hidden` is the LSTM state to pass to the next call."""
		skip = nn.functional.normalize(x)

		encoded = self.relu(self.in_linear1(skip))
		encoded = self.in_linear2(encoded)

		# The LSTM is built without batch_first, so inserting axis 1 adds a
		# batch axis of size one and leaves axis 0 as the sequence axis.
		encoded = encoded.unsqueeze(1)
		# Passing hidden=None is the same as omitting it: the LSTM then starts
		# from its default initial state.
		recurrent, hidden = self.rnn(encoded, hidden)
		recurrent = recurrent.squeeze(1)

		decoded = self.relu(self.out_linear1(recurrent))
		decoded = nn.functional.normalize(self.out_linear2(decoded))
		decoded += skip
		return decoded, hidden

def weight_init(m):
	"""Xavier-normal initialiser intended for use with nn.Module.apply.

	nn.Linear weights are initialised with gain sqrt(2); for nn.LSTM the
	layer-0 input-hidden and hidden-hidden weights use the default gain.
	Biases and every other module type are left untouched. Only exact type
	matches are handled, not subclasses.
	"""
	layer_kind = type(m)
	if layer_kind is nn.Linear:
		nn.init.xavier_normal_(m.weight, gain=np.sqrt(2))
		return
	if layer_kind is nn.LSTM:
		for param_name in ('weight_ih_l0', 'weight_hh_l0'):
			nn.init.xavier_normal_(getattr(m, param_name))


================================================
FILE: paper_experiments/models/deep_sort_model.py
================================================
import tensorflow as tf
from skimage.transform import resize
import numpy as np

class ImageEncoder(object):
    """Runs a serialized TensorFlow GraphDef to turn an image into a feature vector.

    The graph is imported under the "net" name prefix. `image_shape` is the
    input tensor's shape without its leading axis and `feature_dim` is the
    last dimension of the output tensor.
    """

    def __init__(self, checkpoint_filename="weights/deep_sort_weights.pb", input_name="images",
                 output_name="features"):
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        self.session = tf.Session(config=session_config)

        graph_def = tf.GraphDef()
        with tf.gfile.GFile(checkpoint_filename, "rb") as serialized_graph:
            graph_def.ParseFromString(serialized_graph.read())
        tf.import_graph_def(graph_def, name="net")

        default_graph = tf.get_default_graph()
        self.input_var = default_graph.get_tensor_by_name("net/%s:0" % input_name)
        self.output_var = default_graph.get_tensor_by_name("net/%s:0" % output_name)

        # The output must be rank 2 and the input rank 4.
        output_shape = self.output_var.get_shape()
        input_shape = self.input_var.get_shape()
        assert len(output_shape) == 2
        assert len(input_shape) == 4
        self.feature_dim = output_shape.as_list()[-1]
        self.image_shape = input_shape.as_list()[1:]

    def __call__(self, data_x):
        """Encode data_x[0]; any further entries of data_x are ignored."""
        # Resize to the shape the imported graph expects, then restore the
        # leading axis that indexing removed.
        first_image = resize(data_x[0], self.image_shape, anti_aliasing=True, mode='reflect')
        batch = np.expand_dims(first_image, 0)
        return self.session.run(self.output_var, feed_dict={self.input_var: batch})

# Running this module directly only constructs an encoder with its default
# arguments, which loads the default weights file.
if __name__ == '__main__':
    encoder = ImageEncoder()

================================================
FILE: paper_experiments/models/featurepointnet_model.py
================================================
import os, pdb
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser

import utils.featurepointnet_tf_util as tf_util
import utils.featurepointnet_model_util as model_util
from utils.calibration import Calibration, OmniCalibration

batch_size = 45 #TODO: Update if needed?

class FPointNet():
    def __init__(self, config_path):
        ''' Build the Frustum PointNet graph, open a session and restore the weights.

        Input:
            config_path: path of an INI-style config file whose [general]
                section provides `num_point` (int) and `model_path` (the
                checkpoint handed to tf.train.Saver.restore)
        The tensors needed at inference time are collected in self.ops.
        '''
        # SafeConfigParser was only a deprecated alias of ConfigParser and was
        # removed in Python 3.12; ConfigParser is the drop-in replacement.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        self.num_point = parser.getint('general', 'num_point')
        self.model_path = parser.get('general', 'model_path')

        with tf.device('/gpu:'+str('0')):
            pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \
            heading_class_label_pl, heading_residual_label_pl, \
            size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            # depth_feature is produced by get_model, as a by-product of the
            # box-estimation network.
            end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)
            self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))
            loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points)
            self.saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        self.sess = tf.Session(config=config)

        #Initialize variables
        self.sess.run(tf.global_variables_initializer())
        # Restore variables from disk.
        self.saver.restore(self.sess, self.model_path)
        self.ops = {'pointclouds_pl': pointclouds_pl,
               'one_hot_vec_pl': one_hot_vec_pl,
               'labels_pl': labels_pl,
               'centers_pl': centers_pl,
               'heading_class_label_pl': heading_class_label_pl,
               'heading_residual_label_pl': heading_residual_label_pl,
               'size_class_label_pl': size_class_label_pl,
               'size_residual_label_pl': size_residual_label_pl,
               'is_training_pl': is_training_pl,
               'logits': end_points['mask_logits'],
               'center': end_points['center'],
               'end_points': end_points,
               'depth_feature':depth_feature,
               'loss': loss}

    # @profile
    def __call__(self, input_point_cloud, rot_angle, peds=False):
        ''' Run inference for frustum pointnets in batch mode.

        Input:
            input_point_cloud: numpy array with one frustum cloud per entry
                along axis 0; it is padded with (k, self.num_point, 4) zero
                blocks, so it is expected to be (num_inputs, self.num_point, 4)
            rot_angle: indexable with one entry per input cloud, forwarded to
                model_util.from_prediction_to_label_format
            peds: if True, entry 1 of the one-hot class vector is set;
                otherwise the vector stays all zeros
        Output:
            boxes: one row per input cloud, the output of
                model_util.from_prediction_to_label_format with entries 4 and
                5 swapped
            mask_mean_prob: zeros of shape (num_inputs,); no score is computed
            features: (num_inputs, 512) array taken from the depth feature op
        Note:
            an input with zero clouds is not supported, np.vstack is called on
            an empty list in that case.
        '''
        num_inputs = input_point_cloud.shape[0]

        one_hot_vec = np.zeros((batch_size,3))
        if peds:
            one_hot_vec[:, 1] = 1

        # Every session run is fed exactly batch_size clouds, so pad the input
        # with all-zero clouds up to a whole number of batches.
        remainder = num_inputs % batch_size
        if remainder != 0:
            padding = np.zeros((batch_size - remainder, self.num_point, 4))
            input_point_cloud = np.vstack([input_point_cloud, padding])
        num_padded = input_point_cloud.shape[0]

        centers = np.zeros((num_padded, 3))
        heading_logits = np.zeros((num_padded, model_util.NUM_HEADING_BIN))
        heading_residuals = np.zeros((num_padded, model_util.NUM_HEADING_BIN))
        size_logits = np.zeros((num_padded, model_util.NUM_SIZE_CLUSTER))
        size_residuals = np.zeros((num_padded, model_util.NUM_SIZE_CLUSTER, 3))
        mask_mean_prob = np.zeros((num_inputs,)) # placeholder, never filled in
        features = np.zeros((num_padded, 512))

        ep = self.ops['end_points']
        fetches = [self.ops['center'],
                   ep['heading_scores'], ep['heading_residuals'],
                   ep['size_scores'], ep['size_residuals'],
                   self.ops['depth_feature']]

        for start in range(0, num_padded, batch_size):
            rows = slice(start, start + batch_size)
            feed_dict = {
                self.ops['pointclouds_pl']: input_point_cloud[rows],
                self.ops['one_hot_vec_pl']: one_hot_vec,
                self.ops['is_training_pl']: False}

            batch_centers, \
            batch_heading_scores, batch_heading_residuals, \
            batch_size_scores, batch_size_residuals, batch_features = \
                self.sess.run(fetches, feed_dict=feed_dict)

            centers[rows] = batch_centers
            heading_logits[rows] = batch_heading_scores
            heading_residuals[rows] = batch_heading_residuals
            size_logits[rows] = batch_size_scores
            size_residuals[rows] = batch_size_residuals
            features[rows] = batch_features[:,0,:]

        # Keep only the residuals that belong to the highest-scoring class.
        heading_cls = np.argmax(heading_logits, 1) # B
        size_cls = np.argmax(size_logits, 1) # B
        heading_res = np.vstack([heading_residuals[i, heading_cls[i]] for i in range(heading_cls.shape[0])])
        size_res = np.vstack([size_residuals[i, size_cls[i], :] for i in range(size_cls.shape[0])])

        #TODO: Make this accept batches if wanted
        boxes = []
        for i in range(num_inputs): # padded rows are skipped
            box = np.array(model_util.from_prediction_to_label_format(centers[i], heading_cls[i], heading_res[i], size_cls[i], size_res[i], rot_angle[i]))
            box[6] = np.squeeze(box[6])
            # Swap entries 4 and 5
            box[4], box[5] = box[5], box[4]
            boxes.append(box)
        boxes = np.vstack(boxes)
        return boxes, mask_mean_prob, features[:num_inputs]


    def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):
        ''' 3D instance segmentation PointNet v1 network.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
            end_points: dict, returned unchanged
        Output:
            logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object
            end_points: dict
        '''
        def conv_bn(inputs, channels, scope):
            # 1x1 convolution with batch norm; the scope names are kept as-is
            # because they name the variables restored from the checkpoint.
            return tf_util.conv2d(inputs, channels, [1,1],
                                  padding='VALID', stride=[1,1],
                                  bn=True, is_training=is_training,
                                  scope=scope, bn_decay=bn_decay)

        num_point = point_cloud.get_shape()[1].value

        # Per-point features
        net = tf.expand_dims(point_cloud, 2)
        net = conv_bn(net, 64, 'conv1')
        net = conv_bn(net, 64, 'conv2')
        point_feat = conv_bn(net, 64, 'conv3')
        net = conv_bn(point_feat, 128, 'conv4')
        net = conv_bn(net, 1024, 'conv5')

        # Global feature: max over all points, extended with the class vector
        global_feat = tf_util.max_pool2d(net, [num_point,1],
                                         padding='VALID', scope='maxpool')
        one_hot_expanded = tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)
        global_feat = tf.concat([global_feat, one_hot_expanded], axis=3)

        # Attach the global feature to every per-point feature
        global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
        net = tf.concat(axis=3, values=[point_feat, global_feat_expand])

        for channels, scope in ((512, 'conv6'), (256, 'conv7'), (128, 'conv8'), (128, 'conv9')):
            net = conv_bn(net, channels, scope)
        net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)

        # Final two-channel layer without batch norm or activation
        logits = tf_util.conv2d(net, 2, [1,1],
                                padding='VALID', stride=[1,1], activation_fn=None,
                                scope='conv10')
        return tf.squeeze(logits, [2]), end_points # BxNxC
     
    def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):
        ''' 3D Box Estimation PointNet v1 network.
        Input:
            object_point_cloud: TF tensor in shape (B,M,C)
                point clouds in object coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
            end_points: dict, returned unchanged
        Output:
            output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
                including box centers, heading bin class scores and residuals,
                and size cluster scores and residuals
            end_points: dict
            features: the 'conv-reg4' output max-reduced over axis 1
        '''
        def conv_bn(inputs, channels, scope):
            # 1x1 convolution with batch norm; the scope names are kept as-is
            # because they name the variables restored from the checkpoint.
            return tf_util.conv2d(inputs, channels, [1,1],
                                  padding='VALID', stride=[1,1],
                                  bn=True, is_training=is_training,
                                  scope=scope, bn_decay=bn_decay)

        num_point = object_point_cloud.get_shape()[1].value

        net = tf.expand_dims(object_point_cloud, 2)
        for channels, scope in ((128, 'conv-reg1'), (128, 'conv-reg2'),
                                (256, 'conv-reg3'), (512, 'conv-reg4')):
            net = conv_bn(net, channels, scope)

        # Returned separately from the box regression output
        features = tf.reduce_max(net, axis = 1)

        pooled = tf_util.max_pool2d(net, [num_point,1],
            padding='VALID', scope='maxpool2')
        pooled = tf.squeeze(pooled, axis=[1,2])
        head = tf.concat([pooled, one_hot_vec], axis=1)
        head = tf_util.fully_connected(head, 512, scope='fc1', bn=True,
            is_training=is_training, bn_decay=bn_decay)
        head = tf_util.fully_connected(head, 256, scope='fc2', bn=True,
            is_training=is_training, bn_decay=bn_decay)

        # The first 3 numbers: box center coordinates (cx,cy,cz),
        # the next NUM_HEADING_BIN*2:  heading bin class scores and bin residuals
        # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals
        output_width = 3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4
        output = tf_util.fully_connected(head, output_width, activation_fn=None, scope='fc3')
        return output, end_points, features

    def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):
        ''' Frustum PointNets model. The model predicts 3D object masks and
        amodal bounding boxes for objects in frustum point clouds.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
        Output:
            end_points: dict (map from name strings to TF tensors)
            features: feature tensor returned by the box estimation network
        '''
        end_points = {}

        # Stage 1: 3D instance segmentation PointNet
        seg_logits, end_points = self.get_instance_seg_v1_net(
            point_cloud, one_hot_vec, is_training, bn_decay, end_points)
        end_points['mask_logits'] = seg_logits

        # Stage 2: select the masked points and translate them to their centroid
        masked_xyz, mask_centroid, end_points = model_util.point_cloud_masking(
            point_cloud, seg_logits, end_points)

        # Stage 3: T-Net center regression and coordinate translation
        center_delta, end_points = model_util.get_center_regression_net(
            masked_xyz, one_hot_vec, is_training, bn_decay, end_points)
        stage1_center = center_delta + mask_centroid # Bx3
        end_points['stage1_center'] = stage1_center
        # Object point cloud expressed in object coordinates
        recentered_xyz = masked_xyz - tf.expand_dims(center_delta, 1)

        # Stage 4: amodal box estimation PointNet
        box_output, end_points, features = self.get_3d_box_estimation_v1_net(
            recentered_xyz, one_hot_vec, is_training, bn_decay, end_points)

        # Parse the raw output into 3D box parameters
        end_points = model_util.parse_output_to_tensors(box_output, end_points)
        end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3

        return end_points, features
    
    def get_depth_feature_op(self, is_training):
        ''' Build the 'conv-reg1'..'conv-reg4' stack on self.object_pointcloud
        and return its output max-reduced over axis 1.

        NOTE(review): the scope names match those used by
        get_3d_box_estimation_v1_net; the only call site visible here is
        commented out in __init__ -- confirm before re-enabling.
        '''
        net = tf.expand_dims(self.object_pointcloud, 2)
        layer_specs = ((128, 'conv-reg1'), (128, 'conv-reg2'),
                       (256, 'conv-reg3'), (512, 'conv-reg4'))
        for channels, scope in layer_specs:
            net = tf_util.conv2d(net, channels, [1,1],
                                 padding='VALID', stride=[1,1],
                                 bn=True, is_training=is_training,
                                 scope=scope, bn_decay=None)
        return tf.reduce_max(net, axis = 1)

    def get_depth_feature(self, object_pointcloud):
        
        feed_dict = {self.object_pointcloud:object_pointcloud, self.ops['is_training_pl']:False}
        depth_feature = self.sess.run([self.ops['depth_feature']], feed_dict = feed_dict)
        return depth_feature

    def softmax(self, x):
        ''' Numpy softmax over the last axis of x.

        The per-row maximum is subtracted before exponentiating so that large
        inputs do not overflow.
        '''
        last_axis = x.ndim - 1
        shifted = x - np.max(x, axis=last_axis, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=last_axis, keepdims=True)

def create_depth_model(model, config_path):
    """Instantiate the depth model selected by name.

    Args:
        model: 'FPointNet' or 'PointNet'.
        config_path: config file path forwarded to the model constructor.

    Returns:
        The constructed model, or None when `model` is neither name.
    """
    if model == 'FPointNet':
        return FPointNet(config_path)
    if model == 'PointNet':
        # PointNet is defined in a sibling module that this file never imports,
        # so this branch used to fail with a NameError. Import it lazily here.
        # NOTE(review): module path assumed from the repository layout
        # (paper_experiments/models/pointnet_model.py) -- confirm.
        from models.pointnet_model import PointNet
        return PointNet(config_path)
    return None

================================================
FILE: paper_experiments/models/pointnet_model.py
================================================
import os, pdb
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
from utils.pointnet_transform_nets import input_transform_net, feature_transform_net
import utils.pointnet_tf_util as pointnet_tf_util


class PointNet():
    def __init__(self, config_path):
        """ Build the PointNet feature graph, open a session and restore the weights.

        config_path: path of an INI-style config file whose [general] section
            provides `num_point` (int) and `depth_model_path` (the checkpoint
            handed to tf.train.Saver.restore).
        """
        # SafeConfigParser was only a deprecated alias of ConfigParser and was
        # removed in Python 3.12; ConfigParser is the drop-in replacement.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        num_points = parser.getint('general', 'num_point')
        depth_model_path = parser.get('general', 'depth_model_path')

        with tf.device('/gpu:'+str(0)):
            # The placeholder is created with a batch size of 1
            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)
            self.is_training_pl = tf.placeholder(tf.bool, shape=())

            # simple model
            feature = self.get_model(self.pointclouds_pl, self.is_training_pl)
            self.feature = feature

        # Add ops to save and restore all the variables.
        self.saver = tf.train.Saver()
        #Create session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.sess = tf.Session(config=config)
        #Initialize variables
        self.sess.run(tf.global_variables_initializer())
        #Restore model weights
        self.saver.restore(self.sess, depth_model_path)

    def __call__(self, input_point_cloud):
        feed_dict = {self.pointclouds_pl: input_point_cloud,
                     self.is_training_pl: False}
        features = self.sess.run(self.feature,feed_dict=feed_dict)
        return features

    def placeholder_inputs(self, batch_size, num_point):
        """ Create the point cloud and label placeholders.

        The point cloud placeholder is float32 with shape (batch_size, None, 3);
        the label placeholder is int32 with shape batch_size. `num_point` is
        accepted but not used.
        """
        cloud_shape = (batch_size, None, 3)
        clouds = tf.placeholder(tf.float32, shape=cloud_shape)
        labels = tf.placeholder(tf.int32, shape=batch_size)
        return clouds, labels


    def get_model(self, point_cloud, is_training, bn_decay=None):
        """ PointNet feature extractor, input is BxNx3.

        Returns the 'conv5' output (1024 channels) max-reduced over axis 1 and
        reshaped to [B, -1]; no classification layers are built.
        """
        def conv_bn(inputs, channels, kernel, scope):
            # Convolution with batch norm; the scope names are kept as-is
            # because they name the variables restored from the checkpoint.
            return pointnet_tf_util.conv2d(inputs, channels, kernel,
                                           padding='VALID', stride=[1,1],
                                           bn=True, is_training=is_training,
                                           scope=scope, bn_decay=bn_decay)

        batch_size = point_cloud.get_shape()[0].value
        end_points = {}

        # Input transform applied to the raw points
        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE):
            input_transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
        aligned_points = tf.matmul(point_cloud, input_transform)
        net = tf.expand_dims(aligned_points, -1)

        net = conv_bn(net, 64, [1,3], 'conv1')
        net = conv_bn(net, 64, [1,1], 'conv2')

        # Feature transform applied to the 64-channel point features
        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE):
            feature_transform = feature_transform_net(net, is_training, bn_decay, K=64)
        end_points['transform'] = feature_transform
        net = tf.matmul(tf.squeeze(net, axis=[2]), feature_transform)
        net = tf.expand_dims(net, [2])

        for channels, scope in ((64, 'conv3'), (128, 'conv4'), (1024, 'conv5')):
            net = conv_bn(net, channels, [1,1], scope)

        # Symmetric function: max pooling
        pooled = tf.reduce_max(net, axis = 1)

        return tf.reshape(pooled, [batch_size, -1])


    def get_loss(self, pred, label, end_points, reg_weight=0.001):
        """ Classification loss plus an orthogonality penalty on the feature transform.

            pred: B*NUM_CLASSES,
            label: B,
            end_points: dict holding the BxKxK tensor under 'transform'
            reg_weight: weight of the orthogonality penalty

            Returns mean cross entropy + reg_weight * l2_loss(T T^t - I).
        """
        # numpy is not imported at the top of this module, so the np.eye call
        # below used to raise NameError; import it locally.
        import numpy as np

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
        classify_loss = tf.reduce_mean(loss)
        tf.summary.scalar('classify loss', classify_loss)

        # Enforce the transformation as orthogonal matrix
        transform = end_points['transform'] # BxKxK
        K = transform.get_shape()[1].value
        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
        mat_diff = mat_diff - tf.constant(np.eye(K), dtype=tf.float32)
        mat_diff_loss = tf.nn.l2_loss(mat_diff)
        tf.summary.scalar('mat loss', mat_diff_loss)

        return classify_loss + mat_diff_loss * reg_weight
    

================================================
FILE: paper_experiments/models/resnet_reid_models.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms
import torch.nn.functional as F

class FeatureResNet(nn.Module):
    """torchvision ResNet (18, 34 or 50 layers) with the `fc` sub-module left out.

    `output_dim` is the input width of the dropped `fc` layer. `forward` runs
    every remaining sub-module in registration order and flattens the result
    to (batch, -1).
    """
    def __init__(self,n_layers=50,pretrained=True):
        super(FeatureResNet,self).__init__()
        backbones = ((50, models.resnet50), (34, models.resnet34), (18, models.resnet18))
        for depth, build in backbones:
            if n_layers == depth:
                old_model = build(pretrained=pretrained)
                break
        else:
            raise NotImplementedError('resnet%s is not found'%(n_layers))

        # Re-register every child whose name does not contain 'fc'
        for name, child in old_model._modules.items():
            if 'fc' not in name:
                self.add_module(name, child)
        self.output_dim = old_model.fc.in_features
        self.pretrained = pretrained

    def forward(self,x):
        # Each sub-module is run through nn.parallel.data_parallel in turn
        for child in self._modules.values():
            x = nn.parallel.data_parallel(child, x)
        return x.view(x.size(0), -1)

class ResNet(nn.Module):
    """torchvision ResNet (18, 34 or 50 layers) whose `fc` layer is replaced by
    a new nn.Linear with `n_id` outputs.

    `forward` returns (fc output, flattened features fed to fc).
    """
    def __init__(self,n_id,n_layers=50,pretrained=True):
        super(ResNet,self).__init__()
        backbones = ((50, models.resnet50), (34, models.resnet34), (18, models.resnet18))
        for depth, build in backbones:
            if n_layers == depth:
                old_model = build(pretrained=pretrained)
                break
        else:
            raise NotImplementedError('resnet%s is not found'%(n_layers))

        for name, child in old_model._modules.items():
            self.add_module(name, child)
        # Swap the original classifier for one with n_id outputs
        feature_dim = self.fc.in_features
        self.fc = nn.Linear(feature_dim, n_id)
        self.pretrained = pretrained

    def forward(self,x):
        # Run every sub-module except the classifier, in registration order
        for name, child in self._modules.items():
            if name == 'fc':
                continue
            x = child(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat), flat

class NLayersFC(nn.Module):
    """Fully connected stack wrapped in an nn.Sequential.

    With n_layers == 0 it is a single Linear(in_dim, out_dim). Otherwise it is
    Linear(in_dim, hidden_dim) + ReLU, then (n_layers - 1) times
    Linear(hidden_dim, hidden_dim) + ReLU, then Linear(hidden_dim, out_dim).
    """
    def __init__(self, in_dim, out_dim, hidden_dim=1, n_layers=0):
        super(NLayersFC, self).__init__()
        if n_layers == 0:
            stack = [nn.Linear(in_dim, out_dim)]
        else:
            stack = [nn.Linear(in_dim, hidden_dim), nn.ReLU(True)]
            for _ in range(n_layers-1):
                stack.extend((nn.Linear(hidden_dim, hidden_dim), nn.ReLU(True)))
            stack.append(nn.Linear(hidden_dim, out_dim))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        return self.model(x)

class ICT_ResNet(nn.Module):
    """torchvision ResNet backbone with three linear heads on the shared feature.

    The heads are ``fc`` (``n_id`` outputs), ``fc_c`` (``n_color`` outputs) and
    ``fc_t`` (``n_type`` outputs).

    Raises:
        NotImplementedError: if ``n_layers`` is not 50, 34 or 18.
    """

    def __init__(self, n_id, n_color, n_type, n_layers=50, pretrained=True):
        super(ICT_ResNet, self).__init__()
        builder = None
        for depth, ctor in ((50, models.resnet50),
                            (34, models.resnet34),
                            (18, models.resnet18)):
            if n_layers == depth:
                builder = ctor
                break
        if builder is None:
            raise NotImplementedError('resnet%s is not found'%(n_layers))
        backbone = builder(pretrained=pretrained)

        # Adopt every child of the backbone under its original name.
        for child_name, child in backbone._modules.items():
            self.add_module(child_name, child)
        # All three heads consume the same flattened feature width.
        feat_dim = self.fc.in_features
        self.fc = nn.Linear(feat_dim, n_id)
        self.fc_c = nn.Linear(feat_dim, n_color)
        self.fc_t = nn.Linear(feat_dim, n_type)
        self.pretrained = pretrained

    def forward(self, x):
        """Return the ``(fc, fc_c, fc_t)`` head outputs for the input batch ``x``."""
        for child_name, child in self._modules.items():
            # Any module whose name contains 'fc' is a head and is applied afterwards.
            if 'fc' in child_name:
                continue
            x = child(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat), self.fc_c(flat), self.fc_t(flat)

class TripletNet(nn.Module):
    """Applies one shared network to three inputs and compares their features.

    The wrapped ``net`` must return a ``(prediction, feature)`` pair per call.
    """

    def __init__(self, net):
        super(TripletNet, self).__init__()
        self.net = net

    def forward(self, x, y, z):
        """Return ``(dist(x, y), dist(x, z), pred_x, pred_y, pred_z)``.

        Distances are L2 pairwise distances between the feature outputs.
        """
        (pred_x, anchor), (pred_y, positive), (pred_z, negative) = (
            self.net(x),
            self.net(y),
            self.net(z),
        )
        dist_pos = F.pairwise_distance(anchor, positive, 2)
        dist_neg = F.pairwise_distance(anchor, negative, 2)
        return dist_pos, dist_neg, pred_x, pred_y, pred_z

if __name__ == '__main__':
    # Smoke test: build an 18-layer ICT_ResNet on the GPU, print it, then print the
    # size of each of its three head outputs for a constant 1x3x224x224 input.
    netM = ICT_ResNet(n_id=1000,n_color=7,n_type=7,n_layers=18,pretrained=True).cuda()

    print(netM)
    probe = Variable(torch.ones(1,3,224,224).cuda()/2.)
    output = netM(probe)
    for head_idx in range(3):
        print(output[head_idx].size())


================================================
FILE: paper_experiments/models/yolo_models.py
================================================
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

from PIL import Image

from utils.yolo_utils.parse_config import *
from utils.yolo_utils.utils import build_targets
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.patches as patches


def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` is popped off (the list is modified in place)
    and returned as the hyperparameter block. Every remaining entry produces one
    ``nn.Sequential`` in the returned ``nn.ModuleList``; an entry whose ``type`` is
    not handled below contributes an empty ``nn.Sequential``.

    Args:
        module_defs: list of dict-like layer definitions; numeric fields are parsed
            with ``int()``.

    Returns:
        ``(hyperparams, module_list)``.
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Layers that do not set their own channel count (maxpool, upsample, yolo,
        # unrecognised types) carry over the previous layer's count. Seeding it here
        # also covers the case where such a layer is the very first definition.
        filters = output_filters[-1]

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # Batch norm supplies its own shift, so the conv bias is dropped.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the 2x2 stride-1 pool.
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            upsample = Interpolate(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Output channel count is the sum over all routed layers.
            filters = sum([output_filters[layer_i] for layer_i in layers])
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs -> masked subset
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[k], anchors[k + 1]) for k in range(0, len(anchors), 2)]
            anchors = [anchors[k] for k in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list


class EmptyLayer(nn.Module):
    """Parameter-free stand-in module registered for 'route' and 'shortcut' entries."""

    def __init__(self):
        super().__init__()

class Interpolate(nn.Module):
    """Module wrapper around ``nn.functional.interpolate`` with fixed scale factor and mode."""

    def __init__(self, scale_factor, mode):
        super(Interpolate, self).__init__()
        self.mode = mode
        self.scale_factor = scale_factor
        self.interp = nn.functional.interpolate

    def forward(self, x):
        """Resample ``x`` using the stored ``scale_factor`` and ``mode``."""
        return self.interp(x, scale_factor=self.scale_factor, mode=self.mode)

class YOLOLayer(nn.Module):
    """Detection layer

    Decodes a raw prediction map into boxes, objectness and class scores. When
    ``targets`` are passed to ``forward`` it instead computes the training loss.

    Args:
        anchors: sequence of ``(width, height)`` anchor pairs.
        num_classes: number of class scores per box.
        img_dim: image size used to derive the grid stride.
    """

    def __init__(self, anchors, num_classes, img_dim):
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.bbox_attrs = 5 + num_classes  # 4 box values + 1 confidence + class scores
        self.image_dim = img_dim
        self.ignore_thres = 0.5
        self.lambda_coord = 1

        # 'mean' is the supported spelling of the deprecated 'elementwise_mean'.
        self.mse_loss = nn.MSELoss(reduction='mean')  # Coordinate loss
        self.bce_loss = nn.BCELoss(reduction='mean')  # Confidence loss
        self.ce_loss = nn.CrossEntropyLoss()  # Class loss

    def forward(self, x, targets=None):
        """Decode predictions, or compute the loss when ``targets`` is given.

        ``x`` is reshaped to ``(batch, num_anchors, bbox_attrs, grid, grid)`` where
        ``batch = x.size(0)`` and ``grid = x.size(2)``.

        Returns:
            Without ``targets``: a tensor of shape ``(batch, -1, 5 + num_classes)``
            holding boxes scaled by the stride, confidence and class scores.
            With ``targets``: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision)`` where all but ``loss`` are
            plain Python numbers.
        """
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            # No prediction above the 0.5 threshold: report zero precision rather
            # than dividing by zero.
            precision = float(nCorrect / nProposals) if nProposals else 0.0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true]
            )
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # If not in training phase return predictions
            output = torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output


class Darknet(nn.Module):
    """YOLOv3 object detection model

    The network is built from the configuration at ``config_path`` and its weights
    are loaded immediately from the ``'path'`` entry of the last module definition.
    """

    def __init__(self, config_path):
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        # create_modules pops the hyperparameter block off self.module_defs, so the
        # remaining definitions line up one-to-one with self.module_list.
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.seen = 0
        self.header_info = np.array([0, 0, 0, self.seen, 0])
        self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
        self.load_weights(self.module_defs[-1]['path'])

    def forward(self, x, targets=None):
        """Run the network.

        Returns the sum of the per-YOLO-layer losses when ``targets`` is given,
        otherwise the YOLO layer outputs concatenated along dimension 1.
        Per-component losses are accumulated in ``self.losses``.
        """
        is_training = targets is not None
        output = []
        self.losses = defaultdict(float)
        layer_outputs = []
        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
            if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
                x = module(x)
            elif module_def["type"] == "route":
                layer_i = [int(idx) for idx in module_def["layers"].split(",")]
                x = torch.cat([layer_outputs[idx] for idx in layer_i], 1)
            elif module_def["type"] == "shortcut":
                layer_i = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif module_def["type"] == "yolo":
                # Train phase: get loss
                if is_training:
                    x, *losses = module[0](x, targets)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                # Test phase: Get detections
                else:
                    x = module(x)
                output.append(x)
            # Every layer's output is recorded so route/shortcut can index into it.
            layer_outputs.append(x)

        # Recall/precision were summed over the YOLO layers; the divisor is fixed at 3.
        self.losses["recall"] /= 3
        self.losses["precision"] /= 3
        return sum(output) if is_training else torch.cat(output, 1)

    def load_weights(self, weights_path):
        """Parses and loads the weights stored in 'weights_path'

        The file starts with five int32 header values (kept in ``self.header_info``;
        the fourth is stored as ``self.seen``) followed by float32 weights that are
        consumed in layer order by the convolutional blocks.
        """

        # Open the weights file; the context manager closes it even if a read fails
        with open(weights_path, "rb") as fp:
            header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values
            weights = np.fromfile(fp, dtype=np.float32)  # The rest are weights

        # Needed to write header when saving weights
        self.header_info = header
        self.seen = header[3]

        ptr = 0
        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
            if module_def["type"] == "convolutional":
                conv_layer = module[0]
                # Parse the flag with int(), as create_modules does: a raw "0" string
                # from the config is truthy and would wrongly pick the BN branch.
                if int(module_def["batch_normalize"]):
                    # Load BN bias, weights, running mean and running variance
                    bn_layer = module[1]
                    num_b = bn_layer.bias.numel()  # Number of biases
                    # Bias
                    bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
                    bn_layer.bias.data.copy_(bn_b)
                    ptr += num_b
                    # Weight
                    bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
                    bn_layer.weight.data.copy_(bn_w)
                    ptr += num_b
                    # Running Mean
                    bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
                    bn_layer.running_mean.data.copy_(bn_rm)
                    ptr += num_b
                    # Running Var
                    bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
                    bn_layer.running_var.data.copy_(bn_rv)
                    ptr += num_b
                else:
                    # Load conv. bias
                    num_b = conv_layer.bias.numel()
                    conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
                    conv_layer.bias.data.copy_(conv_b)
                    ptr += num_b
                # Load conv. weights
                num_w = conv_layer.weight.numel()
                conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
                conv_layer.weight.data.copy_(conv_w)
                ptr += num_w

    def save_weights(self, path, cutoff=-1):
        """Writes the header and convolutional weights to ``path``.

        @:param path    - path of the new weights file
        @:param cutoff  - save layers between 0 and cutoff. The value is used as a
                          slice end, so the default of -1 covers every entry except
                          the last one.
        """
        with open(path, "wb") as fp:
            self.header_info[3] = self.seen
            self.header_info.tofile(fp)

            # Iterate through layers
            for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
                if module_def["type"] == "convolutional":
                    conv_layer = module[0]
                    # If batch norm, write bn first (same int() parsing as load_weights)
                    if int(module_def["batch_normalize"]):
                        bn_layer = module[1]
                        bn_layer.bias.data.cpu().numpy().tofile(fp)
                        bn_layer.weight.data.cpu().numpy().tofile(fp)
                        bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                        bn_layer.running_var.data.cpu().numpy().tofile(fp)
                    # Otherwise write the conv bias
                    else:
                        conv_layer.bias.data.cpu().numpy().tofile(fp)
                    # Write conv weights
                    conv_layer.weight.data.cpu().numpy().tofile(fp)


================================================
FILE: paper_experiments/requirements.txt
================================================
absl-py==0.7.0
astor==0.7.1
backcall==0.1.0
bleach==3.3.0
catkin-pkg==0.4.12
certifi==2018.11.29
chardet==3.0.4
cloudpickle==0.7.0
cupy==6.0.0
cycler==0.10.0
Cython==0.29.7
dask==2021.10.0
decorator==4.3.2
defusedxml==0.5.0
docutils==0.14
entrypoints==0.3
fastrlock==0.4
ffmpeg==1.4
gast==0.2.2
grpcio==1.18.0
gurobipy==8.1.0
html5lib==0.9999999
idna==2.8
imageio==2.5.0
imageio-ffmpeg==0.3.0
ipydatawidgets==4.0.0
ipykernel==5.1.0
ipympl==0.2.1
ipython==7.16.3
ipython-genutils==0.2.0
ipyvolume==0.5.1
ipywebrtc==0.4.3
ipywidgets==7.4.2
jedi==0.13.2
Jinja2==2.11.3
jsonschema==2.6.0
jupyter-client==5.2.4
jupyter-core==4.4.0
jupyterlab==1.2.21
jupyterlab-server==0.2.0
kiwisolver==1.0.1
lap==0.4.0
lapjv==1.3.1
line-profiler==2.1.1
llvmlite==0.28.0
Markdown==3.0.1
MarkupSafe==1.1.0
matplotlib==3.0.2
mistune==0.8.4
nbconvert==5.4.1
nbformat==4.4.0
networkx==2.2
notebook==6.4.1
numba==0.43.1
numpy==1.21.0
open3d-python==0.7.0.0
opencv-python==4.2.0.32
pandas==0.24.1
pandocfilters==1.4.2
parso==0.3.3
pexpect==4.6.0
pickleshare==0.7.5
Pillow==9.0.0
pptk==0.1.0
prometheus-client==0.5.0
prompt-toolkit==2.0.8
protobuf==3.6.1
ptyprocess==0.6.0
pycocotools==2.0.0
Pygments==2.7.4
pyparsing==2.3.1
pypcd==0.1.1
python-dateutil==2.8.0
python-lzf==0.2.4
pythreejs==2.0.2
pytz==2018.9
PyWavelets==1.0.1
PyYAML==5.4
pyzmq==17.1.2
requests==2.21.0
rospkg==1.1.9
scikit-image==0.14.2
scikit-learn==0.20.2
scipy==1.2.0
seaborn==0.9.0
Send2Trash==1.5.0
six==1.12.0
sklearn==0.0
tensorboard==1.8.0
tensorboardX==1.6
tensorflow-gpu==2.5.2
termcolor==1.1.0
terminado==0.8.1
testpath==0.4.2
toolz==0.9.0
torch==1.0.1
torchvision==0.2.1
tornado==5.1.1
tqdm==4.30.0
traitlets==4.3.2
traittypes==0.2.1
urllib3==1.26.5
wcwidth==0.1.7
Werkzeug==0.15.3
widgetsnbextension==3.4.2


================================================
FILE: paper_experiments/track.py
================================================
import open3d as o3d
import torch
import argparse
import os, pdb, sys, copy, pickle
import time
import random
import numpy as np
import tensorflow as tf
from torch.utils.data import DataLoader
from tqdm import tqdm

from models.aligned_reid_model import Model as aligned_reid_model
from utils.yolo_utils.utils import non_max_suppression, load_classes
from models.combination_model import CombiNet, CombiLSTM
from utils.dataset import SequenceDataset, STIPDataset, collate_fn
from models.deep_sort_model import ImageEncoder as deep_sort_model
from utils.tracker import Tracker
from utils.tracker_3d import Tracker_3d
from utils.deep_sort_utils import non_max_suppression as deepsort_nms
from utils.visualise import draw_track
from utils.read_detections import read_ground_truth_2d_detections, read_ground_truth_3d_detections
from utils.tracking_utils import create_detector, convert_detections, combine_features
from utils.tracking_utils import non_max_suppression_3D, non_max_suppression_3D_prime
from utils.aligned_reid_utils import generate_features, generate_features_batched, get_image_patches, create_appearance_model
from utils.featurepointnet_model_util import generate_detections_3d, convert_depth_features
from models.featurepointnet_model import create_depth_model

def parse_arguments():
    """Parse the tracker's command-line options from ``sys.argv``.

    Besides the declared options, the returned namespace carries ``using_cuda``
    (CUDA is available and ``--use_cuda`` is true) and has trailing path separators
    stripped from ``sequence_folder``. The output folder is created if missing.

    Note that the ``store_false`` flags (-p, -o, -r, -c, -f, -j) default to True and
    are switched OFF when given on the command line.

    Raises:
        ValueError: if 3D tracking is requested while the point cloud is disabled.
    """

    def _str2bool(value):
        # argparse's ``type=bool`` turns every non-empty string (including "False")
        # into True, so boolean options are parsed explicitly instead.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser()
    parser.add_argument('--sequence_folder', type=str, default='data/KITTI/sequences/0001', help='path to image sequence')
    parser.add_argument('--output_folder', type=str, default='results', help='output folder')
    parser.add_argument('--aligned_reid_ckpt', type=str, default='weights/aligned_reid_market_weights.ckpt', help='path to model config file')
    parser.add_argument('--resnet_reid_ckpt', type=str, default='weights/resnet_reid.ckpt', help='path to model config file')
    parser.add_argument('--depth_model', type=str, default='FPointNet', help='type of depth model to use')
    parser.add_argument('--depth_config_path', type=str, default='config/featurepointnet.cfg', help='path to model config file')
    parser.add_argument('--appearance_model', type=str, default='resnet_reid', help='type of appearance model to use aligned_reid or deepsort or resnet_reid')
    parser.add_argument('--conf_thres', type=float, default=0.8, help='object confidence threshold')
    parser.add_argument('--depth_weight', type=float, default=1, help='weight of depth feature while concatenating')
    parser.add_argument('--nms_thresh', type=float, default=0.56, help='iou thresshold for non-maximum suppression')
    parser.add_argument('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation')
    parser.add_argument('--use_cuda', type=_str2bool, default=True, help='whether to use cuda if available')
    parser.add_argument('-p', '--point_cloud', action='store_false', help='Use to disable pointcloud')
    # store_false: optical-flow initiation is on by default and this flag turns it off.
    parser.add_argument('-o', '--optical_flow_initiation', action='store_false', help='Use to disable optical flow based velocity initiation')
    parser.add_argument('-q', '--perfect', action='store_true', help='whether to use perfect assignments')
    parser.add_argument('-g', '--ground_truth', action='store_true', help='whether to use ground truth detections')
    parser.add_argument('-r', '--reference', action='store_false', help='whether to use reference detections')
    parser.add_argument('-t', '--track_3d', action='store_true', help='whether to do 3d tracking')
    parser.add_argument('--ref_det', type = str, default = 'new_rrc_subcnn_car', help='lsvm, subcnn, regionlets, maskrcnn')
    parser.add_argument("--nn_budget", type=int, default=100, help="Maximum size of the appearance descriptors gallery. If None, no budget is enforced.")
    parser.add_argument("--dummy_node_cost_app", type=float, default=0.99, help="Dummy node appearance cost for JPDA (or maximum distnce when using deepsort)")
    parser.add_argument("--dummy_node_cost_iou", type=float, default=0.97, help="Dummy node iou cost for JPDA (or maximum distnce when using deepsort)")
    parser.add_argument("-c", "--combine_features", action = 'store_false', help="Whether to use trained MLP to combine features")
    parser.add_argument("-f", "--fpointnet", action = 'store_false', help="Whether to use F-PointNet for 3d detection")
    # The default previously ended in a stray '"' character, which made the path unloadable.
    parser.add_argument("--combo_model", default = 'weights/resnet_reid_fpointnet_combo_car/mlp__1570759353.0113978/best_checkpoint.tar', help="Trained MLP checkpoint to combine features")
    parser.add_argument("-j", "--JPDA", action = 'store_false', help="Whether to use JPDA for soft assignments")
    parser.add_argument("-l", "--LSTM", action = 'store_true', help="Whether to use LSTM for feature combination and update")
    parser.add_argument("--lstm_model", default = 'weights/aligned_reid_fpointnet_combo/lstm/best_checkpoint.tar', help="Trained LSTM checkpoint to combine features")
    parser.add_argument("-m","--m_best_sol", type=int, default=10, help="Number of solutions for JPDA")
    parser.add_argument("--log_data", action='store_true', help="Turn on full data logging")
    parser.add_argument("--max_age", type=int, default=2, help="Number of misses before termination")
    parser.add_argument("--n_init", type=int, default=2, help="Consecutive frames for tentative->confirmed")
    parser.add_argument("--assn_thresh", type=float, default=0.65, help="min prob for match")
    parser.add_argument("--matching_strategy", type=str, default="hungarian", help="matching strategy for JPDA (max_and_threshold, strict_max_pair, or hungarian)")
    parser.add_argument("--kf_appearance_feature", type=_str2bool, default=False, help="Whether to use kf state for apperance features")
    parser.add_argument('-i', "--use_imm", action = 'store_true', help='Whether to use IMM')
    parser.add_argument('-v', "--verbose", action = 'store_true', help='Verbose')
    parser.add_argument('--kf_process', type=float, default=5.2, help='kf 2d process noise factor')
    parser.add_argument('--kf_2d_meas', type=float, default=3.2, help='kf 2d measurement noise factor')
    parser.add_argument('--kf_3d_meas', type=float, default=0.25, help='kf 3d measurement noise factor')
    parser.add_argument('--pos_weight_3d', type=float, default=1, help='Weight on position covariance process noise in KF')
    parser.add_argument('--pos_weight', type=float, default=0.006, help='Weight on position covariance process noise in KF')
    parser.add_argument('--vel_weight', type=float, default=0.008, help='Weight on velocity covariance process noise in KF')
    parser.add_argument('--theta_weight', type=float, default=0.02, help='Weight on velocity covariance process noise in KF')
    parser.add_argument('--gate_limit', type=float, default=600, help='Maximum covariance value of the gate')
    parser.add_argument('--initial_uncertainty', type=float, default=1, help='Uncertainty scaling for initial covariance of track')
    parser.add_argument('--uncertainty_limit', type=float, default=1.5, help='Uncertainty limit at which to terminate tracks')
    parser.add_argument("--gate_full_state", action='store_true', help="Whether to gate on full kalman state, default is only position")
    parser.add_argument("--near_online", action = 'store_true', help="Whether to do near online tracking")
    parser.add_argument("--omni", action = 'store_true', help="Omni directional camera (JRDB)")
    opt = parser.parse_args()
    opt.sequence_folder = opt.sequence_folder.rstrip(os.sep)
    opt.using_cuda = torch.cuda.is_available() and opt.use_cuda
    if not opt.point_cloud and opt.track_3d:
        # Raising a bare string is itself a TypeError; raise a real exception instead.
        raise ValueError("Must provide point cloud if doing 3D tracking!")
    if opt.verbose:
        print(opt)
    if not os.path.exists(opt.output_folder):
        os.makedirs(opt.output_folder)
    return opt

# @profile
def main(opt):
    """Track every frame of one sequence and write the results to disk.

    Parameters
    ----------
    opt : argparse.Namespace
        Parsed command line options (see ``parse_arguments``).

    Side effects
    ------------
    Creates ``opt.output_folder`` if needed and writes ``<seq_name>.txt``
    (2D tracking) or ``<seq_name>_3d.txt`` (3D tracking). When
    ``opt.log_data`` is set, a per-frame log is additionally pickled to
    ``<seq_name>.p``.

    Raises
    ------
    ValueError
        If neither ``opt.reference`` nor ``opt.ground_truth`` is set, since
        there is then no source of detections.
    """

    if opt.verbose:
        print("------------------------")
        print("RUNNING SET UP")
        print("------------------------")
    # 40 is the ERROR level on the standard logging scale.
    tf.logging.set_verbosity(40)
    random.seed(0)
    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
    os.makedirs(opt.output_folder, exist_ok=True)
    if opt.LSTM:
        opt.max_cosine_distance = 1
        lstm = CombiLSTM()
        checkpoint = torch.load(opt.lstm_model)
        lstm.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            lstm.cuda()
        lstm.eval()
    else:
        lstm = None
    if opt.combine_features:
        combination_model = CombiNet()
        checkpoint = torch.load(opt.combo_model)
        combination_model.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            combination_model.cuda()
        combination_model.eval()
    else:
        combination_model = None

    dataset = SequenceDataset(opt.sequence_folder, point_cloud=opt.point_cloud, omni=opt.omni)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn = collate_fn)
    appearance_model = create_appearance_model(opt.appearance_model, opt.aligned_reid_ckpt, opt.resnet_reid_ckpt, opt.using_cuda)
    if opt.point_cloud:
        depth_model = create_depth_model(opt.depth_model, opt.depth_config_path)
    if opt.track_3d:
        tracker = Tracker_3d(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                        matching_strategy=opt.matching_strategy,
                        gate_full_state=opt.gate_full_state,
                        kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight, opt.theta_weight,
                                       opt.kf_process, opt.kf_2d_meas, opt.kf_3d_meas, opt.initial_uncertainty),
                        calib=dataset.calib,
                        dummy_node_cost_iou=opt.dummy_node_cost_iou,
                        dummy_node_cost_app=opt.dummy_node_cost_app,
                        nn_budget=opt.nn_budget,
                        use_imm=opt.use_imm,
                        uncertainty_limit=opt.uncertainty_limit,
                        gate_limit=opt.gate_limit,
                        omni=opt.omni)
    else:
        tracker = Tracker(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                        matching_strategy=opt.matching_strategy,
                        kf_appearance_feature=opt.kf_appearance_feature,
                        gate_full_state=opt.gate_full_state,
                        kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                        kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                        calib=dataset.calib,
                        dummy_node_cost_iou=opt.dummy_node_cost_iou,
                        dummy_node_cost_app=opt.dummy_node_cost_app,
                        nn_budget=opt.nn_budget,
                        use_imm=opt.use_imm,
                        uncertainty_limit=opt.uncertainty_limit,
                        optical_flow=opt.optical_flow_initiation,
                        gate_limit=opt.gate_limit)

    # 2D rows:  [frame, track_id, x, y, w, h(, status)]
    # 3D rows:  [frame, track_id, 7 box values(, status)]
    # The trailing status column only exists in near-online mode.
    results = []
    results_3d = []
    n_frames = len(dataloader)
    if opt.log_data:
        full_log = [{'tracks':[], 'detections':[], 'detections_3d':[]} for _ in range(n_frames)]
    det_matrix = None
    seq_name = os.path.split(opt.sequence_folder)[-1]

    frame_times = []
    if opt.verbose:
        print("------------------------")
        print("BEGINNING TRACKING OF SEQUENCE %s"%seq_name)
        print("------------------------")
    for frame_idx, img_path, input_img, point_cloud in tqdm(dataloader, ncols = 100, disable=not opt.verbose):

        if opt.log_data:
            full_log[frame_idx]['img_path'] = copy.copy(img_path)
        input_img = input_img.type(Tensor)
        if opt.reference:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'det',opt.ref_det+'.txt'), frame_idx, det_matrix, threshold = 0, nms_threshold = opt.nms_thresh)
        elif opt.ground_truth:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'gt','gt.txt'), frame_idx, det_matrix, nms_threshold = opt.nms_thresh)
        else:
            # A bare string cannot be raised in Python 3; use a real exception
            # so the user sees this message instead of a TypeError.
            raise ValueError("Must specify ground truth or detections")

        # --- START OF TRACKING ---
        # Only the tracker.update() call is timed; feature extraction and
        # the prediction step happen before start_time is taken.
        if detections is None or len(detections)==0:
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, [])
        else:
            total_dets = len(detections)
            patches = get_image_patches(input_img, detections)
            appearance_features = generate_features_batched(appearance_model, patches, opt, object_ids)
            if opt.point_cloud:
                if not opt.omni:
                    # Keep only points whose third coordinate is non-negative.
                    point_cloud = point_cloud[point_cloud[:,2]>=0]
                if opt.fpointnet:
                    boxes_3d, valid_3d, _, scores_3d, depth_features = generate_detections_3d(depth_model,
                                                                        detections, np.asarray(point_cloud),
                                                                        dataset.calib, input_img.shape,
                                                                        peds='ped' in opt.ref_det or opt.omni)
                    depth_features = convert_depth_features(depth_features, valid_3d)
                else:
                    # NOTE(review): depth_features is only assigned in the
                    # fpointnet branch above, yet it is used right below --
                    # confirm this ground-truth 3D path is expected to work.
                    boxes_3d, valid_3d = read_ground_truth_3d_detections(os.path.join(opt.sequence_folder,'gt','3d_detections.txt'), frame_idx)
                features, appearance_features = combine_features(appearance_features, depth_features, valid_3d, combination_model, depth_weight = opt.depth_weight)
                # Detections without a valid 3D box (valid_3d == -1) are kept
                # and represented by None instead of being dropped.
                compare_2d = bool(np.any(valid_3d == -1))
                if len(boxes_3d) > 0:
                    detections_3d = []
                    for idx, box in enumerate(boxes_3d):
                        if valid_3d[idx] == -1:
                            detections_3d.append(None)
                        else:
                            detections_3d.append(np.array(box).astype(np.float32))
                else:
                    detections_3d = None
            else:
                appearance_features = [appearance_features[i] for i in range(total_dets)]
                features = [None]*len(appearance_features)
                compare_2d = True
                detections_3d = None
            detections = convert_detections(detections, features, appearance_features, detections_3d)
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, detections, compare_2d)

        # --- END OF TRACKING ---
        end_time = time.time()
        frame_times.append(end_time - start_time)


        if opt.log_data:
            full_tracks = copy.deepcopy(tracker.tracks)
            temp_tracks = []
            for track in full_tracks:
                bbox = track.to_tlwh(None)
                # Log only tracks lying within the image plus a 10 px margin.
                if not (bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10):
                    temp_tracks.append(track)
            full_log[frame_idx]['tracks'] = temp_tracks
            full_log[frame_idx]['detections'] = copy.deepcopy(detections)

        for track in tracker.tracks:
            if opt.track_3d:
                # NOTE(review): only bbox_3d is assigned in 3D mode, but the
                # image-bounds test below reads `bbox`. That name is then
                # either stale (left over from the logging loop above) or
                # undefined -- confirm the intended behaviour.
                bbox_3d = track.to_tlwh3d()
            else:
                bbox = track.to_tlwh(None)
            # Skip tracks that left the image by more than a 10 px margin.
            if bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10:
                continue
            bbox[0] = max(0,bbox[0]) # Frame adjustments
            bbox[1] = max(0,bbox[1])
            bbox[2] = min(bbox[0]+bbox[2], input_img.shape[2])-bbox[0]
            bbox[3] = min(bbox[1]+bbox[3], input_img.shape[1])-bbox[1]

            # Status 1 = confirmed, 0 = still tentative. Tentative tracks are
            # only recorded in near-online mode.
            track_status = 1
            if not track.is_confirmed():
                if not opt.near_online:
                    continue
                track_status = 0
            if opt.near_online:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status])
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track_status])

                if track_status == 1:
                    # A confirmed track retroactively validates the rows
                    # written while it was still tentative. Each result list
                    # is walked on its own and the status column (last entry
                    # of a row) is the one that gets updated.
                    for row in results:
                        if row[1] == track.track_id:
                            row[6] = 1
                    for row in results_3d:
                        if row[1] == track.track_id:
                            row[9] = 1
            else:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6]])
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    frame_times = np.asarray(frame_times)
    if opt.verbose:
        print("------------------------")
        print("COMPLETED TRACKING, SAVING RESULTS")
        print("------------------------")
        print('\n\n','Total Tracking Time:',np.sum(frame_times),'Average Time Per Frame:',np.mean(frame_times))

    if opt.track_3d:
        output_file_3d = os.path.join(opt.output_folder, seq_name+"_3d.txt")
        if len(results_3d) > 0:
            with open(output_file_3d, 'w+') as f:
                for row in results_3d:
                    # In near-online mode only rows of confirmed tracks are written.
                    if opt.near_online and row[9] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.4f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]), file=f)
    else:
        output_file = os.path.join(opt.output_folder, seq_name+".txt")
        if len(results) > 0:
            with open(output_file, 'w+') as f:
                for row in results:
                    # In near-online mode only rows of confirmed tracks are written.
                    if opt.near_online and row[6] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5]), file=f)

    if opt.log_data:
        output_file = os.path.join(opt.output_folder, seq_name+".p")
        with open(output_file, 'wb') as f:
            pickle.dump(full_log, f)

if __name__=='__main__':
    opt = parse_arguments()
    main(opt)


================================================
FILE: paper_experiments/utils/EKF.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import pdb

"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
# Each table maps the number of degrees of freedom (1..9) to a chi-square
# quantile. For 2 degrees of freedom the quantile at probability p has the
# closed form -2*ln(1-p), which is a handy sanity check for every table.

# 0.95 quantile.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}

# 0.90 quantile.
chi2inv90 = {
    1: 2.706,
    2: 4.605,
    3: 6.251,
    4: 7.779,
    5: 9.236,
    6: 10.645,
    7: 12.017,
    8: 13.363,
    9: 14.684}

# 0.975 quantile.
chi2inv975 = {
    1: 5.025,
    2: 7.378,
    3: 9.348,
    4: 11.143,
    5: 12.833,
    6: 14.449,
    7: 16.013,
    8: 17.535,
    9: 19.023}

# 0.10 quantile (lower tail).
# The 2-dof entry is -2*ln(0.9) ~= 0.211; it was previously mistyped as .221.
chi2inv10 = {
    1: .016,
    2: .211,
    3: .584,
    4: 1.064,
    5: 1.610,
    6: 2.204,
    7: 2.833,
    8: 3.490,
    9: 4.168}


# NOTE: despite the name, these values are the 0.005 quantile (i.e. the
# critical values with upper-tail probability 0.995): for 2 dof,
# -2*ln(0.995) ~= 0.0100. The name is kept for backward compatibility.
chi2inv995 = {
    1: 0.0000393,
    2: 0.0100,
    3: .0717,
    4: .207,
    5: .412,
    6: .676,
    7: .989,
    8: 1.344,
    9: 1.735}


# 0.75 quantile.
chi2inv75 = {
    1: 1.323,
    2: 2.773,
    3: 4.108,
    4: 5.385,
    5: 6.626,
    6: 7.841,
    7: 9.037,
    8: 10.22,
    9: 11.39}

def squared_mahalanobis_distance(mean, covariance, measurements):
    """Squared Mahalanobis distance of each measurement to a Gaussian.

    Parameters
    ----------
    mean : ndarray
        Mean of the distribution; broadcast against ``measurements``.
    covariance : ndarray
        Square covariance matrix of the distribution.
    measurements : ndarray
        One measurement per row.

    Returns
    -------
    ndarray
        One value per measurement row: ``d @ inv(covariance) @ d`` with
        ``d = measurement - mean``.
    """
    d = measurements - mean

    # An explicit inverse is used on purpose: an earlier Cholesky-based
    # variant produced wrong results and has been removed.
    weighted = np.matmul(d, np.linalg.inv(covariance))

    # Row-wise dot product. This equals the diagonal of weighted @ d.T
    # without materialising the full N x N product.
    return np.einsum('...j,...j->...', weighted, d)


class EKF(object):
    """
    Generic extended Kalman filter base class.

    The model-specific hooks (``initiate``, ``predict_mean``,
    ``predict_covariance``, ``project_mean``, ``project_cov``, ...) are empty
    stubs here and are meant to be overridden. ``update`` additionally reads
    ``self._observation_mat`` and, in JPDA mode, ``self.ndim``; neither is
    set by this base class, so subclasses have to provide them.
    """

    def __init__(self):
        pass

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the new track.
            Unobserved velocities are initialized to 0 mean.

        """
        pass


    def predict_mean(self, mean):
        # Hook: propagate the state mean through the motion model (function g).
        # ``predict`` uses the return value directly as the new mean.
        pass

    def get_process_noise(self, mean, covariance):
        # Hook: returns Rt, the motion noise covariance.
        pass

    def predict_covariance(self, mean, covariance, last_detection=None,
                           next_to_last_detection=None):
        # Hook: propagate the state covariance. The two optional detection
        # arguments mirror the way ``predict`` invokes this method.
        pass

    def project_mean(self, mean):
        # Hook: measurement prediction from state (function h).
        # ``update`` uses the return value directly as the predicted
        # measurement.
        pass

    def project_cov(self, mean, covariance):
        # Hook: covariance of the predicted measurement; ``update`` uses it
        # as the innovation covariance.
        pass

    def predict(self, mean, covariance, last_detection, next_to_last_detection):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The covariance matrix of the object state at the
            previous time step.
        last_detection, next_to_last_detection
            Forwarded unchanged to ``predict_covariance``.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state.

        """
        # The covariance is propagated first so that it is computed from the
        # mean of the previous time step.
        covariance = self.predict_covariance(mean, covariance, last_detection, next_to_last_detection)
        mean = self.predict_mean(mean)

        return mean, covariance

    def get_innovation_cov(self, covariance):
        pass

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector
        covariance : ndarray
            The state's covariance matrix

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.

        """
        return self.project_mean(mean), self.project_cov(mean, covariance)

    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector.
        covariance : ndarray
            The predicted state's covariance matrix.
        measurement_t : ndarray or sequence of ndarray
            With ``JPDA=False`` a single measurement vector. With
            ``JPDA=True`` the sequence of candidate measurements.
        marginalization : sequence of float, optional
            Only used with ``JPDA=True``. Entry 0 is the probability of the
            dummy (no detection) hypothesis, entry ``i + 1`` the association
            probability of ``measurement_t[i]``.
        JPDA : bool
            Whether to perform the probabilistically weighted JPDA update.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """
        predicted_measurement, innovation_cov  = self.project(mean, covariance)
        # cholesky factorization used to solve for kalman gain since
        # K = covariance * update_mat.T * inv(innovation_cov)
        # so K is also the solution to
        # innovation_cov * K = covariance * update_mat.T
        cross_cov_t = np.dot(covariance, self._observation_mat.T).T
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            kalman_gain = scipy.linalg.cho_solve(
                (chol_factor, lower), cross_cov_t,
                check_finite=False).T
        except Exception:
            # in case cholesky factorization fails, revert to standard solver.
            # Only ordinary exceptions are handled so that KeyboardInterrupt
            # and SystemExit still propagate.
            kalman_gain = np.linalg.solve(innovation_cov, cross_cov_t).T

        if JPDA:
            # Probability-weighted innovation and spread-of-innovations term.
            innovation = np.zeros((self.ndim))
            cov_soft = np.zeros((self.ndim, self.ndim))

            for measurement_idx, measurement in enumerate(measurement_t):

                p_ij = marginalization[measurement_idx + 1] # + 1 for dummy
                y_ij = measurement - predicted_measurement
                innovation += y_ij * p_ij
                cov_soft += p_ij * np.outer(y_ij, y_ij)

            cov_soft = cov_soft - np.outer(innovation, innovation)

            # Covariance of a standard (single measurement) update.
            P_star = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

            # Blend "no detection" (prior kept) with the standard update.
            p_0 = marginalization[0]
            P_0 = p_0 * covariance + (1 - p_0) * P_star

            new_covariance = P_0 + np.linalg.multi_dot((kalman_gain, cov_soft, kalman_gain.T))

        else:
            innovation = measurement_t - predicted_measurement

            new_covariance = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        return new_mean, new_covariance


================================================
FILE: paper_experiments/utils/JPDA_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from linear_assignment import min_marg_matching
import pdb


def get_unmatched(all_idx, matches, i, marginalization=None):
    """Return the indices from ``all_idx`` that take part in no match.

    Parameters
    ----------
    all_idx : iterable
        All candidate indices (tracks or detections).
    matches : iterable of sequence
        Match tuples; element ``i`` of each tuple is compared to ``all_idx``.
    i : int
        Position inside a match tuple to look at (the Matcher class passes
        0 for tracks and 1 for detections).
    marginalization : ndarray, optional
        Accepted for backward compatibility only. Gate-based filtering of
        the unmatched set was never enabled, so the value is ignored.

    Returns
    -------
    list
        The unmatched indices, in no particular order.
    """
    assigned = [match[i] for match in matches]
    unmatched = set(all_idx) - set(assigned)
    return list(unmatched)


class Matcher:
    """Turn JPDA marginal probabilities into hard track/detection matches.

    ``marginalizations`` is indexed as ``[row, column]`` where row ``r``
    belongs to ``confirmed_tracks[r]``, column 0 is the dummy (no detection)
    hypothesis and column ``d + 1`` belongs to detection ``d``.
    """

    def __init__(self, detections, marginalizations, confirmed_tracks,
                 matching_strategy,
                 assignment_threshold=None):
        """
        Parameters
        ----------
        detections : sequence
            Detections of the current frame; only its length is used here.
        marginalizations : ndarray
            Marginal association probabilities (see class docstring).
        confirmed_tracks : sequence of int
            Track indices, one per row of ``marginalizations``.
        matching_strategy : str
            One of "max_and_threshold", "hungarian", "max_match", "none".
        assignment_threshold : float, optional
            Minimum probability used by "max_and_threshold" and forwarded to
            the "hungarian" strategy.
        """
        self.detections = detections
        self.marginalizations = marginalizations
        self.confirmed_tracks = confirmed_tracks
        self.assignment_threshold = assignment_threshold
        self.detection_indices = np.arange(len(detections))
        self.matching_strategy = matching_strategy

    def match(self):
        """Run the configured strategy.

        Returns
        -------
        (list, list, list)
            Matches as ``(track_idx, detection_idx)`` tuples, unmatched track
            indices and unmatched detection indices.
        """
        self.get_matches()
        self.get_unmatched_tracks()
        self.get_unmatched_detections()
        return self.matches, self.unmatched_tracks, self.unmatched_detections

    def get_matches(self):
        """Populate ``self.matches`` according to ``self.matching_strategy``.

        Raises
        ------
        Exception
            If the strategy name is not recognised.
        """

        if self.matching_strategy == "max_and_threshold":
            self.max_and_threshold_matching()
        elif self.matching_strategy == "hungarian":
            self.hungarian()
        elif self.matching_strategy == "max_match":
            self.max_match()
        elif self.matching_strategy == "none":
            self.matches = []
        else:
            raise Exception('Unrecognized matching strategy: {}'.
                            format(self.matching_strategy))

    def get_unmatched_tracks(self):
        """Store the confirmed tracks that appear in no match."""
        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,
                                              self.matches, 0)

    def get_unmatched_detections(self):
        """Store the detections that appear in no match."""
        self.unmatched_detections = get_unmatched(self.detection_indices, self.matches, 1, self.marginalizations)

    def max_match(self):
        """Give each detection to the track that claims it most strongly.

        Every track votes for its most probable hypothesis. Tracks voting
        for the dummy hypothesis stay unmatched. When several tracks vote for
        the same detection, the one with the highest marginal probability
        wins (the earlier track is kept on ties). No probability threshold
        is applied by this strategy.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        # detection id -> (track index, marginal probability) of best claim
        best_claim = {}
        for row, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[row, :]
            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy

            if detection_id < 0:
                continue

            # The probability is read from this track's own row, shifted by
            # one column for the dummy hypothesis.
            claim_p = marginalization[detection_id + 1]
            if (detection_id not in best_claim
                    or claim_p > best_claim[detection_id][1]):
                best_claim[detection_id] = (track_idx, claim_p)

        for detection_id, (track_idx, _) in best_claim.items():
            self.matches.append((track_idx, detection_id))

    def max_and_threshold_matching(self):
        """Match each track to its most probable detection if likely enough.

        A track is matched when its most probable hypothesis is a real
        detection whose probability is at least ``assignment_threshold``.
        Several tracks may end up matched to the same detection.
        """

        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        for i, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[i,:]
            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy

            if detection_id < 0:
                continue

            threshold_p = marginalization[detection_id + 1]
            if threshold_p < self.assignment_threshold:
                continue

            self.matches.append((track_idx, detection_id))

    def hungarian(self):
        """Delegate matching to ``min_marg_matching``; keep only its matches."""
        self.matches, _, _ = min_marg_matching(self.marginalizations,
                                               self.confirmed_tracks,
                                               self.assignment_threshold)
                               

================================================
FILE: paper_experiments/utils/aligned_reid_utils.py
================================================
from __future__ import print_function
import os
import os.path as osp
import pickle
from scipy import io
import datetime
import time
from contextlib import contextmanager
import numpy as np
from PIL import Image
import torch
from torch.autograd import Variable
from models.aligned_reid_model import Model as aligned_reid_model
from models.deep_sort_model import ImageEncoder as deep_sort_model
from utils.resnet_reid_utils import ResNet_Loader

def time_str(fmt=None):
  """Format the current local time.

  Args:
    fmt: a `strftime` pattern; when None, '%Y-%m-%d_%H:%M:%S' is used.
  Returns:
    The formatted time string.
  """
  pattern = '%Y-%m-%d_%H:%M:%S' if fmt is None else fmt
  now = datetime.datetime.today()
  return now.strftime(pattern)


def load_pickle(path):
  """Read and return the object pickled at `path`.

  The path must exist; this is enforced with an `assert`.
  """
  assert osp.exists(path)
  with open(path, 'rb') as handle:
    return pickle.load(handle)


def save_pickle(obj, path):
  """Pickle `obj` to `path` (protocol 2), creating the parent dir first."""
  parent_dir = osp.dirname(osp.abspath(path))
  may_make_dir(parent_dir)
  with open(path, 'wb') as handle:
    pickle.dump(obj, handle, protocol=2)


def save_mat(ndarray, path):
  """Write `ndarray` to a .mat file at `path` under the key 'ndarray'."""
  contents = {'ndarray': ndarray}
  io.savemat(path, contents)


def to_scalar(vt):
  """Extract the first element of a pytorch Variable or Tensor as a scalar.

  A tensor of size [1] converted with `.cpu().numpy()` has shape (1,), so
  the array is flattened and its first entry returned.

  Raises:
    TypeError: if `vt` is neither a Variable nor a Tensor.
  """
  if isinstance(vt, Variable):
    payload = vt.data
  elif torch.is_tensor(vt):
    payload = vt
  else:
    raise TypeError('Input should be a variable or tensor')
  return payload.cpu().numpy().flatten()[0]


def transfer_optim_state(state, device_id=-1):
  """Transfer an optimizer.state to cpu or specified gpu, which means
  transferring tensors of the optimizer.state to specified device.
  The modification is in place for the state.
  Args:
    state: An torch.optim.Optimizer.state
    device_id: gpu id, or -1 which means transferring to cpu
  Raises:
    RuntimeError: if a value is a Variable or a Parameter.
  """
  # NOTE(review): on PyTorch versions where Tensor and Variable are merged,
  # the Variable check below presumably matches every tensor -- confirm
  # against the PyTorch version this project targets.
  for key, val in state.items():
    if isinstance(val, dict):
      # Nested dicts are handled recursively.
      transfer_optim_state(val, device_id=device_id)
    elif isinstance(val, Variable):
      raise RuntimeError("Oops, state[{}] is a Variable!".format(key))
    elif isinstance(val, torch.nn.Parameter):
      raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
    else:
      try:
        if device_id == -1:
          state[key] = val.cpu()
        else:
          state[key] = val.cuda(device=device_id)
      except Exception:
        # Best effort: values that cannot be moved (e.g. plain numbers) are
        # left untouched. Only ordinary exceptions are swallowed, so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass


def may_transfer_optims(optims, device_id=-1):
  """Move the state of every optimizer in `optims` to cpu or a given gpu.

  Entries that are not `torch.optim.Optimizer` instances (e.g. None) are
  skipped. Optimizer states are modified in place.
  Args:
    optims: A list, which members are either torch.nn.optimizer or None.
    device_id: gpu id, or -1 which means transferring to cpu
  """
  for candidate in optims:
    if not isinstance(candidate, torch.optim.Optimizer):
      continue
    transfer_optim_state(candidate.state, device_id=device_id)


def may_transfer_modules_optims(modules_and_or_optims, device_id=-1):
  """Move optimizers and modules to cpu or a given gpu.

  None entries are ignored; any other unsupported entry only triggers a
  printed warning.
  Args:
    modules_and_or_optims: A list, which members are either torch.nn.optimizer
      or torch.nn.Module or None.
    device_id: gpu id, or -1 which means transferring to cpu
  """
  to_cpu = device_id == -1
  for entry in modules_and_or_optims:
    if isinstance(entry, torch.optim.Optimizer):
      transfer_optim_state(entry.state, device_id=device_id)
      continue
    if isinstance(entry, torch.nn.Module):
      if to_cpu:
        entry.cpu()
      else:
        entry.cuda(device=device_id)
      continue
    if entry is not None:
      print('[Warning] Invalid type {}'.format(entry.__class__.__name__))


class TransferVarTensor(object):
  """Callable returning its Variable/Tensor argument on a fixed device.

  `device_id` -1 selects the cpu, any other value is passed to `.cuda()`.
  """

  def __init__(self, device_id=-1):
    self.device_id = device_id

  def __call__(self, var_or_tensor):
    if self.device_id == -1:
      return var_or_tensor.cpu()
    return var_or_tensor.cuda(self.device_id)


class TransferModulesOptims(object):
  """Callable moving optimizers/modules to a fixed device.

  `device_id` -1 selects the cpu, any other value is treated as a gpu id.
  """

  def __init__(self, device_id=-1):
    self.device_id = device_id

  def __call__(self, modules_and_or_optims):
    target_device = self.device_id
    may_transfer_modules_optims(modules_and_or_optims, target_device)


def set_devices(sys_device_ids):
  """
  Restrict the visible GPUs and build the matching transfer helpers.

  The CUDA_VISIBLE_DEVICES environment variable is overwritten with the
  given ids. Models and user defined Variables/Tensors are meant to be
  transferred to the first visible device, or to the cpu when no id is
  given.
  Args:
    sys_device_ids: a tuple; which GPUs to use
      e.g.  sys_device_ids = (), only use cpu
            sys_device_ids = (3,), use the 4th gpu
            sys_device_ids = (0, 1, 2, 3,), use first 4 gpus
            sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
  Returns:
    TVT: a `TransferVarTensor` callable
    TMO: a `TransferModulesOptims` callable
  """
  import os
  # Every id is followed by ', ' -- including the last one.
  os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
    '{}, '.format(gpu_id) for gpu_id in sys_device_ids)
  # After masking, the first requested GPU is device 0; -1 stands for cpu.
  if len(sys_device_ids) > 0:
    device_id = 0
  else:
    device_id = -1
  return TransferVarTensor(device_id), TransferModulesOptims(device_id)


def set_devices_for_ml(sys_device_ids):
  """Mutual-learning variant of `set_devices`.

  Restricts the visible GPUs to the union of all requested ids and builds
  one pair of transfer helpers per model.

  Args:
    sys_device_ids: a tuple of tuples; which devices to use for each model,
      len(sys_device_ids) should be equal to number of models. Examples:

      sys_device_ids = ((-1,), (-1,))
        the two models both on CPU
      sys_device_ids = ((-1,), (2,))
        the 1st model on CPU, the 2nd model on GPU 2
      sys_device_ids = ((3,),)
        the only one model on the 4th gpu
      sys_device_ids = ((0, 1), (2, 3))
        the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3
      sys_device_ids = ((0,), (0,))
        the two models both on GPU 0
      sys_device_ids = ((0,), (0,), (1,), (1,))
        the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1

  Returns:
    TVTs: a list of `TransferVarTensor` callables, one for one model.
    TMOs: a list of `TransferModulesOptims` callables, one for one model.
    relative_device_ids: a list of lists; `sys_device_ids` transformed to
      relative ids; to be used in `DataParallel`
  """
  import os

  # Sorted union of every requested id, without the cpu marker (-1).
  flat_ids = []
  for group in sys_device_ids:
    flat_ids.extend(group)
  unique_sys_device_ids = sorted(set(flat_ids))
  if -1 in unique_sys_device_ids:
    unique_sys_device_ids.remove(-1)

  # Set the CUDA_VISIBLE_DEVICES environment variable; every id is followed
  # by ', ' -- including the last one.
  os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
    '{}, '.format(gpu_id) for gpu_id in unique_sys_device_ids)

  relative_device_ids = []
  TVTs, TMOs = [], []
  for group in sys_device_ids:
    # System ids become positions in the visible-device list; -1 is kept.
    relative_ids = [
      dev if dev == -1 else find_index(unique_sys_device_ids, dev)
      for dev in group
    ]
    relative_device_ids.append(relative_ids)

    # Models and user defined Variables/Tensors would be transferred to the
    # first device of their group.
    first_device = relative_ids[0]
    TVTs.append(TransferVarTensor(first_device))
    TMOs.append(TransferModulesOptims(first_device))
  return TVTs, TMOs, relative_device_ids


def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
  """Restore modules/optimizers from a checkpoint file.

  The 'fc.weight' and 'fc.bias' entries are dropped from each stored state
  dict before it is loaded.
  Args:
    modules_optims: A list, which members are either torch.nn.optimizer
      or torch.nn.Module.
    ckpt_file: The file path.
    load_to_cpu: Boolean. Whether to transform tensors in modules/optimizers
      to cpu type.
    verbose: Boolean. Whether to print the restored epoch and scores.
  Returns:
    The epoch and the scores stored in the checkpoint.
  """
  if load_to_cpu:
    map_location = lambda storage, loc: storage
  else:
    map_location = None
  ckpt = torch.load(ckpt_file, map_location=map_location)
  for module, stored in zip(modules_optims, ckpt['state_dicts']):
    for head_key in ('fc.weight', 'fc.bias'):
      if head_key in stored:
        del stored[head_key]
    load_state_dict(module, stored)
  epoch, scores = ckpt['ep'], ckpt['scores']
  if verbose:
    print('Resume from ckpt {}, \nepoch {}, \nscores {}'.format(
      ckpt_file, epoch, scores))
  return epoch, scores


def save_ckpt(modules_optims, ep, scores, ckpt_file):
  """Write the state_dict's of modules/optimizers to a checkpoint file.

  Args:
    modules_optims: A list whose members expose `state_dict()` (modules or
      optimizers).
    ep: The current epoch number.
    scores: The performance of the current model.
    ckpt_file: Destination path; its parent directory is created if needed.
  Note:
    torch.save() keeps the device type and id of the saved tensors, so when
    loading on different devices torch.load() has to be told where to put
    them (cpu or the desired gpu).
  """
  ckpt = {
    'state_dicts': [obj.state_dict() for obj in modules_optims],
    'ep': ep,
    'scores': scores,
  }
  target_dir = osp.dirname(osp.abspath(ckpt_file))
  may_make_dir(target_dir)
  torch.save(ckpt, ckpt_file)


def load_state_dict(model, src_state_dict):
  """Copy matching entries of `src_state_dict` into `model`'s state.

  Keys of `src_state_dict` do NOT have to match `model.state_dict().keys()`:
  a key that the model does not have is skipped silently, and a failure
  while copying one entry only prints a warning before moving on.

  Arguments:
    model: A torch.nn.Module object.
    src_state_dict (dict): A dict containing parameters and persistent buffers.
  Note:
    Adapted from torch.nn.modules.module.load_state_dict() with more detailed
    warnings; unknown keys are skipped to allow fine tuning.
  """
  from torch.nn import Parameter
  own_state = model.state_dict()
  for key, value in src_state_dict.items():
    if key in own_state:
      # Serialized Parameters are unwrapped for backwards compatibility.
      tensor = value.data if isinstance(value, Parameter) else value
      try:
        own_state[key].copy_(tensor)
      except Exception as err:
        print("Warning: Error occurs when copying '{}': {}"
              .format(key, str(err)))

  # src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())
  # if len(src_missing) > 0:
  #   print("Keys not found in source state_dict: ")
  #   for n in src_missing:
  #     print('\t', n)

  # dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())
  # if len(dest_missing) > 0:
  #   print("Keys not found in destination state_dict: ")
  #   for n in dest_missing:
  #     print('\t', n)


def is_iterable(obj):
  """Return True when `obj` exposes a `__len__` attribute.

  Note that this is a "sized" check: objects without `__len__` (for example
  generators) are reported as not iterable.
  """
  has_len = hasattr(obj, '__len__')
  return has_len


def may_set_mode(maybe_modules, mode):
  """Put every torch.nn.Module found in `maybe_modules` into `mode`.

  Args:
    maybe_modules: a single object or a list of objects. Members that are not
      `torch.nn.Module` instances are ignored.
    mode: either 'train' or 'eval'.
  Note:
    A module passed directly is always treated as one object, even when it
    defines `__len__` (Sequential, ModuleList, ModuleDict). Iterating such a
    container would skip the container itself, and for a ModuleDict it would
    yield string keys so that nothing is switched at all.
  """
  assert mode in ['train', 'eval']
  if isinstance(maybe_modules, torch.nn.Module) or not is_iterable(maybe_modules):
    maybe_modules = [maybe_modules]
  for m in maybe_modules:
    if isinstance(m, torch.nn.Module):
      if mode == 'train':
        m.train()
      else:
        m.eval()


def may_make_dir(path):
  """Create directory `path` (including parents) unless it already exists.

  Args:
    path: a dir, or result of `osp.dirname(osp.abspath(file_path))`.
      `None` and the empty string are ignored.
  Note:
    `osp.exists('')` returns `False`, while `osp.exists('.')` returns `True`,
    which is why the empty string has to be filtered out explicitly.
    (`if path is None or '':` would be wrong: it only tests for None.)
  """
  nothing_to_do = path in (None, '')
  if nothing_to_do:
    return
  if osp.exists(path):
    return
  os.makedirs(path)


class AverageMeter(object):
  """Modified from Tong Xiao's open-reid.
  Keeps the latest value together with the weighted average of all values
  seen since the last reset."""

  def __init__(self):
    self.reset()

  def reset(self):
    """Forget every value recorded so far."""
    self.val = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def update(self, val, n=1):
    """Record `val` with weight `n` and refresh the running average."""
    self.val = val
    self.sum += val * n
    self.count += n
    # The tiny epsilon keeps the division defined while `count` is zero.
    denominator = self.count + 1e-20
    self.avg = float(self.sum) / denominator


class RunningAverageMeter(object):
  """Tracks the current value and an exponentially weighted running average.

  `hist` is the weight given to the previous average at every update.
  """

  def __init__(self, hist=0.99):
    self.val = None
    self.avg = None
    self.hist = hist

  def reset(self):
    """Clear the current value and the average; `hist` is kept."""
    self.val = None
    self.avg = None

  def update(self, val):
    """Blend `val` into the average; the first value seeds the average."""
    if self.avg is not None:
      self.avg = self.avg * self.hist + val * (1 - self.hist)
    else:
      self.avg = val
    self.val = val


class RecentAverageMeter(object):
  """Keeps the most recent `hist_size` values and averages them on demand."""

  def __init__(self, hist_size=100):
    self.hist_size = hist_size
    self.fifo = []
    self.val = 0

  def reset(self):
    """Drop the stored history and reset the current value to 0."""
    self.fifo = []
    self.val = 0

  def update(self, val):
    """Append `val`, discarding the oldest entry once the window is full."""
    self.val = val
    self.fifo.append(val)
    if len(self.fifo) > self.hist_size:
      self.fifo.pop(0)

  @property
  def avg(self):
    """Mean of the stored values; asserts that at least one was recorded."""
    num_values = len(self.fifo)
    assert num_values > 0
    total = sum(self.fifo)
    return float(total) / num_values


def get_model_wrapper(model, multi_gpu):
  """Return `model` wrapped in `DataParallel` when `multi_gpu` is truthy,
  otherwise return `model` itself."""
  from torch.nn.parallel import DataParallel
  if not multi_gpu:
    return model
  return DataParallel(model)


class ReDirectSTD(object):
  """Modified from Tong Xiao's `Logger` in open-reid.
  This class overwrites sys.stdout or sys.stderr, so that console logs can
  also be written to file.
  Args:
    fpath: file path; `None` disables the file copy
    console: one of ['stdout', 'stderr']
    immediately_visible: If `False`, the file is opened only once and closed
      by `close()`. In this case, the message written to file may not be
      immediately visible. If `True`, each writing operation of the console
      will open, write to, and close the file. If your program has tons of
      writing operations, the cost of opening and closing file may be obvious.
  Usage example:
    `ReDirectSTD('stdout.txt', 'stdout', False)`
    `ReDirectSTD('stderr.txt', 'stderr', False)`
  NOTE: File will be deleted if already existing. Log dir and file is created
    lazily -- if no message is written, the dir and file will not be created.
    `close()` restores the stream that was replaced and leaves that stream
    open; it may be called more than once.
  """

  def __init__(self, fpath=None, console='stdout', immediately_visible=False):
    import sys
    import os
    import os.path as osp

    assert console in ['stdout', 'stderr']
    # Name of the `sys` attribute that is replaced, needed to undo it later.
    self._stream_name = console
    self.console = sys.stdout if console == 'stdout' else sys.stderr
    self.file = fpath
    self.f = None
    # True once the log file has been opened, so a later re-open appends
    # instead of truncating what was already written.
    self._file_started = False
    self.immediately_visible = immediately_visible
    if fpath is not None:
      # Remove existing log file.
      if osp.exists(fpath):
        os.remove(fpath)

    # Overwrite
    if console == 'stdout':
      sys.stdout = self
    else:
      sys.stderr = self

  def __del__(self):
    self.close()

  def __enter__(self):
    # Return the redirector so `with ReDirectSTD(...) as r:` binds it.
    return self

  def __exit__(self, *args):
    self.close()

  def _ensure_log_dir(self):
    """Create the directory that will hold the log file if it is missing."""
    import os
    log_dir = os.path.dirname(os.path.abspath(self.file))
    if not os.path.exists(log_dir):
      os.makedirs(log_dir)

  def write(self, msg):
    """Write `msg` to the original console stream and, if set, the file."""
    self.console.write(msg)
    if self.file is None:
      return
    self._ensure_log_dir()
    if self.immediately_visible:
      with open(self.file, 'a') as f:
        f.write(msg)
    else:
      if self.f is None:
        self.f = open(self.file, 'a' if self._file_started else 'w')
        self._file_started = True
      self.f.write(msg)

  def flush(self):
    """Flush the console stream and force the open log file to disk."""
    self.console.flush()
    if self.f is not None:
      self.f.flush()
      import os
      os.fsync(self.f.fileno())

  def close(self):
    """Undo the redirection and close the log file.

    The wrapped console stream is deliberately NOT closed: it is the real
    stdout/stderr of the process and must stay usable afterwards.
    """
    import sys
    # getattr guards keep `__del__` safe when `__init__` failed early.
    stream_name = getattr(self, '_stream_name', None)
    if stream_name is not None and getattr(sys, stream_name, None) is self:
      setattr(sys, stream_name, self.console)
    log_file = getattr(self, 'f', None)
    if log_file is not None:
      log_file.close()
      self.f = None


def set_seed(seed):
  """Seed Python's `random`, numpy and torch (CPU) with `seed`.

  Also switches `torch.backends.cudnn.enabled` off. A line is printed after
  each step.
  """
  import random
  import numpy as np
  import torch

  random.seed(seed)
  print('setting random-seed to {}'.format(seed))

  np.random.seed(seed)
  print('setting np-random-seed to {}'.format(seed))

  torch.backends.cudnn.enabled = False
  print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))

  # set seed for CPU
  torch.manual_seed(seed)
  print('setting torch-seed to {}'.format(seed))


def print_array(array, fmt='{:.2f}', end=' '):
  """Print a 1-D tuple, list, or numpy array containing digits.

  Every element is converted to float, formatted with `fmt` and followed by
  `end`; a newline closes the line. The assembled string is returned as well.
  """
  pieces = [fmt.format(float(x)) + end for x in array]
  s = ''.join(pieces) + '\n'
  print(s)
  return s


# Great idea from https://github.com/amdegroot/ssd.pytorch
def str2bool(v):
  """Return True when the lower-cased string `v` is one of
  "yes", "true", "t" or "1"; any other string gives False."""
  truthy_words = ("yes", "true", "t", "1")
  lowered = v.lower()
  return lowered in truthy_words


def tight_float_str(x, fmt='{:.4f}'):
  """Format `x` with `fmt` and drop insignificant trailing zeros.

  Args:
    x: the number to format.
    fmt: a `str.format` template for a single value.
  Returns:
    The formatted text with trailing zeros after the decimal point removed,
    and the decimal point itself removed when nothing follows it.
  Note:
    Zeros are only stripped when the text contains a decimal point and no
    exponent. Otherwise they are significant digits: '{:.0f}' of 100 must
    stay '100', and '1.00e+00' must keep its exponent.
  """
  text = fmt.format(x)
  if '.' not in text or 'e' in text.lower():
    return text
  return text.rstrip('0').rstrip('.')


def find_index(seq, item):
  """Return the position of the first element of `seq` equal to `item`,
  or -1 when no element compares equal."""
  matches = (pos for pos, element in enumerate(seq) if item == element)
  return next(matches, -1)


def adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):
  """Exponentially decay the learning rate during the late phase of training.
  Every parameter group of the optimizer receives the same learning rate.

  Args:
    optimizer: a pytorch `Optimizer` object
    base_lr: starting learning rate
    ep: current epoch, ep >= 1
    total_ep: total number of epochs to train
    start_decay_at_ep: start decaying at the BEGINNING of this epoch

  Example:
    base_lr = 2e-4
    total_ep = 300
    start_decay_at_ep = 201
    The learning rate stays at 2e-4 for the first 200 epochs and decays from
    epoch 201 on; training stops after 300 epochs.

  NOTE:
    It is meant to be called at the BEGINNING of an epoch. Before
    `start_decay_at_ep` the optimizer is left untouched.
  """
  assert ep >= 1, "Current epoch number should be >= 1"

  still_constant = ep < start_decay_at_ep
  if still_constant:
    return

  # Fraction of the decay phase reached; base_lr * 0.001 at the last epoch.
  progress = (float(ep + 1 - start_decay_at_ep)
              / (total_ep + 1 - start_decay_at_ep))
  decayed_lr = (base_lr * (0.001 ** progress))
  for g in optimizer.param_groups:
    g['lr'] = decayed_lr
  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))


def adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):
  """Staircase schedule: scale the learning rate by `factor` at the BEGINNING
  of the listed epochs. Every parameter group receives the same learning rate.

  Args:
    optimizer: a pytorch `Optimizer` object
    base_lr: starting learning rate
    ep: current epoch, ep >= 1
    decay_at_epochs: a list or tuple; learning rate is multiplied by a factor
      at the BEGINNING of these epochs
    factor: a number in range (0, 1)

  Example:
    base_lr = 1e-3
    decay_at_epochs = [51, 101]
    factor = 0.1
    The learning rate starts at 1e-3, becomes 1e-4 at the BEGINNING of epoch
    51 and 1e-5 at the BEGINNING of epoch 101, then stays unchanged until the
    end of training.

  NOTE:
    It is meant to be called at the BEGINNING of an epoch. Epochs that are not
    listed in `decay_at_epochs` leave the optimizer untouched.
  """
  assert ep >= 1, "Current epoch number should be >= 1"

  is_decay_epoch = ep in decay_at_epochs
  if not is_decay_epoch:
    return

  # The n-th listed epoch (1-based) applies the factor n times to base_lr.
  num_decays = find_index(decay_at_epochs, ep) + 1
  for g in optimizer.param_groups:
    g['lr'] = base_lr * factor ** num_decays
  print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))


@contextmanager
def measure_time(enter_msg):
  """Context manager that prints `enter_msg` on entry and, when the body
  finishes without raising, the elapsed wall-clock time in seconds."""
  started_at = time.time()
  print(enter_msg)
  yield
  elapsed = time.time() - started_at
  print('Done, {:.2f}s'.format(elapsed))

# @profile
def generate_features(appearance_model, patches, opt, object_ids = None):
    """Compute one appearance feature per image patch, one patch at a time.

    Args:
        appearance_model: the model matching `opt.appearance_model`; unused
            when `opt.perfect` is set.
        patches: iterable of patch tensors; an entry may be `None` or empty.
        opt: options object providing `using_cuda`, `perfect` and
            `appearance_model` ('aligned_reid', 'deepsort' or 'resnet_reid').
        object_ids: per-patch identity indices, required when `opt.perfect`
            is set (each index selects the hot entry of a 1024-d vector).
    Returns:
        A list aligned with `patches`; entries for `None`/empty patches are
        `None`.
    Raises:
        ValueError: if a non-empty patch has to be processed but
            `opt.appearance_model` is not a known model, or `opt.perfect` is
            set without `object_ids`.
    """
    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
    features = []
    for i, patch in enumerate(patches):
        if patch is None or patch.nelement()==0:
            features.append(None)
            continue
        # Add a leading batch dimension of size one.
        patch = patch.unsqueeze(0)
        if opt.perfect:
            if object_ids is None:
                raise ValueError("object_ids is required when opt.perfect is set")
            # Ground-truth features: a one-hot vector indexed by identity.
            feature = torch.zeros(1024)
            feature[object_ids[i]] = 1
            feature = feature.type(Tensor)
        elif opt.appearance_model == 'aligned_reid':
            with torch.no_grad():
                feature ,_ = appearance_model(patch.cuda())
                feature = feature.squeeze(0).type(Tensor)
        elif opt.appearance_model == 'deepsort':
            # This model is fed a channels-last numpy array.
            patch = patch.permute(0,2,3,1).cpu().numpy()
            feature = appearance_model(patch)
            feature = feature[0]
        elif opt.appearance_model == 'resnet_reid':
            patch = patch.permute(0,2,3,1)
            feature = appearance_model.inference([patch.squeeze(0)])
            feature = feature[0][0].type(Tensor)
        else:
            # Without this branch the previous patch's feature would be
            # appended again (or `feature` would be unbound on the first one).
            raise ValueError(
                "Unknown appearance model: {!r}".format(opt.appearance_model))
        features.append(feature)
    return features

# @profile
def generate_features_batched(appearance_model, patches, opt, object_ids = None):
    """Compute appearance features for `patches`, batching where supported.

    Only 'resnet_reid' is batched here. Perfect features, 'deepsort' and
    (for now) 'aligned_reid' are delegated to `generate_features`. For any
    other model name an error line is printed and `None` is returned.

    Returns:
        A list aligned with `patches`; `None`/empty patches map to `None`.
    """
    # Do old/slow way if perfect features or deepsort features
    use_per_patch_path = opt.perfect or opt.appearance_model == 'deepsort'
    if use_per_patch_path:
        return generate_features(appearance_model, patches, opt, object_ids)

    if opt.appearance_model == 'resnet_reid':
        Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor

        # For each input patch: its position in the batch, or None if skipped.
        batch_slots = []
        resnet_patches = []
        for patch in patches:
            if patch is None or patch.nelement()==0:
                batch_slots.append(None)
            else:
                batch_slots.append(len(resnet_patches))
                resnet_patches.append(patch.permute(1,2,0))

        resnet_features = appearance_model.inference(resnet_patches)
        return [None if slot is None else resnet_features[slot].type(Tensor)
                for slot in batch_slots]

    elif opt.appearance_model == 'aligned_reid':
        return generate_features(appearance_model, patches, opt, object_ids)   #TODO: Fix batched appearance features. This currently gives bad features
        # NOTE: everything below in this branch is unreachable; it is the
        # parked batched implementation referred to by the TODO above.
        Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
        maxx = -1
        maxy = -1
        idxs = []
        features = []
        for i, patch in enumerate(patches):
            if patch is None or patch.nelement()==0:
                continue
            maxx = max(maxx, patch.size()[1])
            maxy = max(maxy, patch.size()[2])
            idxs.append(i)

        if(maxx==-1 and maxy==-1):
            return features
        batch = torch.zeros(len(idxs),3,maxx,maxy).cuda()
        for i, idx in enumerate(idxs):
            patch = patches[idx]
            patchx = patch.size()[1]
            patchy = patch.size()[2]
            batch[i,:,:patchx,:patchy] = patch

        with torch.no_grad():
            features_torch, _ = appearance_model(batch)
            features_torch = features_torch.type(Tensor)

            i = 0
            ctr = 0
            for idx in idxs:
                while(i < idx):
                    features.append(None)
                    i+=1
                features.append(features_torch[ctr,:])
                i+=1
                ctr+=1
            while(i<len(patches)):
                features.append(None)
                i+=1

        return features
    else:
        print("Critical Error! Attempted to batch appearance features but no model was selected")

def get_image_patches(input_img, detections):
    """Crop one patch out of `input_img` for every detection.

    Each detection must unpack into exactly seven values, of which the first
    four are read as x1, y1, x2, y2 via `.item()`. The crop starts at the
    rounded (x1, y1) and spans the rounded box width/height; `input_img` is
    indexed as [channel, row, column].

    NOTE(review): coordinates are not clipped to the image, so a negative
    start index is passed to slicing as-is -- confirm callers expect that.
    """
    patches = []

    for detection in detections:
        left, top, right, bottom, _, _, _ = detection
        left, top = left.item(), top.item()
        right, bottom = right.item(), bottom.item()

        height = round(bottom - top)
        width = round(right - left)
        first_row = round(top)
        first_col = round(left)

        patches.append(
            input_img[:, first_row:first_row + height, first_col:first_col + width])

    return patches

def create_appearance_model(model_type, alignreid_checkpoint, resnet_reid_checkpoint=None, cuda=True):
    """Build the appearance (re-identification) model named by `model_type`.

    Args:
        model_type: 'aligned_reid', 'deepsort' or 'resnet_reid'.
        alignreid_checkpoint: checkpoint path, read only for 'aligned_reid';
            its first entry under 'state_dicts' is copied into the model.
        resnet_reid_checkpoint: passed to `ResNet_Loader` for 'resnet_reid'.
        cuda: for 'aligned_reid', move the model to the GPU when True.
    Returns:
        The constructed model; the 'aligned_reid' model is put in eval mode.
    Raises:
        ValueError: if `model_type` is not one of the supported names
            (previously this surfaced as an UnboundLocalError at the return).
    """
    if model_type == 'aligned_reid':
        appearance_model = aligned_reid_model()
        # Deserialize onto CPU storage; the model is moved to GPU afterwards.
        map_location = (lambda storage, loc: storage)
        sd = torch.load(alignreid_checkpoint, map_location=map_location)
        load_state_dict(appearance_model, sd['state_dicts'][0])
        if cuda:
            appearance_model.cuda()
        appearance_model.eval()
    elif model_type == 'deepsort':
        appearance_model = deep_sort_model()
    elif model_type == 'resnet_reid':
        appearance_model = ResNet_Loader(resnet_reid_checkpoint)
    else:
        raise ValueError("Unknown appearance model type: {!r}".format(model_type))

    return appearance_model


================================================
FILE: paper_experiments/utils/assign_ids_detections.py
================================================
import numpy as np
import os
import pdb
from tqdm import tqdm
from deep_sort_utils import non_max_suppression as deepsort_nms
from visualise import draw_track
import matplotlib.pyplot as plt
from PIL import Image
from evaluate_detections import iou

def assign_detection_id(detection_path, gt_path, conf_threshold = 0, iou_threshold = 0.5):
	"""Label every detection with the id of its best-overlapping ground truth.

	Both files are comma separated with the format used in read_detections.py:
	column 0 is the frame, column 1 the id, columns 2-5 the box and (for
	detections) column 6 the confidence. Detections with confidence not above
	`conf_threshold` are dropped. Each remaining detection receives the id of
	the ground-truth box of the same frame with the highest IoU, provided that
	IoU is at least `iou_threshold`; otherwise it receives -1. The result is
	written back to `detection_path`, replacing column 1.

	Returns None. If either file cannot be read or parsed, or the detection
	file is empty, nothing is written.
	"""
	try:
		# ndmin=2 keeps single-row files two-dimensional.
		detections = np.loadtxt(detection_path, delimiter=',', ndmin=2)
		gt = np.loadtxt(gt_path, delimiter=',', ndmin=2)
	except (IOError, OSError, ValueError):
		return
	if detections.size == 0:
		return
	if gt.size == 0:
		# No ground truth at all: every detection stays unassigned.
		gt = np.zeros((0, 6))

	###CONFIDENCE THRESHOLD
	detections = detections[detections[:, 6] > conf_threshold]
	########

	det_frames = detections[:, 0]
	det_boxes = detections[:, 2:6]
	gt_frames = gt[:, 0]
	gt_boxes = gt[:, 2:6]

	# Ids are stored by detection row so they stay aligned with `detections`
	# even when the file is not sorted by frame.
	assigned_ids = np.full(len(detections), -1.0)

	for frame in np.unique(det_frames):
		frame_mask_gt = gt_frames == frame
		gt_ids = gt[frame_mask_gt, 1]
		frame_gt_boxes = gt_boxes[frame_mask_gt]
		if len(frame_gt_boxes) == 0:
			continue

		for row in np.flatnonzero(det_frames == frame):
			iou_list = np.asarray([iou(gt_box, det_boxes[row]) for gt_box in frame_gt_boxes])
			best = np.argsort(iou_list)[-1]
			if iou_list[best] >= iou_threshold:
				assigned_ids[row] = gt_ids[best]

	out_matrix = np.hstack([np.expand_dims(detections[:,0], 1),
							np.expand_dims(assigned_ids, 1),
							detections[:,2:]])
	np.savetxt(detection_path, out_matrix, delimiter=',', fmt = '%.2f')

	return

# Script entry point: label the car detections of KITTI sequences 0000-0020
# with ground-truth ids, rewriting each detection file in place.
if __name__=='__main__':
	ap = []
	KITTI_root = 'data/KITTI/sequences'
	for sequence in tqdm(range(21)):
		sequence_dir = os.path.join(KITTI_root, '%.4d'%sequence)
		detection_file = os.path.join(sequence_dir, 'det', 'rrc_subcnn_car_det.txt')
		gt_file = os.path.join(sequence_dir, 'gt', 'gt_car.txt')
		assign_detection_id(detection_file, gt_file)


================================================
FILE: paper_experiments/utils/calibration.py
================================================
import numpy as np
import cv2
import os
import yaml
import torch
import pdb

class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.
        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref
        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]
        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)
        velodyne coord:
        front x, left y, up z
        rect/ref camera coord:
        right x, down y, front z
        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
        TODO(rqi): do matrix multiplication only once for each projection.

        The torch copies of the matrices (P_torch, R0_torch, D_torch) are
        placed on the GPU when CUDA is available and on the CPU otherwise.
    '''
    def __init__(self, calib_filepath):
        ''' Load the calibration file and derive all matrices from it.
            Requires the key 'P2', one of 'Tr_velo_to_cam'/'Tr_velo_cam' and
            one of 'R0_rect'/'R_rect'.
        '''
        calibs = self.read_calib_file(calib_filepath)
        # Fall back to the CPU so the class is usable on hosts without CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Projection matrix from rect camera coord to image2 coord
        self.P = calibs['P2']
        self.P = np.reshape(self.P, [3,4])
        self.P_torch = torch.from_numpy(self.P).float().to(device)

        # Rigid transform from Velodyne coord to reference camera coord
        try:
            self.V2C = calibs['Tr_velo_to_cam']
        except KeyError:
            self.V2C = calibs['Tr_velo_cam']

        self.V2C = np.reshape(self.V2C, [3,4])
        self.C2V = inverse_rigid_trans(self.V2C)
        # Rotation from reference camera coord to rect camera coord
        try:
            self.R0 = calibs['R0_rect']
        except KeyError:
            self.R0 = calibs['R_rect']
        self.R0 = np.reshape(self.R0,[3,3])
        self.R0_torch = torch.from_numpy(self.R0).float().to(device)

        # D = (P * [R0 0; 0 1])^T, a 4x3 matrix taking homogeneous ref-camera
        # points (as rows) straight to homogeneous image coordinates.
        RA = np.zeros((4,4))
        RA[:3,:3] = self.R0
        RA[3,3] = 1
        self.D = np.matmul(self.P,RA).T
        self.D_torch = torch.from_numpy(self.D).float().to(device)

        # Camera intrinsics and extrinsics
        self.c_u = self.P[0,2]
        self.c_v = self.P[1,2]
        self.f_u = self.P[0,0]
        self.f_v = self.P[1,1]
        self.b_x = self.P[0,3]/(-self.f_u) # relative
        self.b_y = self.P[1,3]/(-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
            Every line is expected as "<key>: <v0> <v1> ..."; the values are
            stored as a float array under the key without its trailing ':'.
            Empty lines, lines without a value part and lines whose values
            are not all floats are skipped.
        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f:
                line = line.rstrip()
                if len(line)==0: continue
                parts = line.split(' ', 1)
                if len(parts) != 2:
                    # A key without any value cannot be parsed; ignore it.
                    continue
                key, value = parts
                if key.endswith(':'):
                    key = key[:-1]
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass

        return data

    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
            Returns a dict with the keys 'Tr_velo_to_cam', 'R0_rect' and 'P2'.
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        Tr_velo_to_cam = np.zeros((3,4))
        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])
        Tr_velo_to_cam[:,3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Output: nx4 points in Homogeneous by appending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
        return pts_3d_hom

    def cart2hom_torch(self, pts_3d):
        ''' Torch version of cart2hom: append a column of ones to an nx3
            tensor. The ones are created on the device of `pts_3d`.
        '''
        n = pts_3d.size()[0]
        ones = torch.ones(n, 1, device=pts_3d.device)
        pts_3d_hom = torch.cat((pts_3d, ones), 1)
        return pts_3d_hom

    # ===========================
    # ------- 3d to 3d ----------
    # ===========================
    def project_velo_to_ref(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx3 points in reference camera coord.
        '''
        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        ''' Input: nx3 points in reference camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_ref_to_rect_torch(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return torch.transpose(torch.matmul(self.R0_torch, torch.transpose(pts_3d_ref,0,1)),0,1)

    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx3 points in rect camera coord.
        '''
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # ===========================
    # ------- 3d to 2d ----------
    # ===========================
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3
        # Perspective division by the third homogeneous coordinate.
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_rect_to_image_torch(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom_torch(pts_3d_rect)
        pts_2d = torch.matmul(pts_3d_rect, torch.transpose(self.P_torch,0,1)) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_ref_to_image_torch(self, pts_3d_ref):
        ''' Input: nx3 points in ref camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_ref = self.cart2hom_torch(pts_3d_ref)
        pts_2d = torch.matmul(pts_3d_ref, self.D_torch) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # ===========================
    # ------- 2d to 3d ----------
    # ===========================
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
        pts_3d_rect = np.zeros((n,3))
        pts_3d_rect[:,0] = x
        pts_3d_rect[:,1] = y
        pts_3d_rect[:,2] = uv_depth[:,2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        ''' Input: nx3 (u, v, depth) as in project_image_to_rect.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)

def rotx(t):
    ''' 3x3 rotation matrix for an angle of t radians about the x-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [
        [1, 0, 0],
        [0, cos_t, -sin_t],
        [0, sin_t, cos_t],
    ]
    return np.array(rows)


def roty(t):
    ''' 3x3 rotation matrix for an angle of t radians about the y-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [
        [cos_t, 0, sin_t],
        [0, 1, 0],
        [-sin_t, 0, cos_t],
    ]
    return np.array(rows)


def rotz(t):
    ''' 3x3 rotation matrix for an angle of t radians about the z-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [
        [cos_t, -sin_t, 0],
        [sin_t, cos_t, 0],
        [0, 0, 1],
    ]
    return np.array(rows)


def transform_from_rot_trans(R, t):
    ''' Build the 4x4 homogeneous transformation matrix [R t; 0 0 0 1] from a
        rotation (reshaped to 3x3) and a translation (reshaped to 3x1).
    '''
    rotation = R.reshape(3, 3)
    translation = t.reshape(3, 1)
    upper_rows = np.concatenate([rotation, translation], axis=1)
    bottom_row = [[0, 0, 0, 1]]
    return np.concatenate([upper_rows, bottom_row], axis=0)


def inverse_rigid_trans(Tr):
    ''' Invert a rigid body transform given as a 3x4 matrix [R|t].
        The result is the 3x4 matrix [R'|-R't].
    '''
    rotation_t = np.transpose(Tr[0:3,0:3])
    inverse = np.zeros_like(Tr) # 3x4
    inverse[0:3,0:3] = rotation_t
    inverse[0:3,3] = np.dot(-rotation_t, Tr[0:3,3])
    return inverse

def read_label(label_filename):
    ''' Parse a label file into a list of `Object3d`, one per line.
        Trailing whitespace is stripped from every line before parsing.
    '''
    # The context manager closes the file; it used to be left open.
    with open(label_filename) as label_file:
        lines = [line.rstrip() for line in label_file]
    objects = [Object3d(line) for line in lines]
    return objects

def load_image(img_filename):
    ''' Read the image at `img_filename` with `cv2.imread` and return it. '''
    image = cv2.imread(img_filename)
    return image

def load_velo_scan(velo_filename):
    ''' Load a binary file of float32 values and return it as an nx4 array. '''
    raw_values = np.fromfile(velo_filename, dtype=np.float32)
    return raw_values.reshape((-1, 4))

def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.
    Usage: pts_2d = projectToImage(pts_3d, P)
      input: pts_3d: nx3 matrix
             P:      3x4 projection matrix
      output: pts_2d: nx2 matrix
      The points are extended to homogeneous coordinates (nx4), multiplied by
      P' (4x3) to give nx3 projected points, and the first two columns are
      divided by the third.
      The shape of the extended point matrix is printed on every call.
    '''
    num_points = pts_3d.shape[0]
    ones_column = np.ones((num_points, 1))
    homogeneous = np.concatenate([pts_3d, ones_column], axis=1)
    print(('pts_3d_extend shape: ', homogeneous.shape))
    projected = np.dot(homogeneous, np.transpose(P)) # nx3
    depth = projected[:, 2]
    for column in (0, 1):
        projected[:, column] /= depth
    return projected[:, 0:2]


def compute_box_3d(obj, P):
    ''' Project the 3d bounding box of `obj` into the image plane using the
        projection matrix P.
        Reads obj.ry (yaw), obj.l, obj.w, obj.h and obj.t from the object.
        Returns:
            corners_2d: (8,2) array in left image coord, or None when any
                corner has z < 0.1 (box not fully in front of the camera).
            corners_3d: (8,3) array in in rect camera coord.
    '''
    # rotation about the yaw axis
    yaw_rotation = roty(obj.ry)

    # 3d bounding box dimensions
    length, width, height = obj.l, obj.w, obj.h

    # 3d bounding box corners in the object frame, one row per axis
    local_corners = np.vstack([
        [length/2, length/2, -length/2, -length/2, length/2, length/2, -length/2, -length/2],
        [0, 0, 0, 0, -height, -height, -height, -height],
        [width/2, -width/2, -width/2, width/2, width/2, -width/2, -width/2, width/2],
    ])

    # rotate and translate 3d bounding box
    corners_3d = np.dot(yaw_rotation, local_corners)
    for axis in range(3):
        corners_3d[axis,:] = corners_3d[axis,:] + obj.t[axis]

    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2,:]<0.1):
        return None, np.transpose(corners_3d)

    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)


def compute_orientation_3d(obj, P):
    ''' Project the 3d orientation vector of `obj` into the image plane using
        the projection matrix P.
        Reads obj.ry (yaw), obj.l and obj.t from the object.
        Returns:
            orientation_2d: (2,2) array in left image coord, or None when an
                end point has z < 0.1.
            orientation_3d: (2,3) array in in rect camera coord.
    '''
    # rotation about the yaw axis
    yaw_rotation = roty(obj.ry)

    # end points of the orientation vector in the object frame (as columns)
    end_points = np.array([[0.0, obj.l],[0,0],[0,0]])

    # rotate and translate into the camera coordinate system
    orientation_3d = np.dot(yaw_rotation, end_points)
    for axis in range(3):
        orientation_3d[axis,:] = orientation_3d[axis,:] + obj.t[axis]

    # vector behind image plane?
    if np.any(orientation_3d[2,:]<0.1):
        return None, np.transpose(orientation_3d)

    # project orientation into the image plane
    orientation_2d = project_to_image(np.transpose(orientation_3d), P)
    return orientation_2d, np.transpose(orientation_3d)

def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: (8,3) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
        Only the first two columns of qs (truncated to int32) are used.
        The lines are drawn into `image` in place; `image` is also returned.
    '''
    # OpenCV 3+ names the anti-aliased line type LINE_AA; OpenCV 2 exposed it
    # as CV_AA. Prefer the modern name and fall back for old installations.
    line_type = getattr(cv2, 'LINE_AA', None)
    if line_type is None:
        line_type = cv2.CV_AA

    qs = qs.astype(np.int32)

    def vertex(index):
        # Plain Python ints are accepted as point coordinates by cv2.line.
        return (int(qs[index,0]), int(qs[index,1]))

    for k in range(0,4):
       # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
       # edge k of the face made of vertices 0-3
       i,j=k,(k+1)%4
       cv2.line(image, vertex(i), vertex(j), color, thickness, line_type)

       # edge k of the face made of vertices 4-7
       i,j=k+4,(k+1)%4 + 4
       cv2.line(image, vertex(i), vertex(j), color, thickness, line_type)

       # edge connecting vertex k with vertex k+4
       i,j=k,k+4
       cv2.line(image, vertex(i), vertex(j), color, thickness, line_type)
    return image


class OmniCalibration(Calibration):
    """Calibration helper built from a folder of YAML configuration files.

    Reads `defaults.yaml` and `cameras.yaml` from `calib_folder` and derives
    the median vertical focal length and optical centre over a fixed list of
    sensors (see `calculate_median_param_value`).
    """

    def __init__(self, calib_folder):
        """Load both YAML files and cache the derived projection parameters.

        Parameters
        ----------
        calib_folder : str
            Directory containing `defaults.yaml` and `cameras.yaml`.
        """
        global_config = os.path.join(calib_folder, 'defaults.yaml')
        camera_config = os.path.join(calib_folder, 'cameras.yaml')

        with open(global_config) as f:
            self.global_config_dict = yaml.safe_load(f)

        with open(camera_config) as f:
            self.camera_config_dict = yaml.safe_load(f)

        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')
        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')
        # image shape is (color channels, height, width)
        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']

    def project_ref_to_image_torch(self, pointcloud):
        """Project points onto the image, returning stacked (x, y) pixel coordinates.

        `pointcloud` is indexed as `[:, 0]`, `[:, 1]`, `[:, 2]` and used with
        torch functions, so it is expected to be a 2-D torch tensor with at
        least three columns.
        """
        # Horizontal angle of every point, shifted by pi and wrapped into [0, 2*pi).
        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)
        horizontal_fraction = theta/ (2*np.pi)
        # The horizontal angle maps linearly onto the image width.
        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]
        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y
        pts_2d = torch.stack([x, y], dim=1)

        return pts_2d

    def project_image_to_rect(self, uvdepth):
        """Back-project rows of (u, v, depth) into stacked (x, y, z) points.

        Uses numpy operations on columns 0-2 of `uvdepth`, so a 2-D numpy
        array is expected.
        """
        # Pixel column -> horizontal angle in [-pi, pi).
        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi
        z = uvdepth[:, 2]*np.cos(theta)
        x = uvdepth[:, 2]*np.sin(theta)
        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))

        return np.stack([x,y,z], axis=1)

    def project_velo_to_ref(self, pointcloud):
        """Reorder and flip axes: output columns are (-in[1], -in[2], in[0])."""
        # Indexing with a list produces a new array/tensor, so the caller's
        # data is not modified by the sign flips below.
        pointcloud = pointcloud[:, [1, 2, 0]]
        pointcloud[:, 0] *= -1
        pointcloud[:, 1] *= -1

        return pointcloud

    def move_lidar_to_camera_frame(self, pointcloud, upper = True):
        """Shift and rotate a lidar point cloud using the configured extrinsics.

        The translation and the last rotation entry are read from
        `lidar_upper_to_rgb` (when `upper` is true) or `lidar_lower_to_rgb`.
        `pointcloud` is modified in place and also returned.
        """
        # assumed only rotation about z axis
        if upper:
            pointcloud -= self.global_config_dict['lidar_upper_to_rgb']['translation']
            theta = self.global_config_dict['lidar_upper_to_rgb']['rotation'][-1]
        else:
            pointcloud -= self.global_config_dict['lidar_lower_to_rgb']['translation']
            theta = self.global_config_dict['lidar_lower_to_rgb']['rotation'][-1]

        # 2x2 rotation applied to the first two columns only.
        rotation_matrix = torch.Tensor([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]).type(pointcloud.type())
        pointcloud[:, :2] = torch.matmul(rotation_matrix.unsqueeze(0), pointcloud[:, :2].transpose(0,1)).transpose(0,1)
        return pointcloud

    def calculate_median_param_value(self, param):
        """Return the median of one intrinsic parameter over the omni sensors.

        Parameters
        ----------
        param : str
            One of 'f_x', 's', 't_x', 'f_y', 't_y'. Each name selects a fixed
            position in the space-separated `K` string of every camera entry.

        Raises
        ------
        ValueError
            If `param` is not one of the supported names.
        """
        # Position of each parameter inside the split `K` string.
        param_index = {'f_x': 0, 's': 1, 't_x': 2, 'f_y': 4, 't_y': 5}
        if param not in param_index:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError('Wrong parameter!')
        idx = param_index[param]

        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']
        parameter_list = []
        for sensor, camera_params in self.camera_config_dict['cameras'].items():
            if sensor not in omni_camera:
                continue
            K_matrix = camera_params['K'].split(' ')
            parameter_list.append(float(K_matrix[idx]))
        return np.median(parameter_list)


================================================
FILE: paper_experiments/utils/combine_and_process_detections.py
================================================
import os
from os import listdir
from os.path import isfile, join

# Directory whose sub-folders are the sequences to process; each sequence is
# expected to contain a 'det' folder (see the loop below). Exactly one of the
# alternatives should be active.
#root = "/cvgl2/u/mihirp/depth_tracking/data/JRDB/sequences/"
#root = "/cvgl2/u/mihirp/depth_tracking/data/JRDB/test_sequences/"
#root = "/cvgl2/u/mihirp/depth_tracking/data/KITTI/sequences/"
root = "/cvgl2/u/mihirp/depth_tracking/data/KITTI/test_sequences/"

# Prefix of the generated files: <file_name>_raw.txt, _car.txt and _ped.txt.
file_name = "new_subcnn_faster_rcnn"
# file_name = "detectron2_x101"

def threshold(filename, thresh, min, max):
  """Read space-separated detections and keep those scoring above `thresh`.

  The last field of every line is the detector score. For kept detections it
  is replaced by the string form of `(score - thresh) / (max - thresh)`.

  Parameters
  ----------
  filename : path of the detection file, one detection per line.
  thresh : detections with `score <= thresh` are discarded.
  min : unused; kept so existing calls keep working.
  max : score value that maps to 1.0 after rescaling.

  Returns
  -------
  List of detections, each a list of string fields in file order.
  """
  detections = []
  with open(filename, 'r') as readfile:
    for det in readfile.read().split('\n'):
      # Skip empty lines instead of blindly dropping the last element, so a
      # file without a trailing newline does not lose its final detection.
      if not det.strip():
        continue
      parsedet = det.split(' ')
      score = float(parsedet[-1])
      parsedet[-1] = str((score - thresh) / (max - thresh))
      if score > thresh:
        detections.append(parsedet)
  return detections

# For every sequence folder under `root`:
#   1. threshold the detector output and write <file_name>_raw.txt with the
#      second field of each detection renumbered per frame,
#   2. split the raw file into comma-separated <file_name>_car.txt and
#      <file_name>_ped.txt files.
for seq in sorted(os.listdir(root)): #21 for normal, 29 for testing
  path = os.path.join(root,seq,'det')

  with open(os.path.join(path,file_name+'_raw.txt'), 'w') as f:
    # One list of thresholded detections per detector; only subcnn is active.
    pred_dets = []
    #pred_dets.append(threshold(os.path.join(path,'rrc.txt'), .05, 0, 1))
    pred_dets.append(threshold(os.path.join(path,'subcnn.txt'), .8, 0, 1))
    #pred_dets.append(threshold(os.path.join(path,'faster_rcnn.txt'), .99, 0, 1))
    #pred_dets.append(threshold(os.path.join(path,'detectron2_x101.txt'), .9, 0, 1))
    #pred_dets.append(threshold(path+'regionlets.txt', 5, -5, 25))
    # Nothing survived the threshold: the (already created, empty) raw file
    # is left behind and the car/ped files are not written for this sequence.
    if len(pred_dets[0]) == 0:
      continue
    # Frame number (first field) of the last kept detection.
    max_frames = int((pred_dets[0])[len(pred_dets[0])-1][0])

    # det_ctrs[j] is the read position inside pred_dets[j].
    det_ctrs = [0,0,0,0]
    for frame in range(max_frames+1):
      # Running index of the detection within the current frame.
      frame_num = 0
      for j in range(1): #TODO: Update to number of detectors used
        # Consume every detection of detector j that belongs to `frame`.
        while det_ctrs[j] < len(pred_dets[j]) and int( (pred_dets[j])[det_ctrs[j]][0]) == frame:
          # Overwrite the second field with the per-frame running index.
          (pred_dets[j])[det_ctrs[j]][1] = str(frame_num)
          frame_num+=1
          f.write( " ".join( (pred_dets[j])[det_ctrs[j]] )+'\n')
          det_ctrs[j]+=1

  # Counts max/min of scores
  # (the results are only used by the commented-out debug prints below)
  for ctr, pred_det in enumerate(pred_dets):
    minval = 1000
    maxval = 0
    for detection in pred_det:
      score = detection[len(detection)-1]
      if float(score)>maxval:
        maxval = float(score)
      if float(score)<minval:
        minval = float(score)
    # print("Detector: "+str(ctr)+" Max: "+str(maxval))
    # print("Detector: "+str(ctr)+" Min: "+str(minval))

  # Re-read the raw file that was just written.
  with open(os.path.join(path,file_name+'_raw.txt'), 'r') as f:
    lines = f.readlines()

  with open(os.path.join(path, file_name+'_car.txt'), 'w') as fcar:
    with open(os.path.join(path, file_name+'_ped.txt'), 'w') as fped:
      for line in lines:
        # Skip lines too short to hold a detection.
        if len(line) < 5:
          continue
        vals = line.split(' ')
        # Fields 6-9 are read as the box corners (min_x, min_y, max_x, max_y).
        min_x = float(vals[6])
        min_y = float(vals[7])
        max_x = float(vals[8])
        max_y = float(vals[9])
        # The last field still carries the trailing newline from readlines(),
        # which is what terminates each output line.
        score = vals[-1]
        # Output format: frame,0,min_x,min_y,width,height,score
        out_line = vals[0]+',0,'+str(min_x)+','+str(min_y)+','+str(max_x-min_x)+','+str(max_y-min_y)+','+str(score)
        # Field 2 is the class label; other classes are dropped.
        if vals[2] == 'Car':
          fcar.write(out_line)
        elif vals[2] == 'Pedestrian':
          fped.write(out_line)


================================================
FILE: paper_experiments/utils/dataset.py
================================================
import glob
import os
import pdb
import random
import sys
from itertools import compress

import numpy as np
import torch
import torchvision.transforms as transforms

from PIL import Image
from skimage.transform import resize
from torch.utils.data import Dataset
from tqdm import tqdm

from models.pointnet_model import PointNet
from .calibration import Calibration, OmniCalibration
from .read_detections import (read_ground_truth_2d_detections,
                              read_ground_truth_3d_detections)


class SequenceDataset(Dataset):
    """Dataset over the numerically named images of one sequence folder.

    Images are read from `<folder_path>/imgs/` and ordered by the integer in
    their file name. With `point_cloud=True`, `.bin` files from
    `<folder_path>/depth/` are loaded alongside and a calibration object is
    created (`OmniCalibration` when `omni` is set, `Calibration` otherwise).
    """

    def __init__(self, folder_path, point_cloud=False, cuda=False, omni=False):
        # Filter before sorting so that non-image files with non-numeric names
        # cannot break the integer sort key.
        image_files = [file for file in glob.glob('%s/imgs/*.*' % folder_path) if is_image_file(file)]
        self.files = sorted(image_files, key = self._frame_index)
        self.point_cloud = point_cloud
        self.seq_name = os.path.split(folder_path)[-1]
        self.omni = omni
        if point_cloud:
            if self.omni:
                calib_folder = os.path.join(folder_path, 'calib')
                self.calib = OmniCalibration(calib_folder)
            else:
                self.calib_file = os.path.join(folder_path, 'calib', self.seq_name+'.txt')
                self.calib = Calibration(self.calib_file)
            self.depth_files = sorted(glob.glob('%s/*.*' % os.path.join(folder_path, 'depth')))
            self.depth_files = [file for file in self.depth_files if file.split('.')[-1]=='bin']
        else:
            self.calib = None
        self.cuda = cuda

    @staticmethod
    def _frame_index(path):
        """Integer frame number encoded in the file name, for any extension length."""
        return int(os.path.splitext(os.path.basename(path))[0])

    def __getitem__(self, index):
        """Return `(frame_idx, img_path, input_img, scan)` for one frame.

        `input_img` is a channels-first float tensor scaled by 1/255 (moved to
        the GPU when `cuda` is set). `scan` is the depth data reshaped to four
        columns with its first three columns passed through
        `calib.project_velo_to_ref`, or -1 when point clouds are disabled.
        Indices wrap around the number of files.
        """
        img_path = self.files[index % len(self.files)]
        if self.point_cloud:
            depth_path = self.depth_files[index % len(self.depth_files)]
        # Extract image
        img = np.array(Image.open(img_path))

        # Channels-first
        input_img = np.transpose(img, (2, 0, 1))/255
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()
        if self.cuda:
            input_img = input_img.cuda()
        # splitext instead of [:-4]: '.jpeg' has a five-character extension.
        frame_idx = self._frame_index(img_path)
        if self.point_cloud:
            #velodyne coordinates and image coordinates are different.
            #velo_x = camera_z
            #velo_y = -camera_x
            #velo_z = -camera_y
            if self.omni:
                scan = np.load(depth_path)
            else:
                scan = np.fromfile(depth_path, dtype=np.float32)
            scan = scan.reshape((-1, 4))
            scan[:, :3] = self.calib.project_velo_to_ref(scan[:, :3])
            return frame_idx, img_path, input_img, scan
        else:
            return frame_idx, img_path, input_img, -1

    def __len__(self):
        """Number of image files in the sequence."""
        return len(self.files)

def is_image_file(file):
    """Tell whether the text after the last '.' in `file` is a known image extension.

    The comparison is case-sensitive; a name without any '.' is compared as a whole.
    """
    known_extensions = ['jpg', 'png', 'tif', 'bmp', 'jpeg']
    extension = file.split('.')[-1]
    return extension in known_extensions

class TripletDataset(Dataset):
    """Dataset of (anchor, positive, negatives) feature samples.

    Features are loaded from `features.npy` (or `test_features.npy` when
    `test` is set) inside `feature_path`. Every row is read as
    `[object id, sequence, frame, feature...]`.

    With `sequence=False` there is one item per row; with `sequence=True`
    there is one item per unique object id.
    """

    def __init__(self, feature_path, num_negative_samples = 100, cuda = True, sequence = False, test = False):
        if test:
            feature_file = os.path.join(feature_path, 'test_features.npy')
        else:
            feature_file = os.path.join(feature_path, 'features.npy')

        feature_array = np.load(feature_file)

        # Column layout: 0 = object id, 1 = sequence, 2 = frame, 3+ = feature.
        self.ids = feature_array[:, 0].astype(np.float32).astype(np.int32)
        self.unique_ids = np.unique(self.ids)
        self.frames = feature_array[:, 2].astype(np.float32).astype(np.int32)
        self.features = feature_array[:, 3:].astype(np.float32)
        self.sequences = feature_array[:, 1].astype(np.float32).astype(np.int32)
        self.sequence = sequence
        if self.sequence:
            self.size = self.unique_ids.size
        else:
            self.size = self.ids.size
        self.num_negative_samples = num_negative_samples
        self.tensor_type = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    def _sample_negative_features(self, object_id):
        """Draw random rows without replacement and keep those of other objects.

        At most `num_negative_samples` rows are drawn; the draw is capped at
        the dataset size so small datasets do not make `np.random.choice`
        raise. Rows that belong to `object_id` are discarded afterwards, so
        fewer features than requested may be returned.
        """
        num_draws = min(self.num_negative_samples, len(self.ids))
        candidates = np.random.choice(len(self.ids), size = num_draws, replace = False)
        candidates = candidates[self.ids[candidates] != object_id]
        return torch.Tensor(self.features[candidates]).type(self.tensor_type)

    def __getitem__(self, index):
        """Return one training sample.

        Sequence mode returns `(positive_sequence, negative_sequence,
        negative_features)`: all features of the object, then for every frame
        of the object except its first either the features of the other
        objects seen in the same sequence and frame or None, then randomly
        drawn negatives.

        Otherwise returns `(anchor_feature, positive_feature,
        negative_features)` where the positive is a random row with the same
        object id (possibly the anchor itself).
        """
        if self.sequence:
            object_id = self.unique_ids[index]
            positive_ids = self.ids == object_id
            object_sequence = self.sequences[positive_ids][0]
            object_frames = self.frames[positive_ids]

            positive_sequence = self.features[positive_ids]
            positive_sequence = torch.Tensor(positive_sequence).type(self.tensor_type)
            negative_sequence = []
            for frame in object_frames[1:]:
                idx = np.logical_and(self.sequences==object_sequence, self.frames==frame)
                idx = np.logical_and(idx, self.ids!=object_id)
                if np.sum(idx)==0:
                    negative_sequence.append(None)
                else:
                    negative_sequence.append(torch.Tensor(self.features[idx]).type(self.tensor_type))
            negative_features = self._sample_negative_features(object_id)

            return positive_sequence, negative_sequence, negative_features
        else:
            object_id = self.ids[index]
            anchor_feature = self.features[index]
            anchor_feature = torch.Tensor(anchor_feature).type(self.tensor_type)

            positive_ids = np.where(self.ids == object_id)[0]
            positive_feature = self.features[random.choice(positive_ids)]
            positive_feature = torch.Tensor(positive_feature).type(self.tensor_type)

            negative_features = self._sample_negative_features(object_id)

            return anchor_feature, positive_feature, negative_features

    def __len__(self):
        """Number of items: unique ids in sequence mode, rows otherwise."""
        return self.size

class STIPDataset(Dataset):
    """Dataset over numerically named images stored as `<folder_path>/imgs/*/*.*`.

    With `pad=True` every image is padded to a square with value 127.5 and
    resized to `(img_size, img_size)`; otherwise it is returned at its
    original resolution. `point_cloud` is accepted but not used.
    """

    def __init__(self, folder_path, img_size=416, point_cloud = False, pad = False):
        # Filter before sorting so that non-image files with non-numeric names
        # cannot break the integer sort key.
        image_files = [file for file in glob.glob('%s/imgs/*/*.*' % folder_path) if is_image_file(file)]
        self.files = sorted(image_files, key = lambda x: int(os.path.splitext(os.path.basename(x))[0]))
        self.img_shape = (img_size, img_size)
        self.pad = pad
        self.seq_name = os.path.split(folder_path)[-1]

    def __getitem__(self, index):
        """Return `(img_path, input_img, -1)`.

        `input_img` is a channels-first float tensor scaled by 1/255 exactly
        once, regardless of `pad`. Indices wrap around the number of files.
        """
        img_path = self.files[index % len(self.files)]

        # Extract image
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape
        if self.pad:
            dim_diff = np.abs(h - w)
            # Upper (left) and lower (right) padding
            pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
            # Determine padding
            pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
            # Add padding and normalize
            img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
            # Resize
            img = resize(img, (*self.img_shape, 3), mode='reflect', anti_aliasing = True)
        else:
            # The padded branch has already been scaled above; scaling it a
            # second time (as the code previously did) squeezed padded images
            # into [0, 1/255].
            img = img / 255
        # Channels-first
        input_img = np.transpose(img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        return img_path, input_img, -1

    def __len__(self):
        """Number of image files."""
        return len(self.files)

def collate_fn(inputs):
    """Unwrap a batch by returning its first element unchanged.

    ASSUMES BATCH SIZE IS ALWAYS 1: any further elements are ignored.
    """
    only_sample = inputs[0]
    return only_sample

================================================
FILE: paper_experiments/utils/deep_sort_utils.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2


def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.
    Unlike the deep_sort original, the overlap measure is the
    intersection-over-union of the two boxes, and a box whose height is
    below 50 suppresses its neighbours at `max_bbox_overlap - 0.1`.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : array_like
        Array of ROIs (x, y, width, height), one per row.
    max_bbox_overlap : float
        ROIs that overlap more than this values are suppressed.
    scores : Optional[array_like]
        Detector confidence score. When omitted, boxes are ranked by their
        bottom edge instead.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    # np.float was removed in NumPy 1.24; float64 is the type it aliased.
    # asarray also lets callers pass plain lists, as the example above does.
    boxes = np.asarray(boxes, dtype=np.float64)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Ascending order: the best remaining candidate is always the last entry.
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # Intersection of the picked box with every remaining candidate.
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # Intersection over union (changed from deepsort to sum both areas).
        overlap = (w * h) / (area[idxs[:last]] + area[idxs[last:last+1]] - w * h)
        # Boxes less than 50 high use a stricter threshold.
        if y2[i] - y1[i] < 50:
            threshold = max_bbox_overlap - 0.1
        else:
            threshold = max_bbox_overlap
        # Drop the picked box and everything it suppresses.
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > threshold)[0])))

    return pick


================================================
FILE: paper_experiments/utils/detection.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np


class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    box_3d : array_like | NoneType
        3D box, or None when no 3D detection is available. Element 1 is
        shifted by half of element 4 on construction (see `__init__`); the
        caller's object is left untouched.
    confidence : float
        Detector confidence score.
    appearance_feature : array_like
        Appearance feature vector of the detection.
    feature : array_like | NoneType
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    box_3d : ndarray | NoneType
        Shifted 3D box as float32, or None.
    confidence : float
        Detector confidence score.
    appearance_feature : ndarray
        Appearance feature as float32.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):
        # np.float was removed in NumPy 1.24; float64 is the type it aliased.
        self.tlwh = np.asarray(tlwh, dtype=np.float64)
        # Note that detections format is centre of 3D box and dimensions (not bottom face)
        if box_3d is not None:
            # Work on a private copy: shifting the caller's array in place
            # would shift it again every time the same box is reused.
            shifted_box = np.array(box_3d, dtype=np.float64)
            shifted_box[1] -= shifted_box[4]/2
            self.box_3d = shifted_box.astype(np.float32)
        else:
            self.box_3d = None
        self.confidence = float(confidence)
        self.appearance_feature = np.asarray(appearance_feature, dtype=np.float32)
        if feature is not None:
            self.feature = np.asarray(feature, dtype = np.float32)
        else:
            self.feature = None

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xywh(self):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        return ret

    def get_3d_distance(self):
        """Return the Euclidean norm of elements 0 and 2 of `box_3d`, or
        None when the detection has no 3D box.
        """
        if self.box_3d is None:
            return None
        return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)

================================================
FILE: paper_experiments/utils/double_measurement_kf.py
================================================
import random
import numpy as np
import scipy.linalg
import EKF
import pdb
import kf_2d
import os
import pickle
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
from read_detections import read_ground_truth_3d_detections, read_ground_truth_2d_detections
np.set_printoptions(precision=4, suppress=True)
from calibration import Calibration
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from evaluation.distances import iou_matrix

class KF_3D(kf_2d.KalmanFilter2D):
    """
    3D Kalman filter that tracks objects in 3D space.

        The 9-dimensional state space

            x, y, z, l, h, w, theta, vx, vz

        contains the bounding box position (x, y, z), the box dimensions
        l, h, w, the heading angle theta, and the x and z velocities.

        Object motion follows a constant velocity model. The first seven
        state components (x, y, z, l, h, w, theta) are taken as direct
        observation of the state space (linear observation model).
    """
    def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight, 
                    std_process, std_measurement_2d, std_measurement_3d, 
                    initial_uncertainty, omni = True, debug=True):
        self.ndim, self.dt = 9, 1.

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(self.ndim, self.ndim)
        self._motion_mat[0, 7] = self.dt
        self._motion_mat[2, 8] = self.dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(self.ndim - 2, self.ndim)
        if omni:
            self.x_constant = calib.img_shape[2]/(2*np.pi)
            self.y_constant = calib.median_focal_length_y
            self.calib = calib
        else:
            self.projection_matrix = calib.P

        self.omni = omni
        self._std_weight_pos_3d = pos_weight_3d
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._std_weight_theta= theta_weight

        self._std_weight_process = std_process
        self._initial_uncertainty = initial_uncertainty
        self._std_weight_measurement_2d = std_measurement_2d
        self._std_weight_measurement_3d = std_measurement_3d
        self.debug = debug

    def initiate(self, measurement_3d):
        """Create the initial state distribution from a 3D measurement.

        The mean is `measurement_3d` followed by two zero velocities. The
        covariance is diagonal: the first six standard deviations scale with
        the corresponding measurement entry, theta and the velocities use
        their fixed weights, and everything is multiplied by
        `(initial_uncertainty * std_process) ** 2`.
        """
        mean = np.r_[measurement_3d, np.zeros((2,))]

        # One entry per measured component 0..5, scaled by its own magnitude.
        std = [self._std_weight_pos_3d * measurement_3d[i] for i in range(6)]
        std.append(self._std_weight_theta)
        std.extend([self._std_weight_vel, self._std_weight_vel])

        scale = (self._initial_uncertainty*self._std_weight_process)**2
        covariance = np.diag(np.square(std))*scale

        return mean, covariance
        
    def get_process_noise(self, mean):
        """Return the diagonal process noise covariance.

        `mean` is not used. The unscaled covariance is also stored in
        `self._motion_cov`; the returned matrix is that covariance multiplied
        by `std_process ** 2`.
        """
        per_state_std = np.r_[
            [self._std_weight_pos_3d] * 6,  # x, y, z, l, h, w
            [self._std_weight_theta],       # theta
            [self._std_weight_vel] * 2,     # vx, vz
        ]
        unscaled_cov = np.diag(np.square(per_state_std))
        self._motion_cov = unscaled_cov
        return unscaled_cov*self._std_weight_process**2
    
    def get_2d_measurement_noise(self, measurement_2d):
        """Return the diagonal 4x4 noise covariance of a 2D measurement.

        The standard deviations scale with elements 2 and 3 of
        `measurement_2d` (box width and height when it is given as
        `(min_x, min_y, width, height)`), and the result is multiplied by
        `std_measurement_2d ** 2`.
        """
        std_from_elem2 = self._std_weight_pos*measurement_2d[2]
        std_from_elem3 = self._std_weight_pos*measurement_2d[3]
        std = [std_from_elem2, std_from_elem3, std_from_elem2, std_from_elem3]
        return np.diag(np.square(std))*self._std_weight_measurement_2d**2
    
    def get_3d_measurement_noise(self, measurement):
        """Return the diagonal 7x7 noise covariance of a 3D measurement.

        The first six standard deviations (x, y, z, l, h, w) scale with the
        corresponding entry of `measurement`; theta uses its fixed weight.
        The result is multiplied by `std_measurement_3d ** 2`.
        """
        std = [self._std_weight_pos_3d * measurement[i] for i in range(6)]
        std.append(self._std_weight_theta)
        return np.diag(np.square(std))*self._std_weight_measurement_3d**2
    
    def gating_distance(self, mean, covariance, measurements,
                        only_position=False,
                        use_3d=True):
        """Compute gating distance between state distribution and measurements.

        The distance is evaluated by `EKF.squared_mahalanobis_distance` either
        in the 3D measurement space or in the projected 2D box space.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (9 dimensional).
        covariance : ndarray
            Covariance of the state distribution (9x9 dimensional).
        measurements : ndarray
            One measurement per row. With `use_3d` the columns are compared
            against the first seven state components
            (x, y, z, l, h, w, theta); otherwise against the projected box
            `(min_x, min_y, width, height)`.
        only_position : Optional[bool]
            If True, only the first three (3D) or first two (2D) components
            are compared.
        use_3d : Optional[bool]
            Selects the 3D (default) or the projected 2D comparison.

        Returns
        -------
        Whatever `EKF.squared_mahalanobis_distance` returns for the selected
        mean, covariance and measurements.

        """
        if not use_3d:
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            projected_box = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
            # The 2D measurement noise scales with the box width and height,
            # so it must be given the projected box (as `update` does) rather
            # than the 9-d state, whose elements 2 and 3 are z and l.
            cov = self.project_cov_2d(projected_box, covariance, H_2d)
            mean = projected_box
        else:
            mean, cov = mean[:7], covariance[:7, :7]
        if only_position:
            if use_3d:
                mean, cov = mean[:3], cov[:3, :3]
                measurements = measurements[:, :3]
            else:
                mean, cov = mean[:2], cov[:2, :2]
                measurements = measurements[:, :2]
        return EKF.squared_mahalanobis_distance(mean, cov, measurements)

    def project_cov(self, mean, covariance):
        """Return the innovation covariance S of the 3D measurement.

        S = H P H^T + R, with H the linear observation matrix and R the 3D
        measurement noise evaluated at `mean`.
        """
        H = self._observation_mat
        noise = self.get_3d_measurement_noise(mean)
        projected = np.linalg.multi_dot((H, covariance, H.T))
        return projected + noise

    def project_cov_2d(self, mean, covariance, H_2d):
        """Return the innovation covariance S of the 2D measurement.

        S = H_2d P H_2d^T + R, with R the 2D measurement noise evaluated at
        `mean`.
        """
        noise = self.get_2d_measurement_noise(mean)
        projected = np.linalg.multi_dot((H_2d, covariance, H_2d.T))
        return projected + noise
    # @profile
    def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        A 3D update is applied first (when 3D measurements are available),
        followed by a 2D update against the projected box (when
        `measurement_2d` is not None).

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (9 dimensional).
        covariance : ndarray
            The state's covariance matrix (9x9 dimensional). It is not
            modified.
        measurement_2d : ndarray | list | NoneType
            Without JPDA a single box whose first four entries are
            `(min_x, min_y, width, height)`; with JPDA a sequence of such
            boxes. None skips the 2D update.
        measurement_3d : ndarray | list | NoneType
            Without JPDA a single 7 dimensional measurement
            `(x, y, z, l, h, w, theta)`; with JPDA a sequence of them. The 3D
            update is skipped when this is None or contains a None entry.
        marginalization : sequence of float
            JPDA association weights; entry 0 belongs to the "no detection"
            hypothesis and entry `i + 1` to measurement `i`.
        JPDA : bool
            Selects the JPDA update instead of the single-measurement update.

        Returns
        -------
        (ndarray, ndarray) or (ndarray, ndarray, ndarray)
            The measurement-corrected mean and covariance. When `self.debug`
            is set, the mean right after the 3D step is returned as a third
            value (equal to the input mean when no 3D step ran).

        """
        if np.any(np.isnan(mean)):
            # Keep the return arity consistent with the normal exit below.
            if self.debug:
                return mean, covariance, mean
            return mean, covariance

        # Work on a copy: the in-place updates below must neither modify the
        # caller's array nor the prior that the JPDA "no detection" term uses.
        out_cov = np.array(covariance)
        H_3d = self._observation_mat
        covariance_3d = None
        post_3d_mean = mean

        def _kalman_gain(cov, S_matrix, H):
            # K = P H^T S^-1, solved through a Cholesky factorisation of S.
            try:
                chol_factor, lower = scipy.linalg.cho_factor(
                    S_matrix, lower=True, check_finite=False)
                return scipy.linalg.cho_solve(
                    (chol_factor, lower), np.dot(cov, H.T).T,
                    check_finite=False).T
            except Exception:
                # in case cholesky factorization fails, revert to standard solver
                return np.linalg.multi_dot((cov, H.T, np.linalg.inv(S_matrix)))

        do_3d = measurement_3d is not None and all(meas is not None for meas in measurement_3d)
        if do_3d:
            S_matrix = self.project_cov(mean, out_cov)
            kalman_gain = _kalman_gain(out_cov, S_matrix, H_3d)
            out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))
            if JPDA:
                # Weighted innovation and spread over all associated detections.
                innovation_3d = 0
                cov_uncertainty_3d = 0
                for i, detection_3d in enumerate(measurement_3d):
                    innovation_partial = detection_3d - mean[:7]
                    innovation_3d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
                    cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)
                # P = b0*P_prior + (1 - b0)*P_corrected + K*spread*K^T
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                out_cov += marginalization[0]*covariance
            else:
                # NOTE(review): K*S*K^T has already been subtracted above, so
                # this looks like a second covariance correction - confirm
                # whether it is intended before changing it.
                out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))
                innovation_3d = measurement_3d - mean[:7]
            mean = mean + np.dot(kalman_gain, innovation_3d)
            post_3d_mean = mean
            covariance_3d = out_cov

        if measurement_2d is not None:
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
            #update based on 2D
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            # Box predicted by the current state, as (min_x, min_y, width, height).
            predicted_box = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
            S_matrix = self.project_cov_2d(predicted_box, out_cov, H_2d)
            kalman_gain = _kalman_gain(out_cov, S_matrix, H_2d)
            out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)
            if JPDA:
                innovation_2d = 0
                cov_uncertainty_2d = 0
                for i, detection_2d in enumerate(measurement_2d):
                    innovation_partial = detection_2d[:4] - predicted_box
                    innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
                    cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                # The prior of the 2D step is the result of the 3D step, if any.
                if covariance_3d is None:
                    out_cov += marginalization[0]*covariance
                else:
                    out_cov += marginalization[0]*covariance_3d
            else:
                innovation_2d = measurement_2d[:4] - predicted_box
            mean = mean + np.dot(kalman_gain, innovation_2d)

        if self.debug:
            return mean, out_cov, post_3d_mean
        return mean, out_cov

    # @profile
    def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):
        """Assemble the linearised measurement matrix for a 2D box observation.

        The 2D box is described as (min_x, min_y, width, height) of the
        projected corners. Each row chains the projection Jacobian of the
        extremal corner with that corner's Jacobian w.r.t. the box state.

        Args:
            mean: state vector handed through to ``self.corner_jacobian``.
            corner_points: projected corners, indexable as ``pt[0]``/``pt[1]``.
            corner_points_3d: the matching 3D corners, indexed by corner number.

        Returns:
            Array with four rows (x, y, w, h) whose columns are those produced
            by ``corner_jacobian`` followed by two zero columns (presumably the
            state entries beyond the box parameters -- confirm against the
            state layout).
        """
        # Per image axis (0 = x, 1 = y): extremal coordinate and the corner that
        # attains it. Strict comparisons keep the first corner on ties.
        lowest = [np.inf, np.inf]
        highest = [-np.inf, -np.inf]
        lowest_idx = [None, None]
        highest_idx = [None, None]
        for corner_idx, corner in enumerate(corner_points):
            for axis in (0, 1):
                coord = corner[axis]
                if coord < lowest[axis]:
                    lowest[axis], lowest_idx[axis] = coord, corner_idx
                if coord > highest[axis]:
                    highest[axis], highest_idx[axis] = coord, corner_idx

        # Choose the projection model once instead of duplicating every row.
        projection_jacobian = self.jacobian_omni if self.omni else self.jacobian

        def image_row(axis, corner_idx):
            # d(pixel coordinate)/d(state) for one corner via the chain rule.
            return np.dot(projection_jacobian(corner_points_3d[corner_idx])[axis],
                          self.corner_jacobian(mean, corner_idx))

        jac_x = image_row(0, lowest_idx[0])
        jac_y = image_row(1, lowest_idx[1])
        # Width/height are differences of the max and min coordinates.
        jac_w = image_row(0, highest_idx[0]) - jac_x
        jac_h = image_row(1, highest_idx[1]) - jac_y

        stacked = np.vstack([jac_x, jac_y, jac_w, jac_h])
        return np.hstack([stacked, np.zeros((stacked.shape[0], 2))])
    # Jacobian for projective transformation
    def jacobian(self, pt_3d):
        """Jacobian of the perspective projection w.r.t. the 3D point.

        With P = ``self.projection_matrix`` and homogeneous point X, the pixel
        is (u, v) = (P[0].X, P[1].X) / (P[2].X). The quotient rule gives

            d(u, v)/dX_j = (P[0:2, j] - (u, v) * P[2, j]) / (P[2].X)

        The previous expression multiplied P[2, j] by X_j * P[0:2, j] instead of
        by the full numerator, which drops the dependence of the pixel on depth.

        Args:
            pt_3d: point with as many entries as ``projection_matrix`` has
                columns (the corners built in ``calculate_corners`` are
                homogeneous 4-vectors).

        Returns:
            (2, 3) array: rows are (u, v), columns are (x, y, z).
        """
        proj = np.asarray(self.projection_matrix, dtype=float)
        point = np.asarray(pt_3d, dtype=float)
        den = np.dot(proj[2], point)
        pixel = np.dot(proj[0:2], point) / den
        dxy = (proj[0:2] - np.outer(pixel, proj[2])) / den

        return dxy[:, :3]
    
    def jacobian_omni(self, pt_3d):
        """Return the 2x3 Jacobian used for the omnidirectional camera model.

        Rows are the two image coordinates, columns are x, y, z of ``pt_3d``.
        The first row is scaled by ``self.x_constant`` and has no y entry; the
        second row is scaled by ``self.y_constant``.
        NOTE(review): the projection this linearises lives in the calibration
        object and is not visible here -- confirm the formulas against it.
        """
        x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]
        planar_sq = (x**2 + z**2)      # squared distance in the x-z plane
        planar = np.sqrt(planar_sq)

        jac = np.zeros((2, 3))

        # Horizontal image coordinate: depends on x and z only.
        jac[0, 0] = (-self.x_constant*(2*x*(z**2)/planar_sq)) / planar_sq
        jac[0, 2] = (self.x_constant*2*z/planar_sq) * (1 - (z**2)/planar_sq)

        # Vertical image coordinate: every entry carries a 1/sqrt(x^2+z^2) factor.
        jac[1, 0] = (self.y_constant*x*y/planar_sq) / planar
        jac[1, 1] = (-self.y_constant) / planar
        jac[1, 2] = (self.y_constant*z*y/planar_sq) / planar

        return jac

    def calculate_corners(self, box):
        """Compute the eight box corners in 3D and their image projections.

        Args:
            box: sequence whose first seven entries are
                (x, y, z, l, h, w, theta).

        Returns:
            (pts_2d, pts_3d): the output of ``self.project_2d`` for the corners
            and the (8, 4) array of homogeneous corners. Corners 0-3 lie at
            ``y + h/2``, corners 4-7 at ``y - h/2``, in the same x/z order.
        """
        x, y, z, l, h, w, theta = box[:7]
        cos_t, sin_t = np.cos(theta), np.sin(theta)

        # Offsets of the footprint corners from the centre in the x-z plane.
        x_delta_1 = cos_t*l/2 + sin_t*w/2
        x_delta_2 = cos_t*l/2 - sin_t*w/2
        z_delta_1 = sin_t*l/2 - cos_t*w/2
        z_delta_2 = sin_t*l/2 + cos_t*w/2
        footprint = (
            (x + x_delta_1, z + z_delta_1),
            (x + x_delta_2, z + z_delta_2),
            (x - x_delta_2, z - z_delta_2),
            (x - x_delta_1, z - z_delta_1),
        )

        # Repeat the footprint at both y levels; the trailing 1 makes the
        # points homogeneous.
        pts_3d = np.vstack([(cx, level, cz, 1)
                            for level in (y + h/2, y - h/2)
                            for cx, cz in footprint])
        pts_2d = self.project_2d(pts_3d)
        return pts_2d, pts_3d
    
    def corner_jacobian(self, pt_3d, corner_idx):
        """Jacobian of one box corner w.r.t. (x, y, z, l, h, w, theta).

        Matches the corner layout of ``calculate_corners``, where corner
        ``k = corner_idx % 4`` is

            x_c = x + sl*cos(theta)*l/2 + sw*sin(theta)*w/2
            z_c = z + sl*sin(theta)*l/2 - sw*cos(theta)*w/2
            y_c = y + h/2 (corner_idx < 4) or y - h/2 (otherwise)

        with ``sl = +1`` for k in {0, 1} (else -1) and ``sw = +1`` for even k
        (else -1). The previous implementation had sin/cos swapped in the
        length/width columns (and the wrong sign on dz/dw), so those columns
        disagreed with the corners actually being projected; the theta column
        is unchanged.

        Args:
            pt_3d: state whose first seven entries are (x, y, z, l, h, w, theta).
            corner_idx: corner number in 0..7.

        Returns:
            (3, 7) array; rows are (x_c, y_c, z_c).
        """
        _, _, _, l, _, w, theta = pt_3d[:7]
        sin_t, cos_t = np.sin(theta), np.cos(theta)
        footprint_idx = corner_idx % 4
        length_sign = 1.0 if footprint_idx < 2 else -1.0
        width_sign = 1.0 if footprint_idx % 2 == 0 else -1.0

        # Identity block: each corner coordinate moves 1:1 with the centre.
        jac = np.eye(3, 7)

        jac[1, 4] = 0.5 if corner_idx < 4 else -0.5

        # d x_c / d(l, w, theta)
        jac[0, 3] = length_sign*0.5*cos_t
        jac[0, 5] = width_sign*0.5*sin_t
        jac[0, 6] = -length_sign*sin_t*l/2 + width_sign*cos_t*w/2

        # d z_c / d(l, w, theta)
        jac[2, 3] = length_sign*0.5*sin_t
        jac[2, 5] = -width_sign*0.5*cos_t
        jac[2, 6] = length_sign*cos_t*l/2 + width_sign*sin_t*w/2

        return jac

    def project_2d(self, pts_3d):
        """Project homogeneous 3D points into the image.

        For the pinhole case the points are multiplied by
        ``self.projection_matrix`` and normalised by the third coordinate; for
        the omnidirectional case the projection is delegated to
        ``self.calib.project_ref_to_image_torch``.

        Returns:
            Array holding the first two columns (pixel x, y) per point.
        """
        if not self.omni:
            homogeneous = np.dot(pts_3d, self.projection_matrix.T)
            # Perspective divide, done in place like the original.
            homogeneous /= homogeneous[:, 2][:, np.newaxis]
            return homogeneous[:, :2]

        projected = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))
        return projected[:, :2]


def swap(detections_3d, iou, idx, swap_prob = 0):
    """Optionally replace a 3D detection by that of its most-overlapping peer.

    With probability ``swap_prob`` the detection at ``idx`` is exchanged for
    the one with the highest IoU against it, provided that IoU exceeds 0.4.
    This simulates an association mistake between overlapping objects.

    Args:
        detections_3d: indexable collection of 3D detections.
        iou: pairwise IoU matrix; ``iou[idx]`` is the row for ``idx``. It is
            not modified (the previous version wrote -1 into the caller's
            matrix through a view).
        idx: index of the detection being processed.
        swap_prob: probability of attempting a swap.

    Returns:
        ``detections_3d[idx]`` or the swapped-in detection.
    """
    if random.random() > swap_prob:
        return detections_3d[idx]

    # Private copy so that masking out the self-overlap stays local.
    iou_row = np.array(iou[idx])
    iou_row[idx] = -1
    max_idx = np.argmax(iou_row)
    if iou_row[max_idx] > 0.4:
        return detections_3d[max_idx]
    return detections_3d[idx]


if __name__ == '__main__':
    # Stand-alone experiment: run the filter on noisy KITTI ground truth,
    # randomly dropping 2D/3D measurements, and report the mean position error.
    seq = '0001'
    gt_path = os.path.join('data','KITTI','sequences', seq, 'gt')
    prob_3d_list = [0.6]   # probability that a 3D measurement accompanies the 2D one
    prob_2d_list = [0.9]   # probability that a frame yields any update at all
    swap_prob = 0
    std_3d = 0.2
    std_2d = 5
    boxes_3d, ids, frame_3d = read_ground_truth_3d_detections(os.path.join(gt_path, '3d_detections.txt'), None)
    boxes_2d, object_ids, frame_2d = read_ground_truth_2d_detections(os.path.join(gt_path, 'gt.txt'), None, nms_threshold = 1)
    # Presumably converts (x1, y1, x2, y2) into (x1, y1, w, h) -- confirm against the reader.
    boxes_2d[:,2] -= boxes_2d[:,0]
    boxes_2d[:,3] -= boxes_2d[:,1]
    # Shift y by half of column 4 (presumably the height) -- confirm against the reader.
    boxes_3d[:,1] -= boxes_3d[:, 4]/2
    calib = Calibration(os.path.join(os.path.dirname(gt_path), 'calib', seq+'.txt'))
    pos_weight = 0.05
    pos_weight_2d = 0.006
    velocity_weight = 0.0007
    theta_weight = 0.000300
    std_process = 2
    std_measurement_2d = 2.6
    std_measurement_3d = 0.01
    initial_uncertainty = 1

    kf = KF_3D(calib, pos_weight, pos_weight_2d, velocity_weight, theta_weight,
                std_process, std_measurement_2d, std_measurement_3d,
                initial_uncertainty, omni=False, debug=True)
    final_errors = np.zeros((len(prob_2d_list), len(prob_3d_list)))
    random.seed(14295)
    np.random.seed(14295)
    for idx_3d, prob_3d in enumerate(prob_3d_list):
        for idx_2d, prob_2d in enumerate(prob_2d_list):
            id_means = {idx:[] for idx in np.unique(ids)}
            id_means_2d = {idx:[] for idx in np.unique(ids)}
            id_preds = {idx:[] for idx in np.unique(ids)}
            id_meas = {idx:[] for idx in np.unique(ids)}
            id_errors = {idx:[] for idx in np.unique(ids)}
            for frame in sorted(np.unique(frame_2d)):
                frame_mask = frame_2d==frame
                frame_boxes_2d = boxes_2d[frame_mask]
                frame_boxes_3d = boxes_3d[frame_mask]
                frame_ids = ids[frame_mask]
                iou = 1-iou_matrix(frame_boxes_2d[:,:4], frame_boxes_2d[:,:4], max_iou=10) #output of function is 1 - IoU
                for idx, object_id in enumerate(frame_ids):
                    # Skip objects whose third 3D coordinate exceeds 30.
                    if frame_boxes_3d[idx][2] > 30:
                        continue
                    track_id = object_id.item()
                    noise_2d = np.random.randn(*frame_boxes_2d[idx].shape)*std_2d
                    noise_3d = np.random.randn(*frame_boxes_3d[idx].shape)*std_3d
                    if len(id_means[track_id]) == 0:
                        # First sighting: initialise the track and move on.
                        mean, cov = kf.initiate(frame_boxes_2d[idx]+noise_2d, frame_boxes_3d[idx]+noise_3d)
                        id_means[track_id].append((mean, cov, frame))
                        continue
                    mean, cov = kf.predict(id_means[track_id][-1][0], id_means[track_id][-1][1])
                    id_preds[track_id].append((mean, cov, frame))
                    # Reset per object so a skipped update never records the
                    # value left over from a previous iteration (or raises
                    # NameError on the very first skip).
                    mean_2d = None
                    if random.random() < prob_2d:
                        if random.random() < prob_3d:
                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, swap(frame_boxes_3d, iou, idx, swap_prob)+noise_3d)
                        else:
                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, None)
                    id_means[track_id].append((mean, cov, frame))
                    id_means_2d[track_id].append((mean_2d, frame))
                    id_meas[track_id].append((frame_boxes_3d[idx], frame_boxes_2d[idx], frame))
                    id_errors[track_id].append((np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)), frame))
            # Average the position errors of each track. The entries are
            # (error, frame) pairs, so only the first element is averaged; the
            # old code took the mean of the first pair, mixing in the frame id.
            errors = [np.mean([err for err, _ in track_errors])
                      for track_errors in id_errors.values() if len(track_errors) > 0]
            final_errors[idx_2d, idx_3d] = np.mean(errors)
            print("3D prob: %f %% & 2D prob: %f %% & swap prob: %f %%  RMSE: %f"%(prob_3d*100, prob_2d*100, swap_prob*100, final_errors[idx_2d, idx_3d]))
            # Make sure the output directory exists before dumping the traces.
            if not os.path.isdir('results'):
                os.makedirs('results')
            with open('results/kf_mean_pickle.p', 'wb') as f:
                pickle.dump([id_means, id_means_2d, id_meas, id_preds], f)

    print(final_errors)

================================================
FILE: paper_experiments/utils/evaluate_detections.py
================================================
import numpy as np
import os
import pdb
from tqdm import tqdm
from deep_sort_utils import non_max_suppression as deepsort_nms
from visualise import draw_track
import matplotlib.pyplot as plt
from PIL import Image


def evaluate_detections(detection_path_1, detection_path_2, detection_path_3, detection_path_4, gt_path):
	"""Compute an 11-point average precision for one sequence.

	Detections and ground truth are comma-separated text files (format as in
	read_detections.py): column 0 is the frame, columns 2-5 are the box as
	(x, y, w, h) and, for detections, column 6 is the confidence.

	Only ``detection_path_1`` is evaluated; the other three detector paths are
	accepted for interface compatibility and currently ignored.

	Detections with confidence <= 0.9 are discarded, per-frame NMS (overlap
	0.75) is applied, and a detection counts as positive if it overlaps some
	ground-truth box with IoU >= 0.5. Every ground-truth box without such a
	detection is counted as missed and displayed with matplotlib (blocking).

	Returns:
		Mean of the interpolated precision values, 0.0 if no detection
		survives filtering, or None if the files cannot be read or are empty.
	"""
	try:
		# ndmin=2 keeps single-row files two-dimensional.
		detections = np.loadtxt(detection_path_1, delimiter=',', ndmin=2)
		gt = np.loadtxt(gt_path, delimiter=',', ndmin=2)
	except (IOError, OSError, ValueError):
		# Best effort: a missing or malformed file just skips this sequence.
		return
	if detections.size == 0 or gt.size == 0:
		return
	gt_frames = gt[:, 0]

	###CONFIDENCE THRESHOLD
	detections = detections[detections[:, 6] > 0.9]
	########

	print("Average number of detections per frame = %f"%(detections.shape[0]/len(np.unique(gt_frames))))

	det_frames = detections[:, 0]
	det_confidence = detections[:, 6]
	gt_boxes = gt[:, 2:6]
	det_boxes = detections[:, 2:6]
	assignments = []
	missed_detections = 0
	for frame in np.unique(gt_frames):
		frame_mask_det = det_frames == frame
		frame_gt_boxes = gt_boxes[gt_frames == frame]
		frame_det_boxes = det_boxes[frame_mask_det]
		frame_confidence = det_confidence[frame_mask_det]

		# Non-maximum suppression. The kept indices must be applied to the
		# confidences as well, otherwise each (positive, confidence) pair
		# below mixes values from different detections.
		keep = np.asarray(deepsort_nms(frame_det_boxes.astype(np.float32), 0.75,
										frame_confidence.astype(np.float32)), dtype=int)
		frame_det_boxes = frame_det_boxes[keep]
		frame_confidence = frame_confidence[keep]

		positive_arr = np.zeros(len(frame_det_boxes), dtype=bool)
		for gt_box in frame_gt_boxes:
			iou_list = np.asarray([iou(gt_box, det_box) for det_box in frame_det_boxes])
			positive_idx = np.where(iou_list >= 0.5)[0]
			if len(positive_idx) == 0:
				missed_detections += 1
				# Show the missed ground-truth box together with the detections.
				plt.figure(0)
				plt.imshow(Image.open(os.path.join(os.path.split(detection_path_1)[0], '..','imgs','%.6d.png'%frame)))
				draw_track(None, gt_box, det = False)
				for det_box in frame_det_boxes:
					draw_track(None, det_box, det = True)
				plt.show()

			positive_arr[positive_idx] = True
		assignments.extend(zip(positive_arr, frame_confidence))

	print("Total missed detections = %d"%missed_detections)
	if len(assignments) == 0:
		# Nothing survived thresholding/NMS: no precision-recall curve exists.
		return 0.0

	# Rank detections by confidence and accumulate the precision-recall curve.
	assignments = sorted(assignments, key = lambda x: x[1], reverse = True)
	predictions = np.asarray([hit for hit, _ in assignments], dtype=float)
	true_positives = np.cumsum(predictions)
	# Positives ranked below the current position plus unmatched ground truth.
	# (The old reversed cumulative sum also counted the current detection,
	# which is already part of true_positives.)
	false_negatives = true_positives[-1] - true_positives + missed_detections
	precision = true_positives/np.arange(1, len(true_positives)+1)
	recall = true_positives/(true_positives + false_negatives)

	# Sample the curve at the first rank exceeding each recall level 0, 0.1, ...
	base = 0
	idx = []
	for i,recall_val in enumerate(recall):
		if recall_val > base:
			base += 0.1
			idx.append(i)
		if base >1:
			break
	# Interpolated precision: best precision at that rank or any later one.
	precision_vals = [np.amax(precision[index:]) for index in idx]
	if len(precision_vals) < 11:
		precision_vals.extend([0]*(11-len(precision_vals)))
	print(precision_vals)

	return np.mean(precision_vals)


def iou(bbox_1, bbox_2):
	"""Intersection over union of two axis-aligned boxes.

	Args:
		bbox_1, bbox_2: boxes given as (x, y, w, h), with (x, y) the corner
			of smallest coordinates.

	Returns:
		IoU in [0, 1]; 0.0 when both boxes have zero area (the union is empty
		and the ratio would otherwise be 0/0).
	"""
	x1_1, y1_1, w_1, h_1 = bbox_1
	x1_2, y1_2, w_2, h_2 = bbox_2
	x2_1 = x1_1 + w_1
	y2_1 = y1_1 + h_1

	x2_2 = x1_2 + w_2
	y2_2 = y1_2 + h_2

	area_1 = abs(x2_1 - x1_1)*abs(y2_1-y1_1)
	area_2 = abs(x2_2 - x1_2)*abs(y2_2-y1_2)

	# Overlap extents are clamped at zero for disjoint boxes.
	overlap_w = max(0, (min(x2_1, x2_2) - max(x1_1, x1_2)))
	overlap_h = max(0, (min(y2_1, y2_2) - max(y1_1, y1_2)))
	intersection = overlap_w * overlap_h
	union = area_1 + area_2 - intersection

	if union <= 0:
		return 0.0
	return intersection / float(union)

if __name__=='__main__':
	# Evaluate the 21 KITTI sequences and report the mean average precision
	# over those that could be evaluated.
	KITTI_root = 'data/KITTI/sequences'
	detector_files = ('subcnn_car_det.txt', 'rrc_car_det.txt',
						'lsvm_car_det.txt', 'regionlets_car_det.txt')
	ap = []
	for sequence in tqdm(range(21)):
		sequence_dir = os.path.join(KITTI_root, '%.4d'%sequence)
		detection_paths = [os.path.join(sequence_dir, 'det', name) for name in detector_files]
		sequence_ap = evaluate_detections(detection_paths[0], detection_paths[1],
											detection_paths[2], detection_paths[3],
											os.path.join(sequence_dir, 'gt', 'gt_car.txt'))
		# None marks a sequence whose files could not be evaluated.
		if sequence_ap is not None:
			ap.append(sequence_ap)
	print("FINAL AVERAGE PRECISION OVER ALL SEQUENCES IS: %f"%np.mean(ap))


================================================
FILE: paper_experiments/utils/featurepointnet_model_util.py
================================================
import open3d as o3d
import numpy as np
import tensorflow as tf
import os
import sys
import torch
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
import featurepointnet_tf_util as tf_util

# -----------------
# Global Constants
# -----------------

NUM_HEADING_BIN = 12
NUM_SIZE_CLUSTER = 8 # one cluster for each type
NUM_OBJECT_POINT = 512

# Object type name -> size-cluster index, and the inverse lookup.
g_type2class = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
}
g_class2type = dict((class_id, type_name) for type_name, class_id in g_type2class.items())
g_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}

# Mean box size per type, in the (l,w,h) order documented by size2class.
# Added 0.5m and 0.2m for car and pedestrian to make boxes slightly bigger
g_type_mean_size = {
    'Car': np.array([3.88311640418,1.62856739989,1.52563191462]),
    'Van': np.array([5.06763659,1.9007158,2.20532825]),
    'Truck': np.array([10.13586957,2.58549199,3.2520595]),
    'Pedestrian': np.array([0.84422524,0.66068622,1.76255119]),
    'Person_sitting': np.array([0.80057803,0.5983815,1.27450867]),
    'Cyclist': np.array([1.76282397,0.59706367,1.73698127]),
    'Tram': np.array([16.17150617,2.53246914,3.53079012]),
    'Misc': np.array([3.64300781,1.54298177,1.92320313]),
}

# Size clusters: row i holds the mean size of the type with class index i.
g_mean_size_arr = np.vstack([g_type_mean_size[g_class2type[class_id]]
                             for class_id in range(NUM_SIZE_CLUSTER)])

# -----------------
# TF Functions Helpers
# -----------------

def tf_gather_object_pc(point_cloud, mask, npoints=512):
    ''' Sample a fixed number of masked points from each point cloud.
    Input:
        point_cloud: TF tensor in shape (B,N,C)
        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)
        npoints: int scalar, number of points gathered per cloud (default: 512)
    Output:
        object_pc: TF tensor in shape (B,npoint,C)
        indices: TF int tensor in shape (B,npoint,2), rows of (batch, point)
    '''
    def mask_to_indices(mask_np):
        # Runs in numpy through tf.py_func.
        batch = mask_np.shape[0]
        indices = np.zeros((batch, npoints, 2), dtype=np.int32)
        for b in range(batch):
            indices[b,:,0] = b
            picked = np.where(mask_np[b,:]>0.5)[0]
            num_picked = len(picked)
            # Nothing selected: the point indices stay at 0 for this cloud.
            if num_picked == 0:
                continue
            if num_picked > npoints:
                # Too many points: subsample without repetition.
                choice = np.random.choice(num_picked,
                    npoints, replace=False)
            else:
                # Too few points: keep all of them and pad with repeats.
                padding = np.random.choice(num_picked,
                    npoints-num_picked, replace=True)
                choice = np.concatenate((np.arange(num_picked), padding))
            np.random.shuffle(choice)
            indices[b,:,1] = picked[choice]
        return indices

    indices = tf.py_func(mask_to_indices, [mask], tf.int32)
    object_pc = tf.gather_nd(point_cloud, indices)
    return object_pc, indices


def get_box3d_corners_helper(centers, headings, sizes):
    """ TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3)

    Builds the eight corners of each box in its own frame from the sizes,
    rotates them about the y axis by the heading and translates to the centre.
    """
    N = centers.get_shape()[0].value
    half_l = tf.slice(sizes, [0,0], [-1,1])/2 # (N,1)
    half_w = tf.slice(sizes, [0,1], [-1,1])/2 # (N,1)
    half_h = tf.slice(sizes, [0,2], [-1,1])/2 # (N,1)

    # Sign of each half extent for the eight corners.
    x_signs = (1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0)
    y_signs = (1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0)
    z_signs = (1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0)
    x_corners = tf.concat([sign*half_l for sign in x_signs], axis=1) # (N,8)
    y_corners = tf.concat([sign*half_h for sign in y_signs], axis=1) # (N,8)
    z_corners = tf.concat([sign*half_w for sign in z_signs], axis=1) # (N,8)
    corners = tf.stack([x_corners, y_corners, z_corners], axis=1) # (N,3,8)

    # Rotation about the y axis by the heading angle.
    c = tf.cos(headings)
    s = tf.sin(headings)
    ones = tf.ones([N], dtype=tf.float32)
    zeros = tf.zeros([N], dtype=tf.float32)
    R = tf.stack([
        tf.stack([c,zeros,s], axis=1),
        tf.stack([zeros,ones,zeros], axis=1),
        tf.stack([-s,zeros,c], axis=1),
    ], axis=1) # (N,3,3)

    rotated = tf.matmul(R, corners) # (N,3,8)
    translated = rotated + tf.tile(tf.expand_dims(centers,2), [1,1,8]) # (N,3,8)
    return tf.transpose(translated, perm=[0,2,1]) # (N,8,3)

def get_box3d_corners(center, heading_residuals, size_residuals):
    """ TF layer. Corners of every (heading bin, size cluster) box hypothesis.
    Inputs:
        center: (B,3)
        heading_residuals: (B,NH)
        size_residuals: (B,NS,3)
    Outputs:
        box3d_corners: (B,NH,NS,8,3) tensor
    """
    batch_size = center.get_shape()[0].value
    heading_bin_centers = tf.constant(np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    headings = heading_residuals + tf.expand_dims(heading_bin_centers, 0) # (B,NH)

    # Size of each hypothesis = cluster mean + predicted residual. The
    # residual must be added exactly once; it used to be folded into
    # mean_sizes as well, which doubled it.
    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    sizes = mean_sizes + size_residuals # (B,NS,3)
    sizes = tf.tile(tf.expand_dims(sizes,1), [1,NUM_HEADING_BIN,1,1]) # (B,NH,NS,3)
    headings = tf.tile(tf.expand_dims(headings,-1), [1,1,NUM_SIZE_CLUSTER]) # (B,NH,NS)
    centers = tf.tile(tf.expand_dims(tf.expand_dims(center,1),1), [1,NUM_HEADING_BIN, NUM_SIZE_CLUSTER,1]) # (B,NH,NS,3)

    # Flatten all hypotheses into one batch for the corner helper.
    N = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER
    corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N,3]), tf.reshape(headings, [N]), tf.reshape(sizes, [N,3]))

    return tf.reshape(corners_3d, [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])


def huber_loss(error, delta):
    ''' Mean Huber loss: quadratic for |error| <= delta, linear beyond it.
    Input:
        error: TF tensor of residuals
        delta: threshold between the quadratic and linear regimes
    Output:
        TF scalar tensor, mean over all elements
    '''
    magnitude = tf.abs(error)
    # Part of the magnitude that falls inside the quadratic zone ...
    inside = tf.minimum(magnitude, delta)
    # ... and whatever exceeds it, which is penalised linearly.
    outside = magnitude - inside
    per_element = 0.5 * inside**2 + delta * outside
    return tf.reduce_mean(per_element)


def parse_output_to_tensors(output, end_points):
    ''' Split the box-estimation output into named tensors in end_points.
    Input:
        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)
        end_points: dict
    Output:
        end_points: dict (updated)
    '''
    batch_size = output.get_shape()[0].value

    # Column layout of `output`:
    # [center | heading scores | heading residuals | size scores | size residuals]
    heading_scores_start = 3
    heading_residuals_start = heading_scores_start + NUM_HEADING_BIN
    size_scores_start = heading_residuals_start + NUM_HEADING_BIN
    size_residuals_start = size_scores_start + NUM_SIZE_CLUSTER

    end_points['center_boxnet'] = tf.slice(output, [0,0], [-1,3])

    heading_scores = tf.slice(output, [0,heading_scores_start],
        [-1,NUM_HEADING_BIN]) # BxNUM_HEADING_BIN
    heading_residuals_normalized = tf.slice(output,
        [0,heading_residuals_start], [-1,NUM_HEADING_BIN]) # BxNUM_HEADING_BIN (-1 to 1)
    end_points['heading_scores'] = heading_scores
    end_points['heading_residuals_normalized'] = heading_residuals_normalized
    # Undo the normalisation: residuals are expressed in units of pi/NUM_HEADING_BIN.
    end_points['heading_residuals'] = \
        heading_residuals_normalized * (np.pi/NUM_HEADING_BIN) # BxNUM_HEADING_BIN

    size_scores = tf.slice(output, [0,size_scores_start],
        [-1,NUM_SIZE_CLUSTER]) # BxNUM_SIZE_CLUSTER
    size_residuals_flat = tf.slice(output,
        [0,size_residuals_start], [-1,NUM_SIZE_CLUSTER*3])
    size_residuals_normalized = tf.reshape(size_residuals_flat,
        [batch_size, NUM_SIZE_CLUSTER, 3]) # BxNUM_SIZE_CLUSTERx3
    end_points['size_scores'] = size_scores
    end_points['size_residuals_normalized'] = size_residuals_normalized
    # Size residuals are relative to the mean size of each cluster.
    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0)
    end_points['size_residuals'] = size_residuals_normalized * mean_sizes

    return end_points

# -----------------
# Box Parsing Helpers
# -----------------

def from_prediction_to_label_format(center, angle_class, angle_res,\
                                    size_class, size_res, rot_angle):
    ''' Convert predicted box parameters to label format.
    Input:
        center: box centre, 3 values (x,y,z); not modified
        angle_class, angle_res: heading bin and residual
        size_class, size_res: size cluster and residual
        rot_angle: rad scalar, rotation to undo
    Output:
        tx,ty,tz,l,w,h,ry: centre rotated by -rot_angle with ty shifted by
            h/2, box size, and heading with rot_angle added back
    '''
    l,w,h = class2size(size_class, size_res)
    ry = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle
    # rotate_pc_along_y rotates in place and np.expand_dims returns a view,
    # so rotate a private copy to leave the caller's center untouched.
    center_row = np.array(np.expand_dims(center,0))
    tx,ty,tz = rotate_pc_along_y(center_row,-rot_angle).squeeze()
    ty += h/2.0
    return tx,ty,tz,l,w,h,ry

def size2class(size, type_name):
    ''' Express a 3D box size as a size-cluster class plus a residual.
    todo (rqi): support multiple size clusters per type.

    Input:
        size: numpy array of shape (3,) for (l,w,h)
        type_name: string
    Output:
        size_class: int scalar
        size_residual: numpy array of shape (3,)
    '''
    # One cluster per object type: the class follows from the type name and
    # the residual is the offset from that type's mean size.
    cluster_id = g_type2class[type_name]
    cluster_mean = g_type_mean_size[type_name]
    return cluster_id, size - cluster_mean

def class2size(pred_cls, residual):
    ''' Inverse function to size2class: cluster mean size plus residual. '''
    type_name = g_class2type[pred_cls]
    return g_type_mean_size[type_name] + residual

def angle2class(angle, num_class):
    ''' Split a continuous angle into a discrete bin and a residual.
    Input:
        angle: rad scalar, from 0-2pi (or -pi~pi), class center at
            0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N)
        num_class: int scalar, number of classes N
    Output:
        class_id, int, among 0,1,...,N-1
        residual_angle: float, a number such that
            class*(2pi/N) + residual_angle = angle
    '''
    full_turn = 2*np.pi
    angle = angle%full_turn
    assert(angle>=0 and angle<=full_turn)
    angle_per_class = full_turn/float(num_class)
    half_bin = angle_per_class/2
    # Shift by half a bin so that each bin is centred on its class angle.
    shifted_angle = (angle+half_bin)%full_turn
    class_id = int(shifted_angle/angle_per_class)
    shifted_center = class_id * angle_per_class + half_bin
    return class_id, shifted_angle - shifted_center

def class2angle(pred_cls, residual, num_class, to_label_format=True):
    ''' Inverse function to angle2class.
    If to_label_format, angles above pi are wrapped down by 2pi so they
    match the range used in labels.
    '''
    bin_width = 2*np.pi/float(num_class)
    angle = pred_cls * bin_width + residual
    if not to_label_format:
        return angle
    if angle>np.pi:
        return angle - 2*np.pi
    return angle

def rotate_pc_along_y(pc, rot_angle):
    ''' Rotate the X and Z channels of a point cloud, in place.
    Input:
        pc: numpy array (N,C), first 3 channels are XYZ
            z is facing forward, x is left ward, y is downward
        rot_angle: rad scalar
    Output:
        pc: the same array object, with XYZ rotated
    '''
    c, s = np.cos(rot_angle), np.sin(rot_angle)
    rotmat = np.array([[c, -s],[s, c]])
    # Only columns 0 (x) and 2 (z) change; y and any extra channels are kept.
    xz = pc[:,[0,2]]
    pc[:,[0,2]] = xz.dot(rotmat.T)
    return pc

# --------------------------------------
# Shared subgraphs for v1 and v2 models
# --------------------------------------

def placeholder_inputs(batch_size, num_point):
    ''' Create the placeholder tensors for network inputs and labels.
    Input:
        batch_size: scalar int
        num_point: scalar int
    Output:
        TF placeholders for inputs and ground truths
    '''
    per_batch = (batch_size,)

    # Network inputs: 4 channels per point, and a length-3 one-hot vector.
    pointclouds_pl = tf.placeholder(tf.float32,
        shape=(batch_size, num_point, 4))
    one_hot_vec_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))

    # labels_pl is for segmentation label
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point))

    # Box ground truth: center, heading (class + residual), size (class + residual).
    centers_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))
    heading_class_label_pl = tf.placeholder(tf.int32, shape=per_batch)
    heading_residual_label_pl = tf.placeholder(tf.float32, shape=per_batch)
    size_class_label_pl = tf.placeholder(tf.int32, shape=per_batch)
    size_residual_label_pl = tf.placeholder(tf.float32, shape=(batch_size,3))

    return (pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl,
            heading_class_label_pl, heading_residual_label_pl,
            size_class_label_pl, size_residual_label_pl)


def point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):
    ''' Keep the points selected by the predicted mask and express them
    relative to the centroid of the masked points.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        logits: TF tensor in shape (B,N,2)
        end_points: dict
        xyz_only: boolean, if True only return XYZ channels
    Output:
        object_point_cloud: TF tensor in shape (B,M,3)
            for simplicity we only keep XYZ here
            M = NUM_OBJECT_POINT as a hyper-parameter
        mask_xyz_mean: TF tensor in shape (B,3)
        end_points: dict, with the (B,N) mask stored under 'mask'
    '''
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # A point is picked when its channel-1 logit beats its channel-0 logit.
    logit_0 = tf.slice(logits,[0,0,0],[-1,-1,1])
    logit_1 = tf.slice(logits,[0,0,1],[-1,-1,1])
    mask = tf.to_float(logit_0 < logit_1) # BxNx1
    mask_count = tf.tile(tf.reduce_sum(mask,axis=1,keep_dims=True),
        [1,1,3]) # Bx1x3
    point_cloud_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3]) # BxNx3
    mask_xyz_sum = tf.reduce_sum(tf.tile(mask, [1,1,3])*point_cloud_xyz,
        axis=1, keep_dims=True) # Bx1x3
    mask = tf.squeeze(mask, axis=[2]) # BxN
    end_points['mask'] = mask
    # Clamp the count at 1 so an empty mask does not divide by zero.
    mask_xyz_mean = mask_xyz_sum/tf.maximum(mask_count,1) # Bx1x3

    # Translate to masked points' centroid
    centered_xyz = point_cloud_xyz - tf.tile(mask_xyz_mean, [1,num_point,1])

    if not xyz_only:
        # Re-attach the non-XYZ channels behind the centred coordinates.
        extra_channels = tf.slice(point_cloud, [0,0,3], [-1,-1,-1])
        point_cloud_stage1 = tf.concat([centered_xyz, extra_channels], axis=-1)
    else:
        point_cloud_stage1 = centered_xyz
    num_channels = point_cloud_stage1.get_shape()[2].value

    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,
        mask, NUM_OBJECT_POINT)
    # The py_func-based gather yields no static shape, so restore it here.
    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])

    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points


def get_center_regression_net(object_point_cloud, one_hot_vec,
                              is_training, bn_decay, end_points):
    ''' Regression network for center delta. a.k.a. T-Net.
    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in 3D mask coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
    Output:
        predicted_center: TF tensor in shape (B,3)
        end_points: dict, returned unchanged
    '''
    num_point = object_point_cloud.get_shape()[1].value
    net = tf.expand_dims(object_point_cloud, 2)

    # Per-point feature extraction: three 1x1 convolutions with batch norm.
    conv_layers = (
        (128, 'conv-reg1-stage1'),
        (128, 'conv-reg2-stage1'),
        (256, 'conv-reg3-stage1'),
    )
    for num_outputs, scope in conv_layers:
        net = tf_util.conv2d(net, num_outputs, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope=scope, bn_decay=bn_decay)

    # Max-pool over all points to get one global feature per object.
    net = tf_util.max_pool2d(net, [num_point,1],
        padding='VALID', scope='maxpool-stage1')
    net = tf.squeeze(net, axis=[1,2])
    net = tf.concat([net, one_hot_vec], axis=1)

    # Fully connected head; the last layer is linear and outputs the center.
    fc_layers = (
        (256, 'fc1-stage1'),
        (128, 'fc2-stage1'),
    )
    for num_outputs, scope in fc_layers:
        net = tf_util.fully_connected(net, num_outputs, scope=scope, bn=True,
            is_training=is_training, bn_decay=bn_decay)
    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,
        scope='fc3-stage1')
    return predicted_center, end_points


def get_loss(mask_label, center_label, \
             heading_class_label, heading_residual_label, \
             size_class_label, size_residual_label, \
             end_points, \
             corner_loss_weight=10.0, \
             box_loss_weight=1.0):
    ''' Loss functions for 3D object detection.

    Builds the segmentation, center, heading, size and corner losses,
    writes a scalar summary for each of them and returns their weighted sum.

    Input:
        mask_label: TF int32 tensor in shape (B,N)
        center_label: TF tensor in shape (B,3)
        heading_class_label: TF int32 tensor in shape (B,)
        heading_residual_label: TF tensor in shape (B,)
        size_class_label: TF tensor int32 in shape (B,)
        size_residual_label: TF tensor in shape (B,3)
        end_points: dict, outputs from our model. The keys read here are
            'mask_logits', 'center', 'stage1_center', 'heading_scores',
            'heading_residuals_normalized', 'heading_residuals',
            'size_scores', 'size_residuals_normalized', 'size_residuals'.
        corner_loss_weight: float scalar
        box_loss_weight: float scalar
    Output:
        total_loss: TF scalar tensor
            the total_loss is also added to the losses collection
    '''
    # 3D Segmentation loss
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
        logits=end_points['mask_logits'], labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)

    # Center regression losses
    # Both the final center and the stage-1 center are compared with the same
    # label; only the delta handed to huber_loss differs (2.0 vs 1.0).
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - \
        end_points['stage1_center'], axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)

    # Heading loss
    heading_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)

    # Integer one-hot of the GT heading bin; cast to float wherever it is
    # multiplied with float tensors.
    hcls_onehot = tf.one_hot(heading_class_label,
        depth=NUM_HEADING_BIN,
        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN
    # The residual label is divided by pi/NUM_HEADING_BIN before being
    # compared with the predicted normalized residual of the GT bin.
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi/NUM_HEADING_BIN)
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
        heading_residual_normalized_loss)

    # Size loss
    size_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)

    scls_onehot = tf.one_hot(size_class_label,
        depth=NUM_SIZE_CLUSTER,
        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims( \
        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3
    # Select the predicted residual belonging to the GT size cluster.
    predicted_size_residual_normalized = tf.reduce_sum( \
        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3

    mean_size_arr_expand = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3
    mean_size_label = tf.reduce_sum( \
        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3
    # Normalize the residual label by the mean size of the GT cluster.
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm( \
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
        size_residual_normalized_loss)

    # Corner loss
    # We select the predicted corners corresponding to the
    # GT heading bin and size cluster.
    corners_3d = get_box3d_corners(end_points['center'],
        end_points['heading_residuals'],
        end_points['size_residuals']) # (B,NH,NS,8,3)
    # Outer product of the two one-hot vectors: exactly one (bin, cluster)
    # entry per sample is 1.
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum( \
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,
        axis=[1,2]) # (B,8,3)

    # Rebuild the absolute GT heading: bin center + residual, for the GT bin.
    heading_bin_centers = tf.constant( \
        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    heading_label = tf.expand_dims(heading_residual_label,1) + \
        tf.expand_dims(heading_bin_centers, 0) # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)
    # Rebuild the absolute GT size: cluster mean + residual, for the GT cluster.
    mean_sizes = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum( \
        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)
    corners_3d_gt = get_box3d_corners_helper( \
        center_label, heading_label, size_label) # (B,8,3)
    # Same GT box with the heading turned by pi.
    corners_3d_gt_flip = get_box3d_corners_helper( \
        center_label, heading_label+np.pi, size_label) # (B,8,3)

    # Per corner, keep the smaller of the distances to the GT box and to its
    # flipped version.
    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0) 
    tf.summary.scalar('corners loss', corners_loss)

    # Weighted sum of all losses
    # The mask loss is unweighted; every box-related term is scaled by
    # box_loss_weight, the two normalized residual losses additionally by 20
    # and the corner loss by corner_loss_weight.
    total_loss = mask_loss + box_loss_weight * (center_loss + \
        heading_class_loss + size_class_loss + \
        heading_residual_normalized_loss*20 + \
        size_residual_normalized_loss*20 + \
        stage1_center_loss + \
        corner_loss_weight*corners_loss)
    tf.add_to_collection('losses', total_loss)

    return total_loss

def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
                           clip_distance=40.0):
    ''' Filter lidar points, keep those in image FOV.

    Input:
        pc_velo: numpy array of points, one point per row; it is handed to
            calib.project_ref_to_image_torch as a torch tensor
        calib: calibration object providing project_ref_to_image_torch
        xmin, ymin, xmax, ymax: image window; the lower bounds are
            inclusive, the upper bounds exclusive
        clip_distance: currently unused, kept so existing callers keep working
    Output:
        imgfov_pc_velo: rows of pc_velo whose projection falls in the window
        pts_2d: numpy array with the projection of every input point
        fov_inds: boolean mask over the rows of pc_velo
    '''
    points = torch.from_numpy(pc_velo)
    # Run the projection on the GPU when there is one; fall back to the CPU
    # instead of crashing on CUDA-less machines.
    if torch.cuda.is_available():
        points = points.cuda()
    pts_2d = calib.project_ref_to_image_torch(points)
    pts_2d = pts_2d.cpu().numpy()

    fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \
        (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)
    imgfov_pc_velo = pc_velo[fov_inds,:]
    return imgfov_pc_velo, pts_2d, fov_inds

# @profile
def preprocess_pointcloud(detections, point_cloud, pc_image_coord,
                            calib, num_point = 1024, 
                            lidar_point_threshold=5):
    ''' Extract point clouds in frustums extruded from 2D detection boxes.
        Update: Lidar points and 3d boxes are in *rect camera* coord system
            (as that in 3d box label files)

    Input:
        detections: iterable of 2D detections; the first four entries of
            each one are read as xmin, ymin, xmax, ymax
        point_cloud: (N,C) numpy array of lidar points
        pc_image_coord: (N,>=2) numpy array with the image coordinates of
            the same N points
        calib: calibration object providing project_image_to_rect
        num_point: int, number of points every frustum is resampled to
        lidar_point_threshold: int, neglect frustum with too few points.
    Output:
        point_clouds: list with one (1,num_point,C) array per detection;
            all zeros for neglected frustums
        rot_angles: list with the frustum angle of every detection
        ids_3d: float array of len(detections); -1 marks neglected
            frustums, all other entries stay 0
    '''
    point_clouds = []
    rot_angles = []
    ids_3d = np.zeros((len(detections)))
    # Placeholders must have as many channels as the real frustums, otherwise
    # stacking the per-detection arrays fails for non-4-channel clouds.
    num_channels = point_cloud.shape[1]
    for i, detection in enumerate(detections):

        xmin, ymin, xmax, ymax = detection[:4]
        box_fov_inds = (pc_image_coord[:,0]<xmax) & \
            (pc_image_coord[:,0]>=xmin) & \
            (pc_image_coord[:,1]<ymax) & \
            (pc_image_coord[:,1]>=ymin)
        pc_in_box_fov = point_cloud[box_fov_inds,:]

        # Angle of the ray through the 2D box center; the depth only has to
        # be some positive number, it does not influence the direction.
        box_center = np.array([xmax+xmin, ymin+ymax])/2
        uvdepth = np.zeros((1,3))
        uvdepth[0,0:2] = box_center
        uvdepth[0,2] = 20 # some random depth
        box2d_center_rect = calib.project_image_to_rect(uvdepth)
        frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],
            box2d_center_rect[0,0])
        rot_angles.append(frustum_angle)

        num_in_box = pc_in_box_fov.shape[0]
        if num_in_box < lidar_point_threshold:
            ids_3d[i] = -1
            point_clouds.append(np.zeros((1, num_point, num_channels)))
            continue

        if num_in_box > num_point:
            # Too many points: subsample without replacement.
            keep = np.random.choice(num_in_box, size=num_point, replace=False)
            sampled = pc_in_box_fov[keep]
        else:
            # Too few points: keep all of them and pad with random repeats.
            extra = np.random.choice(num_in_box, size=num_point-num_in_box,
                                     replace=True)
            sampled = np.vstack([pc_in_box_fov, pc_in_box_fov[extra]])
        pc_in_box_fov = np.expand_dims(sampled, 0)
        pc_in_box_fov[0] = rotate_pc_along_y(pc_in_box_fov[0], frustum_angle)
        point_clouds.append(pc_in_box_fov)

    return point_clouds, rot_angles, ids_3d
# @profile
def generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False):
    ''' Run the 3D detector on the frustums of a set of 2D detections.

    The point cloud is first restricted to the points that project into the
    image, then one frustum per 2D detection is extracted and the whole
    batch is passed to `detector`. Boxes of detections whose frustum was
    neglected (id -1) are replaced by None.

    Returns (boxes_3d, ids_3d, rot_angles, scores_3d, depth_features).
    '''
    _, img_height, img_width = img_shape

    # Only the first three columns are projected; a copy is handed over.
    xyz = np.copy(point_cloud[:,:3])
    _, projected, in_image = get_lidar_in_image_fov(
        xyz, calib, 0, 0, img_width, img_height)
    projected = projected[in_image,:]
    visible_cloud = point_cloud[in_image,:]

    frustums, rot_angles, ids_3d = preprocess_pointcloud(
        detections_2d, visible_cloud, projected, calib,
        num_point = detector.num_point)
    frustum_batch = np.vstack(frustums)

    boxes_3d, scores_3d, depth_features = detector(
        frustum_batch, np.asarray(rot_angles), peds)

    # Invalidate the boxes that belong to neglected frustums.
    for idx, det_id in enumerate(ids_3d):
        if det_id == -1:
            boxes_3d[idx] = None
    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features

def convert_depth_features(depth_features_orig, ids_3d, cuda = True):
    ''' Convert per-detection depth features to torch float tensors.

    An entry becomes None when the feature itself is None or when the
    matching entry of ids_3d is -1; every other entry is converted to a
    torch.cuda.FloatTensor (cuda=True) or torch.FloatTensor (cuda=False).
    '''
    tensor_type = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    def _convert(position, feature):
        if feature is None or ids_3d[position] == -1:
            return None
        return torch.Tensor(feature).type(tensor_type)

    return [_convert(position, feature)
            for position, feature in enumerate(depth_features_orig)]


================================================
FILE: paper_experiments/utils/featurepointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.

Author: Charles R. Qi
Date: November 2017
"""

import numpy as np
import tensorflow as tf

def _variable_on_cpu(name, shape, initializer, use_fp16=False):
  """Create (or fetch) a variable pinned to the CPU device.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
    use_fp16: bool, use tf.float16 instead of tf.float32 as dtype
  Returns:
    Variable Tensor
  """
  if use_fp16:
    var_dtype = tf.float16
  else:
    var_dtype = tf.float32
  with tf.device("/cpu:0"):
    return tf.get_variable(name, shape, initializer=initializer,
                           dtype=var_dtype)

def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
  """Create an initialized Variable, optionally with an L2 weight-decay term.

  The Variable uses the Xavier initializer when `use_xavier` is true and a
  truncated normal distribution with the given `stddev` otherwise. When `wd`
  is given, `wd * l2_loss(var)` is added to the 'losses' collection.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    use_xavier: bool, whether to use xavier initializer

  Returns:
    Variable Tensor
  """
  init_fn = (tf.contrib.layers.xavier_initializer() if use_xavier
             else tf.truncated_normal_initializer(stddev=stddev))
  variable = _variable_on_cpu(name, shape, init_fn)
  if wd is None:
    return variable
  decay_term = tf.multiply(tf.nn.l2_loss(variable), wd, name='weight_loss')
  tf.add_to_collection('losses', decay_term)
  return variable


def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 1D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 3-D tensor variable BxLxC
    num_output_channels: int
    kernel_size: int
    scope: string
    stride: int
    padding: 'SAME' or 'VALID'
    data_format: 'NHWC' or 'NCHW'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    assert data_format in ('NHWC', 'NCHW')
    # The channel axis is last for NHWC and second for NCHW.
    if data_format == 'NHWC':
      in_channels = inputs.get_shape()[-1].value
    elif data_format == 'NCHW':
      in_channels = inputs.get_shape()[1].value
    filters = _variable_with_weight_decay(
        'weights',
        shape=[kernel_size, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    convolved = tf.nn.conv1d(inputs, filters,
                             stride=stride,
                             padding=padding,
                             data_format=data_format)
    bias = _variable_on_cpu('biases', [num_output_channels],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(convolved, bias, data_format=data_format)

    if bn:
      result = batch_norm_for_conv1d(result, is_training,
                                     bn_decay=bn_decay, scope='bn',
                                     data_format=data_format)

    if activation_fn is None:
      return result
    return activation_fn(result)


def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 2D convolution with non-linear operation.

  Args:
    inputs: 4-D tensor variable BxHxWxC (or BxCxHxW for 'NCHW')
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    data_format: 'NHWC' or 'NCHW'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    kernel_h, kernel_w = kernel_size
    assert(data_format=='NHWC' or data_format=='NCHW')
    if data_format == 'NHWC':
      num_in_channels = inputs.get_shape()[-1].value
    elif data_format=='NCHW':
      num_in_channels = inputs.get_shape()[1].value
    kernel_shape = [kernel_h, kernel_w,
                    num_in_channels, num_output_channels]
    kernel = _variable_with_weight_decay('weights',
                                         shape=kernel_shape,
                                         use_xavier=use_xavier,
                                         stddev=stddev,
                                         wd=weight_decay)
    stride_h, stride_w = stride
    # tf.nn.conv2d expects the strides in the same dimension order as the
    # data, so the spatial strides sit at different positions for NCHW.
    if data_format == 'NCHW':
      strides = [1, 1, stride_h, stride_w]
    else:
      strides = [1, stride_h, stride_w, 1]
    outputs = tf.nn.conv2d(inputs, kernel,
                           strides,
                           padding=padding,
                           data_format=data_format)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

    if bn:
      outputs = batch_norm_for_conv2d(outputs, is_training,
                                      bn_decay=bn_decay, scope='bn',
                                      data_format=data_format)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return outputs


def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=None,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
  """ 2D convolution transpose with non-linear operation.

  Args:
    inputs: 4-D tensor variable BxHxWxC
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor

  Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    kernel_h, kernel_w = kernel_size
    num_in_channels = inputs.get_shape()[-1].value
    kernel_shape = [kernel_h, kernel_w,
                    num_output_channels, num_in_channels] # reversed to conv2d
    kernel = _variable_with_weight_decay('weights',
                                         shape=kernel_shape,
                                         use_xavier=use_xavier,
                                         stddev=stddev,
                                         wd=weight_decay)
    stride_h, stride_w = stride

    # from slim.convolution2d_transpose
    def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
      # dim_size is either a Python int or a scalar int tensor.
      dim_size = dim_size * stride_size
      if padding == 'VALID':
        dim_size = dim_size + max(kernel_size - stride_size, 0)
      return dim_size

    def input_dim(axis):
      # Prefer the static size; only fall back to a runtime shape op when
      # the dimension is unknown at graph-construction time.
      static_size = inputs.get_shape()[axis].value
      if static_size is not None:
        return static_size
      return tf.shape(inputs)[axis]

    # calculate output shape
    batch_size = input_dim(0)
    height = input_dim(1)
    width = input_dim(2)
    out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
    out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
    output_shape = [batch_size, out_height, out_width, num_output_channels]

    outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                                     [1, stride_h, stride_w, 1],
                                     padding=padding)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    outputs = tf.nn.bias_add(outputs, biases)

    if bn:
      # This layer always works on BxHxWxC data, so the format is explicit;
      # batch_norm_for_conv2d needs the argument.
      outputs = batch_norm_for_conv2d(outputs, is_training,
                                      bn_decay=bn_decay, scope='bn',
                                      data_format='NHWC')

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return outputs

   
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 3D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 5-D tensor variable BxDxHxWxC
    num_output_channels: int
    kernel_size: a list of 3 ints
    scope: string
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_depth, k_height, k_width = kernel_size
    in_channels = inputs.get_shape()[-1].value
    filters = _variable_with_weight_decay(
        'weights',
        shape=[k_depth, k_height, k_width, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    s_depth, s_height, s_width = stride
    convolved = tf.nn.conv3d(inputs, filters,
                             [1, s_depth, s_height, s_width, 1],
                             padding=padding)
    bias = _variable_on_cpu('biases', [num_output_channels],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(convolved, bias)

    if bn:
      result = batch_norm_for_conv3d(result, is_training,
                                     bn_decay=bn_decay, scope='bn')

    if activation_fn is None:
      return result
    return activation_fn(result)

def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=None,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
  """ Dense layer: matmul, bias, optional batch norm and activation.

  Args:
    inputs: 2-D tensor BxN
    num_outputs: int
    scope: string
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor of size B x num_outputs.
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    in_units = inputs.get_shape()[-1].value
    weight_matrix = _variable_with_weight_decay(
        'weights',
        shape=[in_units, num_outputs],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    projected = tf.matmul(inputs, weight_matrix)
    bias = _variable_on_cpu('biases', [num_outputs],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(projected, bias)

    if bn:
      result = batch_norm_for_fc(result, is_training, bn_decay, 'bn')

    if activation_fn is None:
      return result
    return activation_fn(result)


def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D max pooling over the two middle axes of a BxHxWxC tensor.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    scope: string, also used as the name of the pooling op
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    (k_rows, k_cols), (s_rows, s_cols) = kernel_size, stride
    # Batch and channel axes are never pooled.
    return tf.nn.max_pool(inputs,
                          ksize=[1, k_rows, k_cols, 1],
                          strides=[1, s_rows, s_cols, 1],
                          padding=padding,
                          name=sc.name)

def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D average pooling over the two middle axes of a BxHxWxC tensor.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    scope: string, also used as the name of the pooling op
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    (k_rows, k_cols), (s_rows, s_cols) = kernel_size, stride
    # Batch and channel axes are never pooled.
    return tf.nn.avg_pool(inputs,
                          ksize=[1, k_rows, k_cols, 1],
                          strides=[1, s_rows, s_cols, 1],
                          padding=padding,
                          name=sc.name)


def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ 3D max pooling over the three middle axes of a BxDxHxWxC tensor.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints
    scope: string, also used as the name of the pooling op
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_depth, k_rows, k_cols = kernel_size
    s_depth, s_rows, s_cols = stride
    # Batch and channel axes are never pooled.
    return tf.nn.max_pool3d(inputs,
                            ksize=[1, k_depth, k_rows, k_cols, 1],
                            strides=[1, s_depth, s_rows, s_cols, 1],
                            padding=padding,
                            name=sc.name)

def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ 3D average pooling over the three middle axes of a BxDxHxWxC tensor.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints
    scope: string, also used as the name of the pooling op
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_depth, k_rows, k_cols = kernel_size
    s_depth, s_rows, s_cols = stride
    # Batch and channel axes are never pooled.
    return tf.nn.avg_pool3d(inputs,
                            ksize=[1, k_depth, k_rows, k_cols, 1],
                            strides=[1, s_depth, s_rows, s_cols, 1],
                            padding=padding,
                            name=sc.name)


def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
  """ NOTE: this is an older version of the util func. It is deprecated.
  Batch normalization on convolutional maps and beyond...
  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

  While training, the inputs are normalized with the moments of the current
  batch and an exponential moving average of those moments is updated;
  otherwise the stored moving averages are used.

  Args:
      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:   boolean tf.Variable, true indicates training phase
      scope:         string, variable scope
      moments_dims:  a list of ints, indicating dimensions for moments calculation
      bn_decay:      float or float tensor variable, controlling moving average weight
                     (0.9 is used when None)
  Return:
      normed:        batch-normalized maps
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    num_channels = inputs.get_shape()[-1].value
    # Per-channel shift (beta, starts at 0) and scale (gamma, starts at 1).
    beta = _variable_on_cpu(name='beta',shape=[num_channels],
                            initializer=tf.constant_initializer(0))
    gamma = _variable_on_cpu(name='gamma',shape=[num_channels],
                            initializer=tf.constant_initializer(1.0))
    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
    decay = bn_decay if bn_decay is not None else 0.9
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    # Operator that maintains moving averages of variables.
    # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist
    # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        ema_apply_op = tf.cond(is_training,
                               lambda: ema.apply([batch_mean, batch_var]),
                               lambda: tf.no_op())
    
    # Update moving average and return current batch's avg and var.
    def mean_var_with_update():
      # The identity ops are created under the control dependency, so
      # reading them forces the moving-average update to run first.
      with tf.control_dependencies([ema_apply_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)
    
    # ema.average returns the Variable holding the average of var.
    mean, var = tf.cond(is_training,
                        mean_var_with_update,
                        lambda: (ema.average(batch_mean), ema.average(batch_var)))
    # 1e-3 is the variance epsilon.
    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
  return normed


def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
  """ Shared batch-normalization helper built on tf.contrib.layers.batch_norm.
  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

  Args:
      inputs:              Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:         boolean tf.Variable, true indicates training phase
      scope:               string, variable scope
      moments_dims_unused: accepted for signature compatibility, never read
      bn_decay:            float or float tensor variable, controlling moving
                           average weight; 0.9 is used when None
      data_format:         'NHWC' or 'NCHW'
  Return:
      normed:              batch-normalized maps
  """
  decay = 0.9 if bn_decay is None else bn_decay
  # updates_collections=None is passed through unchanged from the original
  # call; learned offset (center) and scale are both enabled.
  return tf.contrib.layers.batch_norm(
      inputs,
      center=True,
      scale=True,
      is_training=is_training,
      decay=decay,
      updates_collections=None,
      scope=scope,
      data_format=data_format)


def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
  """ Batch normalization for the output of a fully connected layer.

  Args:
      inputs:      Tensor, 2D BxC input
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0],
                             bn_decay=bn_decay)


def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):
  """ Batch normalization for the output of a 1D convolution.

  Args:
      inputs:      Tensor, 3D BLC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
      data_format: 'NHWC' or 'NCHW'
  Return:
      normed:      batch-normalized maps
  """
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0, 1],
                             bn_decay=bn_decay,
                             data_format=data_format)


def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format='NHWC'):
  """ Batch normalization on 2D convolutional maps.

  Args:
      inputs:      Tensor, 4D BHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
      data_format: 'NHWC' or 'NCHW'; defaults to 'NHWC' like
                   batch_norm_template, so callers that do not pass a
                   format keep working
  Return:
      normed:      batch-normalized maps
  """
  return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)


def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
  """ Batch normalization for the output of a 3D convolution.

  Args:
      inputs:      Tensor, 5D BDHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0, 1, 2, 3],
                             bn_decay=bn_decay)


def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
  """ Dropout that is only active while training.

  Args:
    inputs: tensor
    is_training: boolean tf.Variable
    scope: string
    keep_prob: float in [0,1]
    noise_shape: list of ints

  Returns:
    tensor variable
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    def _with_dropout():
      return tf.nn.dropout(inputs, keep_prob, noise_shape)

    def _passthrough():
      return inputs

    # The branch is selected at run time from the is_training tensor.
    return tf.cond(is_training, _with_dropout, _passthrough)


================================================
FILE: paper_experiments/utils/imm.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import utils.EKF as EKF
import pdb
import utils.kf_2d as kf_2d
import matplotlib.pyplot as plt

np.set_printoptions(precision=4, suppress=True)

class IMMFilter2D(EKF.EKF):
    """
    An IMM filter for tracking bounding boxes in image space.
    Contains 2 Kalman Filters
    Filter 1: Constant Velocity Model:
        The 8-dimensional state space
            x, y, a, h, vx, vy, va, vh
        contains the bounding box center position (x, y), aspect ratio a, height h,
        and their respective velocities.
        Object motion follows a constant velocity model. The bounding box location
        (x, y, a, h) is taken as direct observation of the state space (linear
        observation model).
    Filter 2: Random Walk Model:
        The 4-dimensional state space
            x, y, a, h
        contains the bounding box center position (x, y), aspect ratio a, height h.
        Object motion follows a random walk model. The bounding box location
        (x, y, a, h) is taken as direct observation of the state space (linear
        observation model).
    """
    def __init__(self, kf_vel_params=(1./20, 1./160, 1, 1, 2, float('inf')), kf_walk_params=(1./20, 1./160, 1, 1, 2), markov=(0.9,0.7)):
        """Build the two sub-filters and the model transition matrix.

        Parameters
        ----------
        kf_vel_params : tuple
            Positional arguments for `kf_2d.KalmanFilter2D`:
            (pos_weight, velocity_weight, std_process, std_measurement,
            initial_uncertainty, gate_limit). All six are required by that
            constructor; the default uses an infinite gate limit, which
            leaves the gating covariance unclamped.
        kf_walk_params : tuple
            Positional arguments for `kf_2d.RandomWalkKalmanFilter2D`.
        markov : tuple of two floats
            markov[0] is the probability of staying in model 1 and markov[1]
            the probability of switching from model 2 to model 1. Each row of
            the resulting transition matrix is completed so that it sums to 1.
        """
        self.kf1 = kf_2d.KalmanFilter2D(*kf_vel_params)
        self.kf2 = kf_2d.RandomWalkKalmanFilter2D(*kf_walk_params)

        # Row i holds the transition probabilities out of model i.
        self.markov_transition = np.asarray([[markov[0], 1-markov[0]],
                                             [markov[1], 1-markov[1]]])

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.
        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.
        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (2,8 dimensional) and covariance matrix (2,8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.
        """
        mean_pos1, cov1 = self.kf1.initiate(measurement, flow)
        #Random walk does not need the flow
        mean_pos2, cov2 = self.kf2.initiate(measurement, None)

        covariance = np.dstack([cov1, cov2])
        covariance = np.transpose(covariance, axes=(2,0,1))
        mean = np.vstack([mean_pos1, mean_pos2])
        model_probs = np.ones((2,1))*0.5

        return mean, covariance, model_probs


    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.
        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.
        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.
        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.
        """
        dist1 = self.kf1.gating_distance(mean[0, :], covariance[0, :, :], measurements, only_position)
        dist2 = self.kf2.gating_distance(mean[1, :], covariance[1, :, :], measurements, only_position)
        return np.where(dist1 < dist2, dist1, dist2)

    def update(self, mean, covariance, measurement, model_probabilities, marginalization=None, JPDA=False):
        """Run the correction step of both sub-filters and re-weight the models.

        Parameters
        ----------
        mean : ndarray
            Stacked predicted means, one row per model.
        covariance : ndarray
            Stacked predicted covariances, model axis first.
        measurement : ndarray
            Measurement(s) handed to both sub-filters.
        model_probabilities : ndarray
            Current model probabilities.
        marginalization, JPDA :
            Forwarded unchanged to the sub-filters' `update`.

        Returns
        -------
        (ndarray, ndarray, ndarray)
            Stacked corrected means, stacked corrected covariances and the
            updated, normalised model probabilities.
        """
        # Project each model's state into measurement space.
        proj_mean_cv, proj_cov_cv = self.kf1.project(mean[0], covariance[0])
        proj_mean_rw, proj_cov_rw = self.kf2.project(mean[1], covariance[1])

        dist_cv = EKF.squared_mahalanobis_distance(proj_mean_cv, proj_cov_cv, measurement)
        dist_rw = EKF.squared_mahalanobis_distance(proj_mean_rw, proj_cov_rw, measurement)

        distance = np.vstack([dist_cv, dist_rw])
        # Shift by the smallest distance before exponentiating.
        distance -= np.amin(distance)

        dets = np.vstack([np.sqrt(np.linalg.det(proj_cov_cv)), np.sqrt(np.linalg.det(proj_cov_rw))])
        unnormalised = np.exp(-distance/2)/dets
        if distance.ndim > 1:
            likelihood = np.sum(unnormalised, axis = -1, keepdims = True)
        else:
            likelihood = unnormalised

        # Bayes update of the model probabilities.
        weighted = likelihood*model_probabilities
        model_probs = weighted/np.sum(likelihood*model_probabilities)

        new_mean_cv, new_cov_cv = self.kf1.update(mean[0], covariance[0], measurement, marginalization, JPDA)
        new_mean_rw, new_cov_rw = self.kf2.update(mean[1], covariance[1], measurement, marginalization, JPDA)
        out_mean = np.vstack([new_mean_cv, new_mean_rw])
        # dstack puts the model index last; move it to the front.
        out_cov = np.transpose(np.dstack([new_cov_cv, new_cov_rw]), axes=(2,0,1))

        return out_mean, out_cov, model_probs
    
    def predict(self, mean, covariance, model_probabilities):
        """Run Kalman filter prediction step.
        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The covariance matrix of the object state at the
            previous time step.
        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.
        """
        # Perform prediction

        model_future_probabilities = np.dot(self.markov_transition.T, model_probabilities)
        model_transition_probabilities = self.markov_transition*(model_probabilities/model_future_probabilities.T)
        mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2 = self.mix_models(mean[0], covariance[0], mean[1], covariance[1], model_transition_probabilities)
        out_mean_1, out_cov_1 = self.kf1.predict(mixed_mean_1, mixed_cov_1)
        out_mean_2, out_cov_2 = self.kf2.predict(mixed_mean_2, mixed_cov_2)

        out_mean = np.vstack([out_mean_1, out_mean_2])
        out_cov = np.dstack([out_cov_1, out_cov_2])
        out_cov = np.transpose(out_cov, axes=(2,0,1))
        return out_mean, out_cov, model_future_probabilities

    def mix_models(self, mean_1, cov_1, mean_2, cov_2, model_transition_probabilities):
        """Mix the two model states (IMM interaction step).

        Parameters
        ----------
        mean_1, cov_1 : ndarray
            Mean and covariance of model 1.
        mean_2, cov_2 : ndarray
            Mean and covariance of model 2.
        model_transition_probabilities : ndarray
            2x2 mixing weights; entry [i, j] is the weight given to model
            i+1's state when forming the mixed state of model j+1.

        Returns
        -------
        (ndarray, ndarray, ndarray, ndarray)
            mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2.
        """
        weights = model_transition_probabilities

        mixed_mean_1 = weights[0, 0]*mean_1 + weights[1, 0]*mean_2
        mixed_mean_2 = weights[0, 1]*mean_1 + weights[1, 1]*mean_2

        # Offsets of each source mean from each mixed mean; their outer
        # products add the spread-of-means term to the mixed covariance.
        mean_diff_11 = mean_1 - mixed_mean_1
        mean_diff_21 = mean_2 - mixed_mean_1
        mean_diff_12 = mean_1 - mixed_mean_2
        mean_diff_22 = mean_2 - mixed_mean_2

        mixed_cov_1 = weights[0, 0]*(cov_1+np.outer(mean_diff_11, mean_diff_11)) + \
                        weights[1, 0]*(cov_2+np.outer(mean_diff_21, mean_diff_21))
        # The model-1 contribution must use cov_1 (paired with mean_1).
        mixed_cov_2 = weights[0, 1]*(cov_1+np.outer(mean_diff_12, mean_diff_12)) + \
                        weights[1, 1]*(cov_2+np.outer(mean_diff_22, mean_diff_22))

        return mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2
    
    @staticmethod
    def combine_states(mean, cov, model_probabilities):

        mean = np.sum(model_probabilities*mean, axis = 0)
        covariance = np.sum(np.expand_dims(model_probabilities,2)*cov, axis = 0)

        return mean, covariance
        
def generate_particle_motion(motion_matrices, initial_state, process_noise, length = 100):
    """Simulate a trajectory that randomly switches between two motion modes.

    Parameters
    ----------
    motion_matrices : sequence
        One motion matrix per mode.
    initial_state : ndarray
        Starting state vector.
    process_noise : sequence
        Per-mode scale applied to standard normal noise.
    length : int
        Number of simulated steps.

    Returns
    -------
    (ndarray, list)
        The states (initial state plus one per step) and the list of modes,
        which starts with the initial mode and then records the mode used
        for each step.
    """
    markov_transition_matrix = np.asarray([[0.9, 0.1],[.7, 0.3]])
    current_mode = 0 if np.random.random() < 0.5 else 1

    trajectory = [initial_state]
    modes = [current_mode]
    for _ in range(length):
        modes.append(current_mode)
        propagated = np.dot(motion_matrices[current_mode], trajectory[-1])
        noise = np.random.randn(*initial_state.shape)*process_noise[current_mode]
        trajectory.append(propagated+noise)
        # Column 0 of the current row is the probability of moving to mode 0.
        to_mode_0 = markov_transition_matrix[current_mode][0]
        current_mode = 0 if np.random.rand() < to_mode_0 else 1
    return np.array(trajectory), modes

def generate_observations(input_state_list, observation_matrix, observation_noise):
    """Project every state through the observation matrix and add noise.

    Parameters
    ----------
    input_state_list : sequence of ndarray
        States to observe; must contain at least one element.
    observation_matrix : ndarray
        Matrix applied to each state.
    observation_noise : float
        Scale applied to standard normal noise.

    Returns
    -------
    ndarray
        One noisy observation per input state.
    """
    # The noise shape is taken from the first projected state.
    observation_shape = np.dot(observation_matrix, input_state_list[0]).shape
    observations = []
    for state in input_state_list:
        projected = np.dot(observation_matrix, state)
        noise = np.random.randn(*observation_shape)*observation_noise
        observations.append(projected+noise)
    return np.array(observations)

if __name__=='__main__':
    # Demo: simulate a target that switches between a constant-velocity and a
    # static motion mode, then compare the IMM filter against a single
    # constant-velocity Kalman filter and plot the result.
    #
    # NOTE(review): predict()/update() of the kf_2d filters are implemented in
    # utils.EKF, which is not visible from this file; confirm their current
    # signatures before relying on this demo.

    # KalmanFilter2D takes six constructor arguments, the last one being the
    # gate limit; an infinite limit leaves the gating covariance unclamped.
    cv_params = (1./20, 1./160, 1, 1, 2, float('inf'))
    imm_filter = IMMFilter2D(kf_vel_params=cv_params)
    motion_matrix = np.eye(8)
    motion_matrix[0,4] = 1
    motion_matrix[1,5] = 1
    initial_state = np.array([0,0,1,1,1,1,0,0])
    states, modes = generate_particle_motion([motion_matrix, np.eye(8)], initial_state, [0.1, 2], 50)
    plt.subplot(211)
    plt.plot(states[:,0], states[:,1], linestyle = '--', marker='.', label= 'True state')
    observation_matrix = np.eye(4,8)
    obs = generate_observations(states, observation_matrix, 0.5)
    # plt.scatter(obs[:,0], obs[:,1], marker='x', color='green', label = 'observation')
    rnd_filter = kf_2d.KalmanFilter2D(*cv_params)
    # initiate() requires the optical flow argument; the synthetic data has none.
    mean, covariance, probs = imm_filter.initiate(obs[0], None)
    mean_rand, cov_rand = rnd_filter.initiate(obs[0], None)
    mean_list, covariance_list, probs_list = [], [], []
    mean_list_rand, covariance_list_rand = [], []

    combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)
    mean_list.append(combined_mean)
    covariance_list.append(combined_cov)
    mean_list_rand.append(mean_rand)
    covariance_list_rand.append(cov_rand)
    probs_list.append(probs)
    for idx, i in enumerate(obs[1:]):
        # Reference filter: single constant-velocity Kalman filter.
        mean_rand_new, cov_rand_new = rnd_filter.predict(mean_rand, cov_rand)
        mean_rand, cov_rand = rnd_filter.update(mean_rand_new, cov_rand_new, i)
        mean_list_rand.append(mean_rand)
        covariance_list_rand.append(cov_rand)

        # IMM filter: predict, correct, then collapse to a single estimate.
        mean_new, covariance_new, probs_new = imm_filter.predict(mean, covariance, probs)
        mean, covariance, probs = imm_filter.update(mean_new, covariance_new, i, probs_new)
        combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)
        mean_list.append(combined_mean)
        covariance_list.append(combined_cov)
        probs_list.append(probs)

    mean_list = np.array(mean_list)
    mean_list_rand = np.array(mean_list_rand)
    plt.plot(mean_list[:, 0], mean_list[:, 1], marker='+', c='k', label = 'IMMestimate', alpha = 0.6)
    plt.plot(mean_list_rand[:, 0], mean_list_rand[:, 1], marker=',', c='orange', label = 'CV estimate', alpha = 0.6)
    # Mean squared position error of each filter against the true states.
    MSE_IMM = np.mean((mean_list[:,:2]-states[:,:2])**2)
    MSE = np.mean((mean_list_rand[:,:2]-states[:,:2])**2)
    print("MSE: %f for 2D filter"%MSE)
    print("MSE: %f for IMM filter"%MSE_IMM)
    plt.legend()
    plt.subplot(212)

    plt.plot(modes, label='True modes')
    plt.plot([i[1] for i in probs_list], label='predicted modes')
    plt.legend()
    plt.show()


================================================
FILE: paper_experiments/utils/iou_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
import pdb


def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union between the `bbox` and each candidate,
        one value per candidate row.

    """
    box_tl = bbox[:2]
    box_br = bbox[:2] + bbox[2:]
    cand_tl = candidates[:, :2]
    cand_br = candidates[:, :2] + candidates[:, 2:]

    # Corners of the overlap rectangle; bbox broadcasts against every row.
    overlap_tl = np.maximum(box_tl, cand_tl)
    overlap_br = np.minimum(box_br, cand_br)
    # Negative extents mean the boxes do not overlap on that axis.
    overlap_wh = np.maximum(0., overlap_br - overlap_tl)

    area_intersection = overlap_wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    area_union = area_bbox + area_candidates - area_intersection
    return area_intersection / area_union


def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None, use3d=False, kf=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.
    use3d : bool
        If True, boxes are built from each detection's `box_3d` and each
        track's `to_tlwh3d()` instead of the 2D `tlwh` boxes.
    kf :
        Forwarded to each track's `to_tlwh` in the 2D case.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    num_rows, num_cols = len(track_indices), len(detection_indices)
    cost_matrix = np.zeros((num_rows, num_cols))
    if num_rows == 0 or num_cols == 0:
        return cost_matrix

    def _reduce_3d(box):
        # Shift the first two entries by half of entries 3:5 (in place), then
        # keep entries 0, 2, 3 and 5 as the 4-value box.
        # @TODO: Should use a Detection3D class to do this
        box[..., :2] -= box[..., 3:5] / 2
        return box[..., [0,2,3,5]]

    if use3d:
        candidates = _reduce_3d(np.array([detections[i].box_3d for i in detection_indices]))
    else:
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])

    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        if use3d:
            bbox = _reduce_3d(track.to_tlwh3d())
        else:
            bbox = track.to_tlwh(kf)
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix


================================================
FILE: paper_experiments/utils/kf_2d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import utils.EKF as EKF
import pdb
np.set_printoptions(precision=4, suppress=True)

class KalmanFilter2D(EKF.EKF):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, w, h, vx, vy, vw, vh

    contains the bounding box center position (x, y), width w, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, w, h) is taken as direct observation of the state space (linear
    observation model).

    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, gate_limit):
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = 1242
        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        self.LIMIT = gate_limit

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        if flow is not None:
            vel = np.mean(np.reshape(flow[int(mean_pos[1]):int(mean_pos[1]+mean_pos[3]), 
                    int(mean_pos[0]):int(mean_pos[0]+mean_pos[2]), :], (-1, 2)), axis=0)
            mean_vel[:2] = vel
        mean = np.r_[mean_pos, mean_vel]

        # Initialize covariance based on w, h and configured std
        std = [
            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],
            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],
            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[2],
            (1 + abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_pos * measurement[3],

            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],
            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3],
            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[2],
            (1 + 1.5*abs(mean_pos[0]/self.img_center - 1)) * self._std_weight_vel * measurement[3]]

        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2
        return mean, covariance

    def predict_mean(self, mean):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        return np.dot(self._motion_mat, mean)
    
    def predict_covariance(self, mean, covariance, last_detection, next_to_last_detection):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        process_noise = self.get_process_noise(mean, last_detection, next_to_last_detection)
        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) 
                     + process_noise)

    def get_process_noise(self, mean, last_detection, next_to_last_detection):
        """Return the process (motion) noise covariance for the given state.

        The result is an 8x8 diagonal matrix whose standard deviations are
        the position and velocity weights multiplied by mean[2] and mean[3]
        (pattern: [2], [3], [2], [3] for both halves), scaled by the
        process weight.

        NOTE(review): the depth/velocity based `depth_scale` computed in the
        first part of this method is overwritten with 1 before it is used,
        so it currently has no effect on the result. The code is still
        executed, however: `last_detection.box_3d` is read (so
        `last_detection` must not be None) and `get_3d_distance()` is called
        when a 3D box is present. Confirm whether the override is
        intentional before removing either part.
        """
        # Returns Rt the motion noise covariance

        depth_scale = 1
        if last_detection.box_3d is not None:
            dist = last_detection.get_3d_distance()
            depth_scale = max(1,1+(16-dist)/10)
            if next_to_last_detection is not None and next_to_last_detection.box_3d is not None:
                b1 = last_detection.box_3d
                b2 = next_to_last_detection.box_3d
                # NOTE(review): with Python 2 integer division `1/2` is 0, which
                # would make this expression always 1 - confirm the interpreter.
                vel = ((b1[0]-b2[0])**2 + (b1[2]-b2[2])**2)**(1/2)
                if vel > 2: # Fast moving (car) nearby, increase uncertainty
                    depth_scale *= 2
                    pass
                # print(vel)
            # print(dist, depth_scale)

        # Overrides everything computed above; see the NOTE in the docstring.
        depth_scale = 1
        # depth_scale *= max(1, 1+(40-mean[2])/50, 1+(40-mean[3])/50) # Note: Scales up small boxes bc higher uncertainty

        # Motion uncertainty scaled by the estimated box extents mean[2], mean[3]
        std_pos = [
            depth_scale * self._std_weight_pos * mean[2],
            depth_scale * self._std_weight_pos * mean[3],
            depth_scale * self._std_weight_pos * mean[2],
            depth_scale * self._std_weight_pos * mean[3]]
        std_vel = [
            depth_scale * self._std_weight_vel * mean[2],
            depth_scale * self._std_weight_vel * mean[3],
            depth_scale * self._std_weight_vel * mean[2],
            depth_scale * self._std_weight_vel * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2

        return motion_cov

    def project_mean(self, mean):
        # Measurement prediction from state (function h)
        # Calculations sensor update Jacobian (Ht)
        # Returns (h(mean), Ht)
        return np.dot(self._observation_mat, mean)


    def get_measurement_noise(self, measurement):
        # Returns Qt the sensor noise covariance
                
        # Measurement uncertainty scaled by estimated height
        std = [
                self._std_weight_pos*measurement[2],
                self._std_weight_pos*measurement[3],
                self._std_weight_pos*measurement[2],
                self._std_weight_pos*measurement[3]]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement**2
        return innovation_cov
    
    def project_cov(self, mean, covariance):
        # Returns S the innovation covariance (projected covariance)
                
        measurement_noise = self.get_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                          self._observation_mat.T))
                     + measurement_noise)
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, use_3d=False):
        """Compute gating distance between state distribution and measurements.

        The state is projected into measurement space. If the largest entry
        of the projected covariance exceeds the configured gate limit, the
        covariance is rescaled so that its largest entry equals the limit
        before the distance is computed.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, w, h).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the first
            two measurement components (x, y) only.
        use_3d : Optional[bool]
            Not used by this implementation.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        proj_mean, proj_cov = self.project(mean, covariance)
        if only_position:
            proj_mean = proj_mean[:2]
            proj_cov = proj_cov[:2, :2]
            measurements = measurements[:, :2]

        # Clamp the covariance (in place) so its largest entry is at most LIMIT.
        largest_entry = np.amax(proj_cov)
        if largest_entry > self.LIMIT:
            proj_cov *= self.LIMIT / largest_entry
        return EKF.squared_mahalanobis_distance(proj_mean, proj_cov, measurements)

class RandomWalkKalmanFilter2D(KalmanFilter2D):
    """
    A random-walk Kalman filter for tracking bounding boxes in image space.

    The state vector keeps the same 8-dimensional layout as KalmanFilter2D

        x, y, w, h, vx, vy, vw, vh

    but the motion matrix built here copies (x, y, w, h) unchanged and sets
    the four velocity components to zero at every prediction step, i.e. the
    box follows a random walk driven only by the process noise.

    The bounding box (x, y, w, h) is taken as direct observation of the state
    space (linear observation model).

    """
    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, img_center=1242, gate_limit=np.inf):
        """Set up the random-walk model matrices and noise weights.

        Parameters
        ----------
        pos_weight, velocity_weight : float
            Weights of the position / velocity standard deviations.
        std_process, std_measurement : float
            Global scales of the process and measurement noise.
        initial_uncertainty : float
            Extra scale applied to the covariance of a new track.
        img_center : float
            Reference x coordinate used by the inherited `initiate`.
        gate_limit : float
            Upper bound on the largest projected covariance entry used by
            the inherited `gating_distance`. The default (infinity) leaves
            the covariance unclamped.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = img_center
        # Create Kalman filter model matrices.
        # Motion model is a random walk: positions are carried over, the
        # velocity block is zeroed.
        self._motion_mat = np.eye(2*ndim, 2*ndim)
        self._motion_mat[ndim:, ndim:] = 0
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2*ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        # The inherited gating_distance reads self.LIMIT, so it must be set.
        self.LIMIT = gate_limit


================================================
FILE: paper_experiments/utils/kf_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import EKF
import pdb


class KalmanFilter3D(EKF.EKF):
    """
    A simple 3D Kalman filter for tracking bounding cuboids in 3d.

    The 12-dimensional state space

        x, y, l, h, w, theta, Vx, Vy, Vl, Vh, Vw, Vtheta

    contains the bounding box center position (x, y), width w, height h,
    length l, heading theta, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    is taken as direct observation of the state space (linear observation model).

    """

    def __init__(self):
        ndim, dt = 6, 1.
        self.ndim = ndim

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        # Sensor model is direct observation, i.e. x = x
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_motion_pos = 0.8
        self._std_motion_vel = 0.1
        self._std_motion_theta= 0.017*1 # ~1 degrees
        self._std_motion_omega = 0.017*0.1 # ~0.1 degrees

        self._std_sensor_pos = 0.8
        self._std_sensor_vel = 0.1
        self._std_sensor_theta= 0.017*5 # ~5 degrees

        std_pos = [
            self._std_motion_pos, # x
            self._std_motion_pos, # y
            self._std_motion_pos, # l
            self._std_motion_pos, # h
            self._std_motion_pos, # w            
            self._std_motion_theta # theta
            ]
        std_vel = [
            self._std_motion_vel, # x
            self._std_motion_vel, # y
            self._std_motion_vel, # l
            self._std_motion_vel, # h
            self._std_motion_vel, # w            
            self._std_motion_omega # omega
            ]
        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        std = [
            self._std_sensor_pos, # x
            self._std_sensor_pos, # y
            self._std_sensor_pos, # l
            self._std_sensor_pos, # h
            self._std_sensor_pos, # w
            self._std_sensor_theta # theta
            ]
        self._innovation_cov = np.diag(np.square(std))

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, l, h, w, theta) 

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (12 dimensional) and covariance matrix (12x12
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        # Initialize covariance 
        std = [ 2,  2,  2,  2,  2,  2,
               10, 10, 10, 10, 10, 10
              ]
        covariance = self._motion_cov * np.diag(np.square(std))
        return mean, covariance

    def motion_update(self, mean, covariance):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        mean = np.dot(self._motion_mat, mean)
        return mean, self._motion_mat

    def get_motion_cov(self, mean, covariance):
        # Returns Rt the motion noise covariance

        return self._motion_cov

    def sensor_update(self, mean, covariance):
        # Measurement prediction from state (function h)
        # Calculations sensor update Jacobian (Ht)
        # Returns (h(mean), Ht)
        mean = np.dot(self._update_mat, mean)
        return mean, self._update_mat

    def get_innovation_cov(self, mean, covariance):
        # Returns Qt the sensor noise covariance
        return self._innovation_cov

    def adjust_angle(self, measured, target):
        """Shift `measured` by whole turns so it lies within half a turn of `target`.

        The shift is applied with an in-place addition, so an ndarray passed
        as `measured` is modified and returned.
        """
        full_turn = 2*np.pi
        turns = np.round((target - measured)/full_turn)
        measured += full_turn*turns
        return measured

    def update(self, mean, covariance, meas_in, marginalization=None, JPDA=False):
        """Run the correction step after unwrapping the measured heading.

        The measurement is copied, its heading component (index 5) is shifted
        by whole turns towards the state's heading mean[5], and the result is
        passed on to the base-class `update`. `meas_in` itself is left
        untouched.
        """
        measurement = np.copy(meas_in)
        heading = mean[5]
        if measurement.ndim != 1:
            # Several measurements, one per row.
            measurement[:,5] = self.adjust_angle(measurement[:,5], heading)
        else:
            measurement[5] = self.adjust_angle(measurement[5], heading)
        return EKF.EKF.update(self, mean, covariance, measurement, marginalization, JPDA)

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 6 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (12 dimensional).
        covariance : ndarray
            Covariance of the state distribution (12x12 dimensional).
        measurements : ndarray
            An Nx6 dimensional matrix of N measurements, each in
            format (x, y, l, h, w, theta).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the first
            two measurement components (x, y) only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        proj_mean, proj_cov = self.project(mean, covariance)
        if only_position:
            proj_mean = proj_mean[:2]
            proj_cov = proj_cov[:2, :2]
            measurements = measurements[:, :2]

        return EKF.squared_mahalanobis_distance(proj_mean, proj_cov, measurements)


================================================
FILE: paper_experiments/utils/linear_assignment.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment
import EKF
import pdb
from mbest_ilp import new_m_best_sol
from multiprocessing import Pool
from functools import partial
#from mbest_ilp import m_best_sol as new_m_best_sol

INFTY_COST = 1e+5

def min_marg_matching(marginalizations, track_indices=None, max_distance=1):
    """Hard track/detection assignment from a marginal association matrix.

    Parameters
    ----------
    marginalizations : ndarray
        (num_tracks, num_columns) matrix. Column 0 is treated as the
        "no detection" (dummy) column; the remaining columns correspond to
        detections 0 .. num_columns - 2.
    track_indices : Optional[sequence of int]
        Maps rows of `marginalizations` to track ids. Defaults to
        `np.arange(num_tracks)`.
    max_distance : float
        Assignments whose cost (`1 - marginal`) exceeds this value are
        rejected.

    Returns
    -------
    (matches, unmatched_tracks, unmatched_detections)
        `matches` is a list of (track_idx, detection_idx) pairs. When there
        is nothing to match, the index arrays are returned unchanged.

    """
    cost_matrix = 1 - marginalizations
    num_tracks, num_detections = cost_matrix.shape
    if track_indices is None:
        track_indices = np.arange(num_tracks)

    # One fewer than the column count: column 0 is the dummy.
    detection_indices = np.arange(num_detections-1)

    if num_tracks == 0 or num_detections == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    # Replicate the dummy column so that every track can pick "no
    # detection" simultaneously: columns [0, num_tracks) are dummies and
    # real detection j lives in column num_tracks + j.
    dummy_column = cost_matrix[:, 0, np.newaxis]
    padding = np.tile(dummy_column, (1, num_tracks-1))
    expanded_cost_matrix = np.hstack((padding, cost_matrix))
    indices = linear_assignment(expanded_cost_matrix)

    assigned_rows = indices[:, 0]
    assigned_cols = indices[:, 1]

    # Detections that no track was assigned to (candidates for new tracks).
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col+num_tracks not in assigned_cols
    ]
    # Tracks that did not receive any column at all.
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows
    ]

    matches = []
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = col - num_tracks
        if detection_idx < 0:
            # The track was assigned to one of the dummy columns.
            unmatched_tracks.append(track_idx)
        elif expanded_cost_matrix[row, col] > max_distance:
            # Assignment is too costly: reject both sides.
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))

    return matches, unmatched_tracks, unmatched_detections

def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None, compare_2d = False, detections_3d=None):
    """Solve the track/detection linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d, detections_3d)`; must return the
        NxM cost matrix whose entry (i, j) is the association cost between
        the i-th listed track and the j-th listed detection.
    max_distance : float
        Gating threshold. Associations with a larger cost are disregarded.
    tracks : List[track.Track]
        Predicted tracks at the current time step.
    detections : List[detection.Detection]
        Detections at the current time step.
    track_indices : Optional[List[int]]
        Maps rows of the cost matrix to entries of `tracks`. Defaults to
        all tracks.
    detection_indices : Optional[List[int]]
        Maps columns of the cost matrix to entries of `detections`.
        Defaults to all detections.
    compare_2d : bool
        Forwarded unchanged to `distance_metric`.
    detections_3d : Optional
        Forwarded unchanged to `distance_metric`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        * matched (track index, detection index) pairs,
        * unmatched track indices,
        * unmatched detection indices.
        When either index list is empty the index containers are returned
        as given.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)
    # Clamp gated entries just above the threshold so that they can still be
    # recognised (and rejected) after the assignment has been solved.
    too_far = cost_matrix > max_distance
    cost_matrix[too_far] = max_distance + 1e-5

    indices = linear_assignment(cost_matrix)
    assigned_rows = indices[:, 0]
    assigned_cols = indices[:, 1]

    # Detections without an assigned track (candidates for new tracks).
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col not in assigned_cols
    ]
    # Tracks without an assigned detection.
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows
    ]

    matches = []
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] <= max_distance:
            matches.append((track_idx, detection_idx))
        else:
            # Assignment exceeds the gate: treat both sides as unmatched.
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)

    return matches, unmatched_tracks, unmatched_detections

# @profile
def JPDA(
        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,
        detection_indices=None, m=1, compare_2d = False, windowing = False):
    """Compute marginal track/detection association weights.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d)`; must return `(cost_matrix,
        gate_mask)`. `cost_matrix` is indexed with three axes
        (track, detection, channel) and `gate_mask` selects the
        (track, detection) pairs that are gated out.
    dummy_node_cost_app : float
        Cost of the "no detection" option when the first cost channel is
        used (forwarded to `get_JPDA_output`).
    dummy_node_cost_iou : float
        Cost of the "no detection" option when the second cost channel is
        used (forwarded to `get_JPDA_output`).
    tracks : List[track.Track]
        Predicted tracks at the current time step.
    detections : List[detection.Detection]
        Detections at the current time step.
    track_indices : Optional[List[int]]
        Rows of the cost matrix -> entries of `tracks`. Defaults to all.
    detection_indices : Optional[List[int]]
        Columns of the cost matrix -> entries of `detections`. Defaults to
        all.
    m : int
        Number of best joint assignments requested per cluster.
    compare_2d : bool
        Forwarded unchanged to `distance_metric`.
    windowing : bool
        Not used by this function.

    Returns
    -------
    ndarray
        Array of shape (num_tracks, num_detections + 1); column 0 is the
        "no detection" column. When there are no tracks or no detections
        to consider, an empty array of shape (0, len(detections) + 1) is
        returned instead.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return np.zeros((0, len(detections) + 1))  # Nothing to match.

    cost_matrix, gate_mask = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d)
    num_tracks = cost_matrix.shape[0]
    num_detections = cost_matrix.shape[1]
    cost_matrix[gate_mask] = INFTY_COST

    # Anything at (or numerically next to) INFTY_COST counts as gated.
    gate_cutoff = INFTY_COST - 0.0001
    clusters = find_clusters(cost_matrix[:,:,0], gate_cutoff)

    per_cluster = [
        get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app,
                        dummy_node_cost_iou, gate_cutoff, m)
        for cluster in clusters
    ]
    if len(per_cluster) == 0:
        # No clusters at all: every track is associated with the dummy.
        fallback = np.zeros((num_tracks, num_detections + 1))
        fallback[:, 0] = 1
        return fallback

    # Flatten the per-cluster hypothesis lists into one stack.
    hypothesis_rows, hypothesis_costs = [], []
    for cluster_assignments, cluster_costs in per_cluster:
        hypothesis_rows.extend(cluster_assignments)
        hypothesis_costs.extend(cluster_costs)
    assignments = np.vstack(hypothesis_rows)
    assignment_cost = np.array(hypothesis_costs)

    # Weight each hypothesis by exp(-cost) and sum over hypotheses.
    weights = np.exp(-np.expand_dims(assignment_cost, 1))
    marginalised_cost = np.sum(assignments*weights, axis = 0)
    return np.reshape(marginalised_cost, (num_tracks, num_detections+1))

def calculate_entropy(matrix, idx, idy):
    """Mean row-wise entropy of `matrix`, ignoring the masked entries.

    Each row is normalised by the sum of its unmasked entries and the
    Shannon entropy ``-sum(p * log(p))`` of the unmasked entries is
    computed; the result is the mean of these row entropies.

    Entries that contribute no probability mass (``p == 0``) add nothing to
    the entropy (the usual ``0 * log(0) := 0`` convention). Without this,
    a single zero in `matrix` turned the whole result into NaN, which made
    every comparison against it evaluate to False.

    Parameters
    ----------
    matrix : ndarray
        2-D array of non-negative scores.
    idx, idy : array_like of int
        Row / column coordinates of the entries to exclude.

    Returns
    -------
    float
        Mean entropy over the rows of `matrix`.

    """
    mask = np.ones(matrix.shape)
    mask[idx, idy] = 0
    row_mass = np.sum(mask*matrix, axis=1, keepdims=True)
    # Degenerate entries are handled explicitly below, so silence the
    # divide-by-zero / invalid-value warnings they would otherwise emit.
    with np.errstate(divide='ignore', invalid='ignore'):
        prob = matrix/row_mass
        terms = -prob*np.log(prob)
    # Keep only unmasked entries that carry positive probability mass.
    contributes = (mask > 0) & (prob > 0)
    terms = np.where(contributes, terms, 0.0)
    return np.mean(np.sum(terms, axis=1))

def get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):
    """Build the weighted assignment hypotheses for one cluster.

    Parameters
    ----------
    cluster : (ndarray, ndarray)
        `cluster[0]` holds track row indices, `cluster[1]` holds detection
        indices shifted by +1 (they are used as `cluster[1] - 1` to index
        `cost_matrix` and directly as output columns, column 0 being the
        "no detection" column).
    cost_matrix : ndarray
        Array indexed as [track, detection, channel]. Channel 0 is named
        "appearance" and channel 1 "iou" in this function.
    dummy_node_cost_app : float
        Dummy ("no detection") cost used when the appearance channel is
        selected.
    dummy_node_cost_iou : float
        Dummy ("no detection") cost used when the IoU channel is selected.
    cutoff : float
        Entries of channel 0 above this value are treated as gated; the
        same value is written back as their cost.
    m : int
        Number of best solutions requested from `new_m_best_sol`.

    Returns
    -------
    (list of ndarray, ndarray)
        A list of hypotheses, each flattened to shape
        (1, num_tracks * (num_detections + 1)), and an array with one cost
        per hypothesis.

    """
    if len(cluster[1]) == 0:
        # No detection in this cluster: every track takes the dummy column.
        assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        assignment[cluster[0], 0] = 1
        assignment = assignment.reshape(1,-1)
        return [assignment], np.array([0])
    
    # Extract the (cluster tracks x cluster detections) sub-matrix of each
    # channel via fancy indexing over all (track, detection) pairs.
    new_cost_matrix_appearance = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), 
                                        np.tile(cluster[1] - 1, len(cluster[0])), 
                                        [0]*(len(cluster[1])*len(cluster[0]))], 
                                        (len(cluster[0]), len(cluster[1])))
    new_cost_matrix_iou = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), np.tile(cluster[1] - 1, len(cluster[0])), 1], 
                (len(cluster[0]), len(cluster[1])))
    # Gated pairs are identified on the appearance channel only and are
    # excluded from both entropy computations.
    idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)
    appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)
    iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)
    # Use the channel with the lower entropy and map its costs (and the
    # matching dummy cost) into (0, 1]-style scores before taking -log.
    if appearance_entropy < iou_entropy:
        new_cost_matrix = new_cost_matrix_appearance
        new_cost_matrix = 2*np.ones(new_cost_matrix.shape)/(new_cost_matrix+1) - 1
        dummy_node_cost = -np.log(2/(dummy_node_cost_app+1) - 1)
    else:
        new_cost_matrix = new_cost_matrix_iou
        # Nudge exact ones so that 1 - cost does not become 0 (log(0)).
        new_cost_matrix[new_cost_matrix==1] -= 1e-3
        new_cost_matrix = 1 - new_cost_matrix
        dummy_node_cost = -np.log(1-dummy_node_cost_iou)
    new_cost_matrix = -np.log(new_cost_matrix)
    # Overwrite gated pairs (whose -log value is meaningless) with cutoff.
    new_cost_matrix[idx_x, idx_y] = cutoff
    if len(cluster[0]) == 1:
        # Single track: the marginals are the normalised exp(-cost) over
        # the dummy column and the cluster detections; no solver is needed.
        new_cost_matrix = np.concatenate([np.ones((new_cost_matrix.shape[0], 1))*dummy_node_cost, new_cost_matrix], axis = 1)
        total_cost = np.sum(np.exp(-new_cost_matrix))
        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
                        np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = np.exp(-new_cost_matrix)/total_cost
        new_assignment = new_assignment.reshape(1, -1)
        return  [new_assignment], np.array([0])
    if new_cost_matrix.ndim <= 1:
        new_cost_matrix = np.expand_dims(new_cost_matrix, 1)

    # print(new_cost_matrix)
    assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)
    # Shift the costs so the best hypothesis has cost 0 before
    # exponentiating.
    offset = np.amin(assignment_cost)
    assignment_cost -= offset
    new_assignments = []
    total_cost = np.sum(np.exp(-assignment_cost))
    for assignment in assignments:
        # Scatter the cluster-local assignment into a full-size matrix
        # (column 0 = dummy) and scale it by the cluster normaliser.
        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
                    np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = \
                                                assignment/total_cost
        new_assignments.append(new_assignment.reshape(1, -1))
    return new_assignments, assignment_cost


def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):
    """Match tracks to detections level by level, most recent tracks first.

    Level ``n`` (1-based) only considers tracks whose `time_since_update`
    equals ``n``; detections left unmatched at one level are offered to the
    next.

    Parameters
    ----------
    distance_metric : Callable
        Cost function forwarded to `min_cost_matching`.
    max_distance : float
        Gating threshold forwarded to `min_cost_matching`.
    cascade_depth : int
        Number of levels to run; should be set to the maximum track age.
    tracks : List[track.Track]
        Predicted tracks at the current time step.
    detections : List[detection.Detection]
        Detections at the current time step.
    track_indices : Optional[List[int]]
        Indices into `tracks` to consider. Defaults to all tracks.
    detection_indices : Optional[List[int]]
        Indices into `detections` to consider. Defaults to all detections.
    compare_2d : bool
        Forwarded unchanged to `min_cost_matching`.
    detections_3d : Optional
        Forwarded unchanged to `min_cost_matching`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        * matched (track index, detection index) pairs,
        * unmatched track indices,
        * unmatched detection indices.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    remaining_detections = detection_indices
    matches = []
    for age in range(1, cascade_depth + 1):
        if len(remaining_detections) == 0:
            break  # No detections left.

        same_age_tracks = [
            k for k in track_indices
            if tracks[k].time_since_update == age
        ]
        if len(same_age_tracks) == 0:
            continue  # Nothing to match at this level.

        level_matches, _, remaining_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections,
            same_age_tracks, remaining_detections, compare_2d,
            detections_3d=detections_3d)
        matches += level_matches

    matched_tracks = set(k for k, _ in matches)
    unmatched_tracks = list(set(track_indices) - matched_tracks)
    return matches, unmatched_tracks, remaining_detections

# @profile
def gate_cost_matrix(
        kf, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):
    """Compute which track/detection pairs fall outside the Kalman gate.

    For every listed track the gating distance to every listed detection is
    obtained from `kf.gating_distance` and compared against
    `EKF.chi2inv95[gating_dim]`. If *all* detections would be gated for a
    track, the threshold is relaxed to three times its value for that
    track.

    Parameters
    ----------
    kf : The Kalman filter; must provide
        `gating_distance(mean, covariance, measurements, only_position,
        use3d)`.
    tracks : List[track.Track]
        Predicted tracks at the current time step.
    detections : List[detection.Detection]
        Detections at the current time step.
    track_indices : List[int]
        Indices into `tracks`; row i of the result refers to
        `tracks[track_indices[i]]`.
    detection_indices : List[int]
        Indices into `detections`; column j of the result refers to
        `detections[detection_indices[j]]`.
    gated_cost : Optional[float]
        Not used by this function; kept for interface compatibility.
    only_position : Optional[bool]
        If True, gating uses 3 degrees of freedom with `use3d` and 2
        otherwise; the flag is also forwarded to `kf.gating_distance`.
    use3d : Optional[bool]
        If True, measurements are taken from `det.box_3d`, otherwise from
        `det.to_xywh()`.
    windowing : Optional[bool]
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    ndarray or None
        Boolean array of shape (len(track_indices), len(detection_indices))
        that is True where the pair is gated out. None when either index
        list is empty.

    """
    if len(track_indices) == 0 or len(detection_indices) == 0:
        return None

    # Build the measurements in `detection_indices` order for both branches
    # so that column j always corresponds to detection_indices[j]. (The 3-D
    # branch used to follow the order of `detections` instead, which
    # misaligned the mask whenever the indices were not sorted.)
    if use3d:
        measurements = np.array(
            [detections[i].box_3d for i in detection_indices])
    else:
        measurements = np.asarray(
            [detections[i].to_xywh() for i in detection_indices])

    if only_position:
        gating_dim = 3 if use3d else 2
    else:
        gating_dim = measurements.shape[1]
    gating_threshold = EKF.chi2inv95[gating_dim]

    gate_mask = []
    for track_idx in track_indices:
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, use3d)
        gated_set = gating_distance > gating_threshold
        if np.all(gated_set):
            # Nothing passed the gate: retry with a relaxed threshold.
            gated_set = gating_distance > gating_threshold * 3
        gate_mask.append(gated_set)
    return np.vstack(gate_mask)

def find_clusters(cost_matrix, cutoff):
    """Group tracks and detections into connected components.

    A track and a detection are connected when their cost is below
    `cutoff`. Every track ends up in exactly one cluster; a track without
    any connected detection forms a cluster with no detections.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D (num_tracks, num_detections) cost matrix.
    cutoff : float
        Costs strictly below this value count as a connection.

    Returns
    -------
    list of (ndarray, ndarray)
        One `(track_indices, detection_indices)` pair of int32 arrays per
        cluster. Detection indices are shifted by +1 (detection column j is
        reported as j + 1).

    """
    num_tracks, _ = cost_matrix.shape
    every_track = set(range(num_tracks))
    assigned_tracks = set()
    clusters = []
    num_assigned = 0
    while num_assigned < num_tracks:
        cluster_tracks = set()
        cluster_detections = set()
        # Seed the search with any track that has no cluster yet.
        seed = next(iter(every_track - assigned_tracks))
        frontier = set()
        frontier.add(seed)
        while frontier:
            current = frontier.pop()
            reachable = np.where(cost_matrix[current] < cutoff)[0]
            cluster_detections.update(reachable + 1)
            cluster_tracks.add(current)
            # Any track touching a detection of this cluster joins it too.
            for detection in cluster_detections:
                neighbours = np.where(cost_matrix[:, detection - 1] < cutoff)[0]
                for track in neighbours:
                    if track not in cluster_tracks and track not in frontier:
                        frontier.add(track)
        num_assigned += len(cluster_tracks)
        assigned_tracks.update(cluster_tracks)
        clusters.append((
            np.array(list(cluster_tracks), dtype = np.int32),
            np.array(list(cluster_detections), dtype = np.int32),
        ))
    return clusters


================================================
FILE: paper_experiments/utils/logger.py
================================================
import io, os
import numpy as np
from PIL import Image
import tensorflow as tf
#courtesy https://becominghuman.ai/logging-in-tensorboard-with-pytorch-or-any-other-library-c549163dee9e
class Logger:
    """Minimal TensorBoard logger built on `tf.summary.FileWriter`."""

    def __init__(self, logdir):
        """Create `logdir` if it does not exist and open a writer on it."""
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        self.writer = tf.summary.FileWriter(logdir)

    def close(self):
        """Close the underlying summary writer."""
        self.writer.close()

    def _publish(self, summary, global_step):
        """Write one summary at `global_step` and flush it immediately."""
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()

    def log_scalar(self, tag, value, global_step):
        """Log the scalar `value` under `tag` at `global_step`."""
        summary = tf.Summary()
        summary.value.add(tag=tag, simple_value=value)
        self._publish(summary, global_step)

    def log_image(self, tag, img, global_step):
        """Log the array `img` as a PNG image under `tag` at `global_step`."""
        # Encode the array as PNG into an in-memory buffer.
        buffer = io.BytesIO()
        Image.fromarray(img).save(buffer, format='png')

        height, width = img.shape[0], img.shape[1]
        img_summary = tf.Summary.Image(encoded_image_string=buffer.getvalue(),
                                       height=height,
                                       width=width)

        summary = tf.Summary()
        summary.value.add(tag=tag, image=img_summary)
        self._publish(summary, global_step)


================================================
FILE: paper_experiments/utils/mbest_ilp.py
================================================
from gurobipy import Model, quicksum, LinExpr, GRB
import numpy as np
import copy
import time
from sklearn.utils.linear_assignment_ import linear_assignment
import pickle
import itertools
import pdb
from copy import deepcopy
import math
"""
Fn: ilp_assignment
------------------
Solves ILP problem using gurobi
"""
def ilp_assignment(model):
    """Optimise `model` in place.

    Returns -1 when the solver reports status code 3 after optimisation
    and None otherwise.
    """
    model.optimize()
    # Status code 3 is treated as "no solution available" by the callers.
    failed = (model.status == 3)
    return -1 if failed else None

"""
Fn: initialize_model
--------------------
Initializes gurobi ILP model by setting the base objective
"""
# @profile
def initialize_model(cost_matrix, cutoff, model = None):
    """Set up the base assignment ILP for `cost_matrix`.

    A dummy column with cost `cutoff` is inserted in front of the cost
    matrix. One binary variable is created per (row, column) entry, the
    objective minimises the total selected cost, every row must select
    exactly one column, and every non-dummy column may be selected at most
    once.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D cost matrix (rows x columns) without the dummy column.
    cutoff : float
        Cost of selecting the dummy column.
    model : Optional gurobi Model
        If given, its variables and constraints are removed and the model
        is reused; otherwise a new `Model()` is created.

    Returns
    -------
    (model, M, N, y)
        The model, the shape (M, N) of the padded cost matrix and the list
        of decision variables.

    """
    # Add dummy detection as column 0.
    dummy_costs = np.ones(cost_matrix.shape[0])*cutoff
    cost_matrix = np.insert(cost_matrix, 0, dummy_costs, axis=1)
    M, N = cost_matrix.shape

    if model is not None:
        # Reuse the caller's model: start from a clean slate.
        model.remove(model.getVars())
        model.remove(model.getConstrs())
    else:
        model = Model()
    model.setParam('OutputFlag', False)

    y = model.addVars(M, N, vtype=GRB.BINARY, name = 'y')
    row_costs = (
        quicksum([y[i,j]*cost_matrix[i][j] for j in range(N)])
        for i in range(M)
    )
    model.setObjective(quicksum(row_costs), GRB.MINIMIZE)
    # Every row picks exactly one column (possibly the dummy) ...
    model.addConstrs((quicksum(y[i,j] for j in range(N))==1 for i in range(M)), name='constraint for track')
    # ... and every non-dummy column is picked at most once.
    model.addConstrs((quicksum(y[i,j] for i in range(M))<=1 for j in range(1, N)), name='constraint for detection')
    return model, M, N, list(y.values())

"""
Fn: m_best_sol
--------------
Finds m_best solutions for object/track association given the
input cost matrix. Solves constrained ILP problems using gurobi solver.
"""
def cache(func):
    """Decorator that memoises solution lists by padded cost-matrix shape.

    The wrapped function is called as `func(cost_matrix, cutoff, ...)` and
    must return `(solution_list, solution_vals)`. The solution list is
    stored under the shape of the cost matrix after a dummy column has
    been prepended. On a later call with the same shape the stored list is
    returned and only the solution values are recomputed from the new
    costs.
    """
    solutions_by_shape = {}

    def cached_function(*args):
        costs, dummy_cost = args[0], args[1]
        # Prepend the dummy column filled with the second argument.
        dummy_column = np.ones((costs.shape[0], 1))*dummy_cost
        padded = np.hstack((dummy_column, costs))
        key = (padded.shape[0], padded.shape[1])

        if key not in solutions_by_shape:
            solution_list, solution_vals = func(*args)
            solutions_by_shape[key] = solution_list
            return solution_list, solution_vals

        # Same shape seen before: reuse the list, re-price it.
        solution_list = solutions_by_shape[key]
        solution_vals = np.sum(solution_list*padded.reshape(1, -1), axis = 1)
        return solution_list, solution_vals

    return cached_function
# @profile
def num_solutions(cost_matrix):
    """Count the feasible assignments for `cost_matrix`, capped early.

    An assignment picks k rows and gives each of them a distinct column
    (all other rows take the dummy), for k = 0 .. min(rows, columns); the
    k-th term is C(rows, k) * columns! / (columns - k)!. Counting stops as
    soon as the running total exceeds 2000, so larger return values are
    only a lower bound.
    """
    rows, cols = cost_matrix.shape
    total = 0
    term = 1  # k = 0: every row takes the dummy.
    for k in range(min(rows, cols) + 1):
        if k > 0:
            # Update C(rows, k) * P(cols, k) from the previous term; the
            # product is always divisible by k.
            term = term * (rows - k + 1) * (cols - k + 1) // k
        total += term
        if total > 2000:
            break
    return int(total)

@cache
def enumerate_solutions(cost_matrix, cutoff, num_solutions):
    """Exhaustively list every feasible assignment and its cost.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D cost matrix without the dummy column.
    cutoff : float
        Cost of the dummy column that is prepended as column 0.
    num_solutions : int
        Number of feasible assignments; it sizes the output and must match
        the number of enumerated assignments.

    Returns
    -------
    (ndarray, ndarray)
        `solution_list` of shape (num_solutions, M*N) with one flattened
        0/1 assignment matrix per row (N includes the dummy column) and
        `solution_vals`, the total cost of each assignment.

    """
    dummy_column = np.ones((cost_matrix.shape[0], 1))*cutoff
    padded = np.hstack((dummy_column, cost_matrix))
    M, N = padded.shape

    # Start from "every row takes the dummy" and overwrite per solution.
    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)
    solution_list[:, :, 0] = 1

    slot = 0
    for size in range(min(M+1, N)):
        for rows in itertools.combinations(range(M), size):
            for cols in itertools.permutations(range(1, N), size):
                # Move each chosen row from the dummy to its column.
                for row, col in zip(rows, cols):
                    solution_list[slot, row, col] = 1
                    solution_list[slot, row, 0] = 0
                slot += 1

    weighted = solution_list*np.expand_dims(padded, 0)
    solution_vals = np.sum(np.sum(weighted, axis = 1), axis = 1)
    solution_list = np.reshape(solution_list, (num_solutions, -1))
    return solution_list, solution_vals


def new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):
    """Return the best assignments for `cost_matrix` and their costs.

    When `num_solutions(cost_matrix)` is at most 2000, all assignments are
    enumerated via `enumerate_solutions` (so more or fewer than `m_sol`
    solutions may be returned). Otherwise up to `m_sol` solutions are
    obtained by repeatedly solving the ILP from `initialize_model` with
    additional constraints.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D cost matrix without the dummy column.
    m_sol : int
        Number of solutions requested from the ILP path.
    cutoff : float
        Cost of the dummy column.
    model : Optional gurobi Model
        Forwarded to `initialize_model`.

    Returns
    -------
    (ndarray, ndarray)
        Flattened assignment matrices (one per row, M*N entries including
        the dummy column) and the corresponding objective values.

    """
    sols = num_solutions(cost_matrix)
    if sols <= 2000:
        # Small problem: exhaustive enumeration.
        return enumerate_solutions(cost_matrix, cutoff, sols)
    model, M, N, y = initialize_model(cost_matrix, cutoff, model)
    X = np.zeros((m_sol, M*N))
    xv = []
    # Best solution.
    if (ilp_assignment(model) == -1):
        xv.append(0)
    else:
        x = model.getAttr("X", y)
        X[0] = x
        xv.append(model.objVal)
    if m_sol > 1:
        # Second best: forbid reproducing the best solution exactly (a
        # solution has M ones, so overlapping in at most M-1 excludes it).
        # NOTE(review): `x` is only bound when the first solve succeeded.
        model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')
        if (ilp_assignment(model) == -1):
            xv.append(0)
        else:
            x = model.getAttr("X", y)
            X[1] = x
            xv.append(model.objVal)
    if m_sol > 2:
        # Remaining solutions: keep, for every solution found so far, the
        # best other solution in its partition of the search space and
        # repeatedly pick the cheapest of those candidates.
        # NOTE(review): this looks like a partition-based m-best scheme --
        # confirm against the intended algorithm.
        model.remove(model.getConstrByName('constraint_0'))
        second_best_solutions = []
        second_best_solution_vals = []
        partitions = []
        # First variable on which the two best solutions disagree.
        j = np.argmax(np.logical_xor(X[0], X[1]))
        partitions.append([j])
        partitions.append([j])
        # Candidate within the partition of solution 0 ...
        model.addConstr(y[j]==X[0][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solutions.append(model.getAttr("X", y))
        second_best_solution_vals.append(model.objVal)
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        # ... and within the partition of solution 1.
        model.addConstr(y[j]==X[1][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solution_vals.append(model.objVal)
        second_best_solutions.append(model.getAttr("X", y))
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        
        for m in range(2, m_sol):
            # The cheapest stored candidate becomes the m-th solution.
            l_k = np.argmin(second_best_solution_vals)
            X[m] = second_best_solutions[l_k]
            xv.append(second_best_solution_vals[l_k])
            if m==m_sol-1:
                break
            # Split partition l_k on a variable where the new solution and
            # its parent solution differ.
            j = np.argmax(np.logical_xor(X[m], X[l_k]))
            parent_partition = partitions[l_k]
            constrs = []
            # Re-impose the parent partition's fixed variables.
            for idx in parent_partition:
                constrs.append(model.addConstr(y[idx]==X[l_k, idx]))
            # Candidate for the new solution's half of the split.
            model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')
            model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')
            if(ilp_assignment(model) == -1):
                second_best_solutions.append(np.ones((M,N)))
                second_best_solution_vals.append(np.inf)
            else:
                second_best_solutions.append(model.getAttr("X", y))
                second_best_solution_vals.append(model.objVal)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Refreshed candidate for the parent's half of the split.
            model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')
            model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')
            if(ilp_assignment(model) == -1):
                second_best_solution_vals[l_k] = np.inf
                second_best_solutions[l_k] = np.ones((M,N))
            else:
                second_best_solution_vals[l_k] = model.objVal
                second_best_solutions[l_k] = model.getAttr("X", y)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Both halves now additionally fix variable j.
            partitions[l_k].append(j)
            partitions.append(copy.deepcopy(partitions[l_k]))
            # Drop the temporary parent-partition constraints again.
            for constr in constrs:
                model.remove(constr)


    # X = np.asarray(X)
    xv = np.asarray(xv)
    return X, xv
def linear_assignment_wrapper(a):
    """Forward `a` to `linear_assignment` and return its result."""
    result = linear_assignment(a)
    return result

if __name__=='__main__':
    # Manual smoke test: solve a fixed 4x4 cost matrix (dummy cost 10,
    # 100 solutions requested) and print every returned assignment, shown
    # as a 4x5 matrix whose first column is the dummy, next to its cost.
    np.random.seed(14295)
    a = np.array([
        [0.1, 0.6, 0.2, 0.3],
        [0.4, 0.1, 0.9, 0.4],
        [0.3, 0.5, 0.1, 0.7],
        [0.8, 0.2, 0.2, 0.1],
    ])
    num_solutions(a)
    sols, vals = new_m_best_sol(a, 100, 10)
    for sol, val in zip(sols, vals):
        print(np.reshape(sol, (4,5)), val)


================================================
FILE: paper_experiments/utils/nn_matching.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch

def _pdist(a, b):
    """Compute pair-wise squared distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that eleement (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    a, b = np.asarray(a), np.asarray(b)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)))
    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
    r2 = np.clip(r2, 0., float(np.inf))
    return r2


def _cosine_distance(a, b, data_is_normalized=False):
    """Compute pair-wise cosine distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.
    data_is_normalized : Optional[bool]
        If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to lenght 1.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that eleement (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    if not data_is_normalized:
        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)

def _cosine_distance_torch(a, b, data_is_normalized=False):
    '''
    _cosine_distance but torched
    '''
    if not data_is_normalized:
        a = a / torch.norm(a, dim=1, keepdim=True)
        b = b / torch.norm(b, dim=1, keepdim=True)
    return 1. - torch.matmul(a, torch.transpose(b,0,1))

def _nn_euclidean_distance(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest distance, as computed by `_pdist`, to a sample in `x`
        (never below zero).

    """
    pairwise = _pdist(x, y)
    nearest = pairwise.min(axis=0)
    return np.maximum(0.0, nearest)

def _nn_euclidean_distance_torch(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest Euclidean distance to a sample in `x`.

    """
    x = x/((x*x).sum(1, keepdim = True)).sqrt()
    y = y/((y*y).sum(1, keepdim = True)).sqrt()
    sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2)
    sim = sim.exp()
    sim = (sim - 1)/(sim + 1)
    sim = torch.min(sim, 0)[0]
    return sim
    
def _nn_cosine_distance(x, y):
    """ Nearest-neighbor helper for the cosine metric.

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest cosine distance to a sample in `x`.

    """
    # Rows index samples in `x`, columns index queries in `y`.
    pairwise = _cosine_distance(x, y)
    nearest = pairwise.min(axis=0)
    return nearest

def _nn_cosine_distance_torch(x,y):
    '''
    Torch version of _nn_cosine_distance: for every row of `y`, the smallest
    cosine distance to any row of `x`.
    '''
    pairwise = _cosine_distance_torch(x, y)
    # torch.min along a dim returns (values, indices); only values are needed.
    nearest, _ = torch.min(pairwise, 0)
    return nearest

class NearestNeighborDistanceMetric(object):
    """
    Nearest-neighbor distance metric over per-target feature galleries.

    For each target, the distance to a query feature is the closest distance
    to any sample stored for that target so far. Two independent galleries
    are maintained: `samples` and `samples_2d`.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    budget : Optional[int]
        If not None, fix samples per class to at most this number. Removes
        the oldest samples when the budget is reached.

    Attributes
    ----------
    samples : Dict[int -> List[ndarray]]
        Maps target identities to the samples observed so far.
    samples_2d : Dict[int -> List[ndarray]]
        Same layout as `samples`, fed from the `features_2d` / `targets_2d`
        arguments of `partial_fit`.

    """

    def __init__(self, metric, budget=None):
        # Reject unknown metric names before any state is set up.
        if metric not in ("euclidean", "cosine"):
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")

        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
            self._metric_torch = _nn_euclidean_distance_torch
        else:
            self._metric = _nn_cosine_distance
            self._metric_torch = _nn_cosine_distance_torch

        self.budget = budget
        self.samples = {}
        self.samples_2d = {}

    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):
        """Update the distance metric with new data.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M. An entry may be
            None, in which case the target is registered without a sample.
        features_2d : ndarray
            Features appended to the `samples_2d` gallery.
        targets : ndarray
            An integer array of target identities associated with `features`.
        targets_2d : ndarray
            Target identities associated with `features_2d`.
        active_targets : List[int]
            A list of targets that are currently present in the scene.

        """
        for feat, tid in zip(features, targets):
            history = self.samples.setdefault(tid, [])
            if feat is not None:
                history.append(feat)
            if self.budget is not None:
                self.samples[tid] = history[-self.budget:]

        # Only active targets that received an entry in this call keep their
        # history; every other active target restarts with an empty gallery.
        refreshed = {tid: self.samples[tid]
                     for tid in active_targets if tid in targets}
        for tid in active_targets:
            if tid not in refreshed:
                refreshed[tid] = []
        self.samples = refreshed

        for feat_2d, tid in zip(features_2d, targets_2d):
            history_2d = self.samples_2d.setdefault(tid, [])
            history_2d.append(feat_2d)
            if self.budget is not None:
                self.samples_2d[tid] = history_2d[-self.budget:]

        # Every active target must already have a 2D gallery at this point;
        # a missing one raises KeyError.
        self.samples_2d = {tid: self.samples_2d[tid] for tid in active_targets}

    def distance(self, features, targets, compare_2d=False):
        """Compute distance between features and targets.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            A list of targets to match the given `features` against.
        compare_2d : bool
            When true, compare against `samples_2d` instead of `samples`.

        Returns
        -------
        ndarray
            Returns a cost matrix of shape len(targets), len(features), where
            element (i, j) contains the closest distance (under the
            configured metric) between `targets[i]` and `features[j]`.

        """
        cost_matrix = np.zeros((len(targets), len(features)))
        gallery = self.samples_2d if compare_2d else self.samples
        for row, target in enumerate(targets):
            cost_matrix[row, :] = self._metric(gallery[target], features)
        return cost_matrix

    def distance_torch(self, features, targets, compare_2d=False):
        '''
        Same contract as `distance`, evaluated with torch on the CUDA device;
        the result is moved back to the host and returned as an ndarray.
        '''
        features = torch.from_numpy(features).cuda()
        cost_matrix = torch.zeros(len(targets), len(features)).cuda()
        gallery = self.samples_2d if compare_2d else self.samples
        for row, target in enumerate(targets):
            stored = torch.from_numpy(np.array(gallery[target])).cuda()
            cost_matrix[row, :] = self._metric_torch(stored, features)
        return cost_matrix.cpu().numpy()

    def check_samples(self, targets):
        """Return True if any of `targets` has an empty `samples` gallery."""
        return any(len(self.samples[target]) == 0 for target in targets)


================================================
FILE: paper_experiments/utils/pointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.

Author: Charles R. Qi
Date: November 2016
"""

import numpy as np
import tensorflow as tf

def _variable_on_cpu(name, shape, initializer, use_fp16=False):
  """Create (or fetch) a variable pinned to the '/cpu:0' device.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
    use_fp16: if true the variable is float16, otherwise float32
  Returns:
    Variable Tensor
  """
  var_dtype = tf.float32
  if use_fp16:
    var_dtype = tf.float16
  with tf.device('/cpu:0'):
    return tf.get_variable(name, shape, initializer=initializer, dtype=var_dtype)

def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
  """Create an initialized variable and optionally register weight decay.

  The variable is initialized with the Xavier initializer when `use_xavier`
  is true and with a truncated normal distribution otherwise. When `wd` is
  given, an L2 penalty scaled by `wd` is added to the 'losses' collection.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    use_xavier: bool, whether to use xavier initializer

  Returns:
    Variable Tensor
  """
  initializer = (tf.contrib.layers.xavier_initializer() if use_xavier
                 else tf.truncated_normal_initializer(stddev=stddev))
  var = _variable_on_cpu(name, shape, initializer)
  if wd is None:
    return var
  penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
  tf.add_to_collection('losses', penalty)
  return var


def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 1D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 3-D tensor variable BxLxC
    num_output_channels: int
    kernel_size: int
    scope: string
    stride: int
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the non-linearity
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    in_channels = inputs.shape[-1].value
    kernel = _variable_with_weight_decay(
        'weights',
        shape=[kernel_size, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    convolved = tf.nn.conv1d(inputs, kernel, stride=stride, padding=padding)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    result = tf.nn.bias_add(convolved, biases)

    if bn:
      result = batch_norm_for_conv1d(result, is_training,
                                     bn_decay=bn_decay, scope='bn')

    if activation_fn is None:
      return result
    return activation_fn(result)


def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 2D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 4-D tensor variable BxHxWxC
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the non-linearity
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    k_rows, k_cols = kernel_size
    in_channels = inputs.shape[-1].value
    kernel = _variable_with_weight_decay(
        'weights',
        shape=[k_rows, k_cols, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    s_rows, s_cols = stride
    convolved = tf.nn.conv2d(inputs, kernel,
                             [1, s_rows, s_cols, 1],
                             padding=padding)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    result = tf.nn.bias_add(convolved, biases)

    if bn:
      result = batch_norm_for_conv2d(result, is_training,
                                     bn_decay=bn_decay, scope='bn')

    if activation_fn is None:
      return result
    return activation_fn(result)


def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=0.0,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
  """ 2D convolution transpose with non-linear operation.

  Args:
    inputs: 4-D tensor variable BxHxWxC
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor

  Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a

  Batch, height and width may be statically unknown; in that case the output
  shape is assembled from `tf.shape(inputs)` at run time instead of failing
  while the graph is being built.
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
      kernel_h, kernel_w = kernel_size
      num_in_channels = inputs.get_shape()[-1].value
      kernel_shape = [kernel_h, kernel_w,
                      num_output_channels, num_in_channels] # reversed to conv2d
      kernel = _variable_with_weight_decay('weights',
                                           shape=kernel_shape,
                                           use_xavier=use_xavier,
                                           stddev=stddev,
                                           wd=weight_decay)
      stride_h, stride_w = stride

      # from slim.convolution2d_transpose
      def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
          # dim_size is either a python int or a scalar int tensor here.
          dim_size = dim_size * stride_size

          if padding == 'VALID':
            dim_size += max(kernel_size - stride_size, 0)
          return dim_size

      def resolve_dim(axis):
          # Prefer the static size; fall back to the run-time size only when
          # the static one is unknown so fully-static graphs are unchanged.
          static_size = inputs.get_shape()[axis].value
          if static_size is not None:
            return static_size
          return tf.shape(inputs)[axis]

      # calculate output shape
      batch_size = resolve_dim(0)
      height = resolve_dim(1)
      width = resolve_dim(2)
      out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
      out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
      output_shape = [batch_size, out_height, out_width, num_output_channels]
      if any(isinstance(dim, tf.Tensor) for dim in output_shape):
        # Mixed static/dynamic entries must be packed into one shape tensor.
        output_shape = tf.stack(output_shape)

      outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                             [1, stride_h, stride_w, 1],
                             padding=padding)
      biases = _variable_on_cpu('biases', [num_output_channels],
                                tf.constant_initializer(0.0))
      outputs = tf.nn.bias_add(outputs, biases)

      if bn:
        outputs = batch_norm_for_conv2d(outputs, is_training,
                                        bn_decay=bn_decay, scope='bn')

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return outputs

   
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 3D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 5-D tensor variable BxDxHxWxC
    num_output_channels: int
    kernel_size: a list of 3 ints
    scope: string
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the non-linearity
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    k_depth, k_rows, k_cols = kernel_size
    in_channels = inputs.shape[-1].value
    kernel = _variable_with_weight_decay(
        'weights',
        shape=[k_depth, k_rows, k_cols, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    s_depth, s_rows, s_cols = stride
    convolved = tf.nn.conv3d(inputs, kernel,
                             [1, s_depth, s_rows, s_cols, 1],
                             padding=padding)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    result = tf.nn.bias_add(convolved, biases)

    if bn:
      result = batch_norm_for_conv3d(result, is_training,
                                     bn_decay=bn_decay, scope='bn')

    if activation_fn is None:
      return result
    return activation_fn(result)

def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=0.0,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
  """ Fully connected layer: matmul, bias, optional batch norm, activation.

  Args:
    inputs: 2-D tensor BxN
    num_outputs: int
    scope: string, variable scope holding 'weights' and 'biases'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the non-linearity
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor of size B x num_outputs.
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    fan_in = inputs.shape[-1].value
    weights = _variable_with_weight_decay('weights',
                                          shape=[fan_in, num_outputs],
                                          use_xavier=use_xavier,
                                          stddev=stddev,
                                          wd=weight_decay)
    projected = tf.matmul(inputs, weights)
    biases = _variable_on_cpu('biases', [num_outputs],
                              tf.constant_initializer(0.0))
    result = tf.nn.bias_add(projected, biases)

    if bn:
      result = batch_norm_for_fc(result, is_training, bn_decay, 'bn')

    if activation_fn is None:
      return result
    return activation_fn(result)


def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D max pooling over the H and W axes.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    scope: string; the pooling op is named after this scope
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_rows, k_cols = kernel_size
    s_rows, s_cols = stride
    window = [1, k_rows, k_cols, 1]
    step = [1, s_rows, s_cols, 1]
    return tf.nn.max_pool(inputs,
                          ksize=window,
                          strides=step,
                          padding=padding,
                          name=sc.name)

def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D average pooling over the H and W axes.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    scope: string; the pooling op is named after this scope
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_rows, k_cols = kernel_size
    s_rows, s_cols = stride
    window = [1, k_rows, k_cols, 1]
    step = [1, s_rows, s_cols, 1]
    return tf.nn.avg_pool(inputs,
                          ksize=window,
                          strides=step,
                          padding=padding,
                          name=sc.name)


def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ 3D max pooling over the D, H and W axes.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints
    scope: string; the pooling op is named after this scope
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_depth, k_rows, k_cols = kernel_size
    s_depth, s_rows, s_cols = stride
    window = [1, k_depth, k_rows, k_cols, 1]
    step = [1, s_depth, s_rows, s_cols, 1]
    return tf.nn.max_pool3d(inputs,
                            ksize=window,
                            strides=step,
                            padding=padding,
                            name=sc.name)

def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ 3D average pooling over the D, H and W axes.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints
    scope: string; the pooling op is named after this scope
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_depth, k_rows, k_cols = kernel_size
    s_depth, s_rows, s_cols = stride
    window = [1, k_depth, k_rows, k_cols, 1]
    step = [1, s_depth, s_rows, s_cols, 1]
    return tf.nn.avg_pool3d(inputs,
                            ksize=window,
                            strides=step,
                            padding=padding,
                            name=sc.name)


def batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay):
  """ Batch normalization on convolutional maps and beyond...
  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

  In the training branch the current batch statistics are used and an
  exponential moving average of them is updated; otherwise the stored moving
  averages are used for normalization.

  Args:
      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:   boolean tf.Variable, true indicates training phase
      scope:         string, variable scope
      moments_dims:  a list of ints, indicating dimensions for moments calculation
      bn_decay:      float or float tensor variable, controlling moving average
                     weight; 0.9 is used when None
  Return:
      normed:        batch-normalized maps
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    num_channels = inputs.get_shape()[-1].value
    # Learnable per-channel shift (beta, starts at 0) and scale (gamma, starts at 1).
    # NOTE(review): these are created with tf.Variable rather than
    # tf.get_variable -- confirm whether they are meant to be shared when the
    # same scope is entered again under AUTO_REUSE.
    beta = tf.Variable(tf.constant(0.0, shape=[num_channels]),
                       name='beta', trainable=True)
    gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]),
                        name='gamma', trainable=True)
    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
    decay = bn_decay if bn_decay is not None else 0.9
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    # Operator that maintains moving averages of variables.
    # It is a no-op outside of training.
    ema_apply_op = tf.cond(is_training,
                           lambda: ema.apply([batch_mean, batch_var]),
                           lambda: tf.no_op())
    
    # Update moving average and return current batch's avg and var.
    # The control dependency forces the EMA update to run before the batch
    # statistics are handed out.
    def mean_var_with_update():
      with tf.control_dependencies([ema_apply_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)
    
    # ema.average returns the Variable holding the average of var.
    mean, var = tf.cond(is_training,
                        mean_var_with_update,
                        lambda: (ema.average(batch_mean), ema.average(batch_var)))
    # 1e-3 is the variance epsilon passed to tf.nn.batch_normalization.
    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
  return normed


def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
  """ Batch normalization on FC data (moments taken over axis 0).

  Args:
      inputs:      Tensor, 2D BxC input
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  reduce_axes = [0,]
  return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)


def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope):
  """ Batch normalization on 1D convolutional maps (moments over axes 0, 1).

  Args:
      inputs:      Tensor, 3D BLC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  reduce_axes = [0, 1]
  return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)


def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope):
  """ Batch normalization on 2D convolutional maps (moments over axes 0, 1, 2).

  Args:
      inputs:      Tensor, 4D BHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  reduce_axes = [0, 1, 2]
  return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)


def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
  """ Batch normalization on 3D convolutional maps (moments over axes 0-3).

  Args:
      inputs:      Tensor, 5D BDHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  reduce_axes = [0, 1, 2, 3]
  return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)


def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
  """ Dropout layer that is only active while training.

  Args:
    inputs: tensor
    is_training: boolean tf.Variable
    scope: string
    keep_prob: float in [0,1]
    noise_shape: list of ints

  Returns:
    tensor variable: dropout applied to `inputs` when `is_training` is true,
    `inputs` itself otherwise
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    def _with_dropout():
      return tf.nn.dropout(inputs, keep_prob, noise_shape)

    def _passthrough():
      return inputs

    return tf.cond(is_training, _with_dropout, _passthrough)


================================================
FILE: paper_experiments/utils/pointnet_transform_nets.py
================================================
import tensorflow as tf
import numpy as np
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import pointnet_tf_util

def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """ Input (XYZ) Transform Net, input is BxNx3 gray image.

        Three 2D convolutions and a max-reduction over axis 1 feed two fully
        connected layers; a final linear layer whose weights and biases start
        at zero, plus a constant flattened 3x3 identity, yields the transform.
        Only K == 3 is supported (asserted).

        Return:
            Transformation matrix of size 3xK for every batch element """
    batch_size = point_cloud.get_shape()[0].value

    # Append a trailing channel axis so the cloud can go through conv2d.
    as_image = tf.expand_dims(point_cloud, -1)
    conv_kwargs = dict(padding='VALID', stride=[1,1], bn=True,
                       is_training=is_training, bn_decay=bn_decay)
    feat = pointnet_tf_util.conv2d(as_image, 64, [1,3], scope='tconv1', **conv_kwargs)
    feat = pointnet_tf_util.conv2d(feat, 128, [1,1], scope='tconv2', **conv_kwargs)
    feat = pointnet_tf_util.conv2d(feat, 1024, [1,1], scope='tconv3', **conv_kwargs)
    feat = tf.reduce_max(feat, axis = 1)

    feat = tf.reshape(feat, [batch_size, -1])
    fc_kwargs = dict(bn=True, is_training=is_training, bn_decay=bn_decay)
    feat = pointnet_tf_util.fully_connected(feat, 512, scope='tfc1', **fc_kwargs)
    feat = pointnet_tf_util.fully_connected(feat, 256, scope='tfc2', **fc_kwargs)

    with tf.variable_scope('transform_XYZ'):
        assert(K==3)
        zero_init = tf.constant_initializer(0.0)
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=zero_init,
                                  dtype=tf.float32)
        bias_var = tf.get_variable('biases', [3*K],
                                   initializer=tf.constant_initializer(0.0),
                                   dtype=tf.float32)
        # Offset by the flattened identity so the initial output is I.
        identity_offset = tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        biases = bias_var + identity_offset
        flat_transform = tf.matmul(feat, weights)
        flat_transform = tf.nn.bias_add(flat_transform, biases)

    return tf.reshape(flat_transform, [-1, 3, K])


def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """ Feature Transform Net, input is BxNx1xK.

        Three 1x1 convolutions and a max-reduction over axis 1 feed two fully
        connected layers; a final linear layer whose weights and biases start
        at zero, plus a constant flattened KxK identity, yields the transform.

        Return:
            Transformation matrix of size KxK for every batch element """
    batch_size = inputs.get_shape()[0].value

    conv_kwargs = dict(padding='VALID', stride=[1,1], bn=True,
                       is_training=is_training, bn_decay=bn_decay)
    feat = pointnet_tf_util.conv2d(inputs, 64, [1,1], scope='tconv1', **conv_kwargs)
    feat = pointnet_tf_util.conv2d(feat, 128, [1,1], scope='tconv2', **conv_kwargs)
    feat = pointnet_tf_util.conv2d(feat, 1024, [1,1], scope='tconv3', **conv_kwargs)
    feat = tf.reduce_max(feat, axis = 1)

    feat = tf.reshape(feat, [batch_size, -1])
    fc_kwargs = dict(bn=True, is_training=is_training, bn_decay=bn_decay)
    feat = pointnet_tf_util.fully_connected(feat, 512, scope='tfc1', **fc_kwargs)
    feat = pointnet_tf_util.fully_connected(feat, 256, scope='tfc2', **fc_kwargs)

    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        bias_var = tf.get_variable('biases', [K*K],
                                   initializer=tf.constant_initializer(0.0),
                                   dtype=tf.float32)
        # Offset by the flattened identity so the initial output is I.
        identity_offset = tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        biases = bias_var + identity_offset
        flat_transform = tf.matmul(feat, weights)
        flat_transform = tf.nn.bias_add(flat_transform, biases)

    return tf.reshape(flat_transform, [batch_size, K, K])


================================================
FILE: paper_experiments/utils/read_detections.py
================================================
import numpy as np
import pdb
from deep_sort_utils import non_max_suppression as deepsort_nms

def read_ground_truth_2d_detections(detection_path_2d, frame_idx, detection_matrix = None, threshold = -np.inf, nms_threshold = 0.75):
    """Read 2D detections from a comma separated file and apply NMS.

    Columns are read as: 0 frame index, 1 object id, 2 x1, 3 y1, 4 width,
    5 height, 6 confidence.

    Parameters
    ----------
    detection_path_2d : str
        File to load with `np.loadtxt`; only used when `detection_matrix`
        is None.
    frame_idx : Optional[int]
        If given, only rows of this frame with confidence above `threshold`
        are used. If None, every frame is processed (no confidence filter).
    detection_matrix : Optional[ndarray]
        Previously loaded matrix, to avoid re-reading the file.
    threshold : float
        Confidence threshold (single-frame mode only).
    nms_threshold : float
        Overlap threshold handed to the non-max-suppression routine.

    Returns
    -------
    Single-frame mode: `(detections, object_ids, detection_matrix)`, where
    `detections` has rows `[x1, y1, x2, y2, conf, -1, -1]` for the boxes that
    survive NMS (or `[]` if none do) and `object_ids` holds the ids of all
    rows that passed the confidence filter (not NMS-filtered).
    All-frames mode: `(detections, object_ids, frame_indices)` restricted to
    the rows that survive NMS.

    """
    if detection_matrix is None:
        detection_matrix = np.loadtxt(detection_path_2d, delimiter=',')

    if len(detection_matrix) == 0:
        # NOTE(review): this returns four values while every other path
        # returns three; kept as-is because callers are not visible here.
        return [], [], [], []
    if len(detection_matrix.shape) == 1:
        # A single-row file is loaded as a 1-D array.
        detection_matrix = np.expand_dims(detection_matrix, axis=0)

    def _column(rows, index):
        # One column as an (n, 1) float32 array.
        return np.expand_dims(rows[:, index].astype(np.float32), 1)

    frame_indices = detection_matrix[:, 0].astype(np.int32)

    if frame_idx is not None:
        detection_file = detection_matrix[frame_indices == frame_idx]
        confident = _column(detection_file, 6)[:, 0] > threshold
        detection_file = detection_file[confident]

        object_ids = _column(detection_file, 1)
        x1 = _column(detection_file, 2)
        y1 = _column(detection_file, 3)
        w = _column(detection_file, 4)
        h = _column(detection_file, 5)
        conf = _column(detection_file, 6)
        cls_conf = -np.ones(conf.shape)
        cls_pred = -np.ones(conf.shape)
        detections = np.hstack([x1,y1,x1+w,y1+h, conf, cls_conf, cls_pred])
        boxes = np.hstack([x1, y1, w, h])

        kept = set(deepsort_nms(boxes, nms_threshold, np.squeeze(conf)))
        kept_rows = [row for i, row in enumerate(detections) if i in kept]
        detections = np.vstack(kept_rows) if kept_rows else []
        return detections, object_ids, detection_matrix

    # All-frames mode.
    # NOTE(review): survivors are collected per frame in np.unique (sorted)
    # order while frame_indices/object_ids stay in file order -- presumably
    # the file is sorted by frame; confirm.
    detection_file = detection_matrix
    object_ids = _column(detection_file, 1)
    detections = []
    total_ids = []
    for frame in np.unique(frame_indices):
        frame_rows = detection_file[frame_indices == frame]
        x1 = _column(frame_rows, 2)
        y1 = _column(frame_rows, 3)
        w = _column(frame_rows, 4)
        h = _column(frame_rows, 5)
        conf = _column(frame_rows, 6)
        boxes = np.hstack([x1, y1, w, h])
        cls_conf = -np.ones(conf.shape)
        cls_pred = -np.ones(conf.shape)
        frame_detections = np.hstack([x1,y1,x1+w,y1+h, conf, cls_conf, cls_pred])

        kept = set(deepsort_nms(boxes, nms_threshold, np.squeeze(conf)))
        # ids marks suppressed rows of this frame with -1.
        ids = np.zeros((x1.shape[0], 1))
        kept_rows = []
        # Only this frame's rows need to be visited; NMS indices refer to them.
        for i in range(x1.shape[0]):
            if i in kept:
                kept_rows.append(frame_detections[i])
            else:
                ids[i] = -1
        if kept_rows:
            detections.append(np.vstack(kept_rows))
        total_ids.append(ids)

    detections = np.vstack(detections)
    ids = np.vstack(total_ids)

    survivors = np.squeeze(ids != -1)
    frame_indices = frame_indices[survivors]
    object_ids = object_ids[survivors]

    return detections, object_ids, frame_indices

def read_ground_truth_3d_detections(detection_path_3d, frame_idx):
    """Load ground-truth 3D boxes from a comma-separated text file.

    Each row is read as ``frame, id, x, y, z, l, h, w, theta`` (columns 0-8).

    Parameters
    ----------
    detection_path_3d : str
        Path to the comma-delimited ground-truth file.
    frame_idx : int or None
        When not None, only rows whose frame column equals this value are
        kept.

    Returns
    -------
    boxes_3d : ndarray, shape (N, 7), float32
        Columns ``x, y, z, l, h, w, theta`` of the selected rows.
    ids : ndarray, shape (N, 1), float32
        Column 1 (object id) of the selected rows.
    frame_indices : ndarray of int32
        Frame column of every row in the file. Only returned when
        ``frame_idx`` is None.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column indexing below raises IndexError.
    detection_file = np.loadtxt(detection_path_3d, delimiter=',', ndmin=2)
    frame_indices = detection_file[:, 0].astype(np.int32)
    if frame_idx is not None:
        detection_file = detection_file[frame_indices == frame_idx]

    # Explicit column lists (rather than slices) so that a file with too few
    # columns still fails loudly with IndexError.
    boxes_3d = detection_file[:, [2, 3, 4, 5, 6, 7, 8]].astype(np.float32)
    ids = detection_file[:, [1]].astype(np.float32)

    if frame_idx is None:
        return boxes_3d, ids, frame_indices
    return boxes_3d, ids


================================================
FILE: paper_experiments/utils/resnet_reid_utils.py
================================================
import torch
import os
import sys
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from PIL import Image
import numpy as np
PACKAGE_PARENT = '..'
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
from models.resnet_reid_models import ICT_ResNet

class Feature_ResNet(nn.Module):
    """Feature extractor built from the non-``fc`` sub-modules of ICT_ResNet.

    Parameters
    ----------
    n_layer : int
        Forwarded to ``ICT_ResNet`` as its layer-count argument.
    output_color : bool
        When truthy, the ``fc_c`` head of the source model is kept and
        ``forward`` additionally returns the argmax over its outputs.
    """

    def __init__(self, n_layer, output_color):
        super(Feature_ResNet, self).__init__()
        all_model = ICT_ResNet(1, 10, 9, n_layer, pretrained=False)
        # Re-register every sub-module except the fully connected heads
        # (anything with 'fc' in its name).
        for name, modules in all_model._modules.items():
            if 'fc' not in name:
                self.add_module(name, modules)
        # Truthiness is used here and in forward() so both always agree on
        # whether the colour head exists.
        if output_color:
            self.fc_c = all_model.fc_c
        self.output_color = output_color

    def forward(self, x):
        """Return flattened features, plus the colour argmax if enabled.

        Returns
        -------
        Tensor or (Tensor, Tensor)
            ``x`` flattened to ``(x.size(0), -1)`` after all non-``fc``
            modules; when ``output_color`` is truthy also the index of the
            maximum ``fc_c`` output along dim 1.
        """
        for name, module in self._modules.items():
            if 'fc' not in name:
                x = module(x)
        x = x.view(x.size(0), -1)
        if not self.output_color:
            return x
        # Single pass through the colour head (it used to be evaluated twice
        # with the first result discarded).
        color = torch.max(self.fc_c(x), dim=1)[1]
        return x, color

class ResNet_Loader(object):
    """Loads a Feature_ResNet checkpoint and extracts features from patches.

    Parameters
    ----------
    model_path : str
        Checkpoint passed to ``torch.load``; expected to be a state dict.
    n_layer : int
        Forwarded to ``Feature_ResNet``.
    batch_size : int
        Stored on the instance. Note that ``inference`` overwrites it with
        ``len(patches)`` on every call.
    output_color : bool
        Whether the colour head (``fc_c``) is kept and its prediction
        returned alongside the features.
    """

    def __init__(self,model_path,n_layer=50,batch_size=4,output_color=False):
        self.batch_size = batch_size
        self.output_color = output_color

        self.model = Feature_ResNet(n_layer,output_color)
        state_dict = torch.load(model_path)
        # Drop weights of heads the feature model does not contain: every
        # 'fc*' entry except 'fc_c', and 'fc_c' too when colour is disabled.
        for key in list(state_dict.keys()):
            is_color_head = key.find('fc_c') != -1
            if key.find('fc') != -1 and not is_color_head:
                del state_dict[key]
            elif not output_color and is_color_head:
                del state_dict[key]

        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.compose = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),
                                           transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])])
        # Not used by inference() below; kept because other code may read it.
        self.upsample = nn.Upsample(size=(224,224),mode='bilinear')

    def _run_batch(self, batch_list, feature_list, color_list):
        """Run one stacked batch through the model and collect the outputs.

        Without colour output each sample's feature is appended separately;
        with colour output the whole batch tensor is appended at once.
        """
        batch = Variable(torch.stack(batch_list)).cuda()
        if not self.output_color:
            for feature in self.model(batch):
                feature_list.append(feature.data)
        else:
            features, colors = self.model(batch)
            feature_list.append(features.data)
            color_list.append(colors.data)

    def inference(self,patches):
        """Extract features (and optionally colours) for a list of patches.

        Parameters
        ----------
        patches : sequence of torch.Tensor
            Each patch is moved to CPU, scaled by 255, cast to uint8 and
            converted to a PIL image before resizing/normalisation.
            # assumes values in [0, 1] and a layout ToPILImage accepts -- TODO confirm

        Returns
        -------
        list of Tensor
            One feature tensor per patch, when ``output_color`` is falsy.
        (Tensor, Tensor)
            Concatenated features and colour indices otherwise.
        """
        self.model.cuda()
        feature_list = []
        color_list = []
        batch_list = []
        # All patches are processed as a single batch: the configured batch
        # size is deliberately replaced by the number of patches.
        self.batch_size = len(patches)

        # Pure inference: disable autograd so no graph is built and retained.
        with torch.no_grad():
            for i, patch in enumerate(patches):
                img = self.compose(transforms.ToPILImage()((patch.cpu().numpy()*255).astype(np.uint8)))
                batch_list.append(img)
                if (i+1) % self.batch_size == 0:
                    self._run_batch(batch_list, feature_list, color_list)
                    batch_list = []
            if len(batch_list) > 0:
                self._run_batch(batch_list, feature_list, color_list)
                batch_list = []

        # The model intentionally stays on the GPU between calls.
        if not self.output_color:
            return feature_list
        # NOTE(review): with an empty `patches` and colour output enabled,
        # torch.cat receives an empty list -- confirm callers never do this.
        feature_list = torch.cat(feature_list,dim=0)
        color_list = torch.cat(color_list,dim=0)
        return feature_list,color_list


================================================
FILE: paper_experiments/utils/test_jpda.py
================================================
from gurobipy import *
from numpy import *
def mycallback(model, where):
    """Optional Gurobi progress callback (pass to ``m.optimize`` to enable).

    Prints node and iteration counts during MIP search and the best
    objective at MIP nodes.
    """
    if where == GRB.callback.MIP:
        print(model.cbGet(GRB.callback.MIP_NODCNT))
        print(model.cbGet(GRB.callback.MIP_ITRCNT), 'ITERATIONS')
    if where == GRB.callback.MIPNODE:
        print(model.cbGet(GRB.callback.MIPNODE_OBJBST), 'BEST OBJ')


# Problem size: numT rows and numC columns of a random assignment problem.
numT = 100
numC = 100

# Random reward for pairing row t with column c.
Assignment = random.random((numT,numC))

m=Model("Assignment")

# One binary decision variable per (t, c) pair. The underscore separator
# keeps names unique (e.g. (1, 11) vs (11, 1)).
X = [[m.addVar(vtype=GRB.BINARY, name="X_%d_%d" % (t, c)) for c in range(numC)]
     for t in range(numT)]
m.update()
m.modelSense = GRB.MAXIMIZE

# Each row is assigned to exactly one column ...
constraintT = [
    m.addConstr(quicksum(X[t][c] for c in range(numC)) == 1, 'constraintT%d' % t)
    for t in range(numT)
]

# ... and each column to exactly one row. These go into constraintC and are
# named by their own column index.
constraintC = [
    m.addConstr(quicksum(X[t][c] for t in range(numT)) == 1, 'constraintC%d' % c)
    for c in range(numC)
]

m.setObjective(quicksum(X[t][c]*Assignment[t][c]
                        for t in range(numT) for c in range(numC)))

m.update()

# Use m.optimize(mycallback) instead to print solver progress.
m.optimize()


print('runtime is %f'%m.Runtime)


================================================
FILE: paper_experiments/utils/test_kf/.gitignore
================================================
*.p


================================================
FILE: paper_experiments/utils/test_kf/run_kf_test.py
================================================
import sys
sys.path.insert(0, '..')
import kalman_filter
import kf_simple3d
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os.path
import pdb
np.set_printoptions(precision=4)

class Track:
    """Single-target Kalman filter harness that logs every step for testing.

    Parameters
    ----------
    track_id : hashable
        Identifier stored as ``self.id``.
    first_detection : array-like
        Measurement used to initialise the filter; its length defines the
        measurement dimension ``m``.
    kf_type : str
        ``"2d"`` selects ``kalman_filter.KalmanFilter`` and ``"simple3d"``
        selects ``kf_simple3d.KalmanFilterSimple3D``.

    Raises
    ------
    ValueError
        If ``kf_type`` is not one of the supported values.
    """

    def __init__(self, track_id, first_detection, kf_type):
        # initiate kf
        if kf_type == "2d":
            self.kf = kalman_filter.KalmanFilter()
        elif kf_type == "simple3d":
            self.kf = kf_simple3d.KalmanFilterSimple3D()
        else:
            # Fail here instead of with an AttributeError on self.kf below.
            raise ValueError(
                "Unknown kf_type {!r}; expected '2d' or 'simple3d'".format(kf_type))

        self.mean, self.cov = self.kf.initiate(first_detection)

        self.id = track_id
        n = len(self.mean)
        self.n = n
        m = len(first_detection)
        self.m = m

        # initialize data stores (one row is appended per update() call)
        self.frame_log = np.zeros((0))
        self.measurement_log = np.zeros((0, m))
        self.gt_log = np.zeros((0, m))
        self.mean_log = np.zeros((0, n))
        self.cov_log = np.zeros((0, n, n))
        self.gating_distance_log = np.zeros((0))

    def update(self, measurement, gt, frame):
        """Log the current state, then run one predict + update cycle.

        The state, measurement and gating distance are recorded *before* the
        filter is advanced, so row k of the logs holds the state that was
        current when measurement k arrived.
        """
        # log data
        self.mean_log = np.vstack((self.mean_log, self.mean))
        self.cov_log = np.concatenate((self.cov_log, self.cov[np.newaxis,:,:]))
        self.measurement_log = np.vstack((self.measurement_log, measurement))
        self.gt_log = np.vstack((self.gt_log, gt))
        self.frame_log = np.append(self.frame_log, frame)

        gating_distance = self.kf.gating_distance(self.mean, self.cov, measurement)
        self.gating_distance_log = np.append(self.gating_distance_log, gating_distance)

        # KF predict and update
        self.mean, self.cov = self.kf.predict(self.mean, self.cov)
        self.mean, self.cov = self.kf.update(self.mean, self.cov, measurement)

    def plot(self):
        """Show ground truth, measurements and filtered state in a 3x2 grid."""
        t = self.frame_log
        state = self.mean_log
        series = (('Ground Truth', self.gt_log),
                  ('Measured', self.measurement_log),
                  ('filtered', state))

        # Panels 1-2: first two components against time.
        for panel, column, axis_name in ((321, 0, 'x'), (322, 1, 'y')):
            plt.subplot(panel)
            for label, values in series:
                plt.plot(t, values[:, column], label=label)
            plt.xlabel('time')
            plt.ylabel(axis_name)
            plt.legend()

        # Panel 3: trajectory in the plane of the first two components.
        plt.subplot(323)
        for label, values in series:
            plt.plot(values[:, 0], values[:, 1], label=label)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend()

        # Panels 4-5: state columns m and m+1, labelled as velocities.
        for panel, column, axis_name in ((324, self.m, 'Vx'), (325, self.m + 1, 'Vy')):
            plt.subplot(panel)
            plt.plot(t, state[:, column], label='filtered')
            plt.xlabel('time')
            plt.ylabel(axis_name)
            plt.legend()

        plt.show()


def file2data(fname):
    """Unpickle and return the object stored in ``fname``.

    The stored object is expected to be a list of frames, each frame being a
    list of detections, each detection a numpy array of measurements.
    """
    with open(fname, "rb") as stream:
        return pickle.load(stream)

def cmp_tracks(track1, track2):
    """Return True when two tracks have matching mean and gating logs.

    Both ``mean_log`` and ``gating_distance_log`` must have identical shapes
    and agree element-wise to within an absolute tolerance of 1e-12.
    """
    tolerance = 1e-12

    def _logs_match(lhs, rhs):
        lhs = np.asarray(lhs)
        rhs = np.asarray(rhs)
        if lhs.shape != rhs.shape:
            return False
        # Compare the *difference* against the tolerance; comparing the
        # boolean equality mask would pass whenever any single entry matched.
        return bool(np.all(np.abs(lhs - rhs) <= tolerance))

    mean_log_pass = _logs_match(track1.mean_log, track2.mean_log)
    gating_distance_pass = _logs_match(track1.gating_distance_log,
                                       track2.gating_distance_log)
    return mean_log_pass and gating_distance_pass

def cmp(data, val):
    """Compare every track in ``data`` with the same-keyed track in ``val``.

    Prints one line per track and returns True only if all tracks matched.
    """
    mismatches = 0
    for itrack in data:
        if cmp_tracks(data[itrack], val[itrack]):
            print("Tracks matched: ", itrack)
        else:
            print("Mismatch found in track: ", itrack)
            mismatches += 1
    return mismatches == 0


def validate(data, fname):
    """Check ``data`` against the baseline in ``fname``, creating it if absent.

    If ``fname`` does not exist, ``data`` is pickled there as the new
    baseline and True is returned. Otherwise the stored baseline is loaded
    and the result of comparing it with ``data`` is returned.
    """
    if not os.path.isfile(fname):
        with open(fname, "wb") as sink:
            pickle.dump(data, sink)
        return True
    baseline = file2data(fname)
    return cmp(data, baseline)

def run_kf_test(fname, kf_type):
    """Replay the pickled detections in ``fname`` through per-track filters.

    One Track is created per detection in the first frame (keyed by its
    ground-truth id); every frame, including the first, is then fed to the
    matching track. The resulting tracks are validated against
    ``fname + ".val"`` and plotted if validation fails.
    """
    print("Running test for: {}".format(fname))
    frames = file2data(fname)

    # Seed one track per detection of the first frame.
    tracks = {}
    for detection in frames[0]:
        measurement, _truth, key = (detection[0], detection[1], detection[2])
        tracks[key] = Track(key, measurement, kf_type)

    # Feed every frame to its tracks, using the frame position as timestamp.
    for frame_number, frame in enumerate(frames):
        for detection in frame:
            measurement, truth, key = (detection[0], detection[1], detection[2])
            tracks[key].update(measurement, truth, frame_number)

    if not validate(tracks, fname + ".val"):
        for key in tracks:
            tracks[key].plot()


if __name__=='__main__':
    # (fixture file, Kalman filter type) pairs, executed in this order.
    for fixture_name, filter_kind in (
            ("single_track_4state_test.p", "2d"),
            ("two_track_4state_test.p", "2d"),
            ("single_track_6state_test.p", "simple3d")):
        run_kf_test(fixture_name, filter_kind)


================================================
FILE: paper_experiments/utils/test_kf/write_kf_test.py
================================================
import sys
sys.path.insert(0, '..')
import kalman_filter
import numpy as np
import pickle
import pdb

def data2file(data, fname):
    """Pickle ``data`` to the file ``fname``.

    ``data`` should be a list of frames; each frame is a list of detections,
    and each detection is ``[measurement, ground truth, gt track id]``.
    """
    with open(fname, "wb") as sink:
        pickle.dump(data, sink)

def add_noise(center, std):
    """Return a copy of ``center`` with independent Gaussian noise per entry.

    Entry ``k`` is drawn from ``np.random.normal(center[k], std[k])``, in
    index order, using numpy's global random state.

    NOTE(review): the result has the same dtype as ``center``, so an integer
    ``center`` yields integer-truncated samples -- confirm this is intended
    before changing it, since generated fixtures depend on the exact values.
    """
    noisy = np.zeros_like(center)
    for position, mu in enumerate(center):
        noisy[position] = np.random.normal(mu, std[position])
    return noisy

def single_track_4state_test():
    """Write a 100-frame, single-track fixture to single_track_4state_test.p.

    The ground truth moves 10 units per frame in both x and y with constant
    a=2 and h=400; each frame holds one noisy measurement of it (track id 0).
    The random seed is fixed so the output is reproducible.
    """
    np.random.seed(0)

    frames = []
    for step in range(100):
        # Track 0: (x, y, a, h)
        truth = np.array([step*10, step*10, 2, 400])
        measured = add_noise(truth, [15, 15, 0.1, 5])
        frames.append([[measured, truth, 0]])

    data2file(frames, "single_track_4state_test.p")

def two_track_4state_test():
    """Write a 100-frame, two-track fixture to two_track_4state_test.p.

    Track 0 moves linearly (10 units per frame in x and y); track 1 follows
    x = i*i/10, y = 30*sqrt(i). Both keep a=2 and h=400. Noise for track 0
    is drawn before noise for track 1 in every frame, with a fixed seed.
    """
    np.random.seed(0)

    frames = []
    for step in range(100):
        # Track 0
        truth_a = np.array([step*10, step*10, 2, 400])
        measured_a = add_noise(truth_a, [15, 15, 0.1, 5])

        # Track 1
        truth_b = np.array([step*step/10, step**0.5*30, 2, 400])
        measured_b = add_noise(truth_b, [20, 10, 0.2, 8])

        frames.append([[measured_a, truth_a, 0], [measured_b, truth_b, 1]])

    data2file(frames, "two_track_4state_test.p")

def single_track_6state_test():
    """Write a 100-frame, single-track fixture to single_track_6state_test.p.

    The nominal trajectory is (x, y, l, h, w, theta) with x = y = 10*i,
    l = h = w = 400 and theta = i/10. The stored ground truth is the nominal
    trajectory with a small perturbation, and the measurement is a further
    noisy copy of that perturbed ground truth (track id 0). Seed is fixed.
    """
    np.random.seed(0)

    frames = []
    for step in range(100):
        # Track 0
        nominal = np.array([step*10, step*10, 400, 400, 400, step/10])
        # Perturb the ground truth first, then measure the perturbed value;
        # the order of these two draws determines the generated data.
        truth = add_noise(nominal, [3, 3, 1, 1, 1, 1*0.017])
        measured = add_noise(truth, [15, 15, 5, 5, 5, 5*0.017])
        frames.append([[measured, truth, 0]])

    data2file(frames, "single_track_6state_test.p")

if __name__=='__main__':
    # Regenerate every fixture, in this order.
    for generate_fixture in (single_track_4state_test,
                             two_track_4state_test,
                             single_track_6state_test):
        generate_fixture()

================================================
FILE: paper_experiments/utils/track.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
import copy

from .imm import IMMFilter2D

class TrackState:
    """
    Integer constants describing the lifecycle of a single target track.

    A new track starts as `Tentative` and becomes `Confirmed` once enough
    evidence has been collected. A track that is no longer alive is marked
    `Deleted` so it can be removed from the set of active tracks.

    """

    Tentative, Confirmed, Deleted = 1, 2, 3


class Track:
    """
    A single target track holding a filter state, lifecycle counters and
    caches of feature vectors.

    NOTE(review): the original description gives the state space as
    `(x, y, a, h)` plus velocities, but most code paths here feed the filter
    `to_xywh()` measurements -- confirm which convention applies.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution. When
        `model_probabilities` is not None it is indexed as a 2-D array
        (one row per filter model).
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    model_probabilities : ndarray or None
        Model probabilities for the IMM filter; None selects the
        single-model code paths.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of hits required before the track is confirmed. A tentative
        track is deleted on its first miss.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the originating detection.
    cuda : bool
        Selects `torch.cuda.FloatTensor` for LSTM inputs.
    lstm : callable or None
        Optional recurrent model applied to each feature before caching.
    kf_appearance_feature : bool
        Stored on the instance; not otherwise used in this class.
    last_det : Detection or None
        Initial value of `last_detection` and `last_2d_det`.

    Attributes
    ----------
    mean : ndarray
        Current state mean.
    covariance : ndarray
        Current state covariance.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurance.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated feature
        vector is added to this list.
    features_2d : List[ndarray]
        A cache of appearance features, filled alongside `features`.

    """
    def __init__(self, mean, covariance, model_probabilities, track_id, n_init, max_age,
                 feature=None, appearance_feature = None, cuda = False, lstm = None, kf_appearance_feature=False, last_det = None):

        self.mean = mean
        self.covariance = covariance
        self.model_probabilities = model_probabilities
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        self.kf_appearance_feature = kf_appearance_feature
        if lstm is None:
            # Without an LSTM the initial features are cached verbatim, even
            # when they are None.
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            # Raw features are available here (not a detection list), so use
            # the raw-feature helper rather than feature_update().
            self._append_features(feature, appearance_feature, lstm)
        if self.model_probabilities is not None:
            self.first_detection = mean[:,:4]
        else:
            self.first_detection = mean[:4]

        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False
        self.next_to_last_detection = None
        self.last_detection = last_det
        self.last_2d_det = last_det

    def to_tlwh(self, kf):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.

        For single-model tracks the last matched 2D detection is used when
        available, otherwise the first four state entries. For IMM tracks the
        per-model states are combined first. `kf` is not used.

        Returns
        -------
        ndarray
            The bounding box.

        """
        if self.model_probabilities is None:
            if self.last_2d_det is not None: #TODO: This part
                ret = self.last_2d_det.to_xywh()
            else:
                ret = self.mean[:4].copy()
        else:
            mean, _ = IMMFilter2D.combine_states(self.mean, self.covariance, self.model_probabilities)
            ret = mean[:4].copy()
        # Convert centre coordinates to the top-left corner.
        ret[:2] -= ret[2:] / 2
        return ret

    def to_tlbr(self):
        """Get current position in bounding box format `(min x, miny, max x,
        max y)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.to_tlwh(None)
        ret[2:] = ret[:2] + ret[2:]
        return ret

    def update_feature(self, img, appearance_model):
        """Crop the track's current box from `img` and embed it.

        Returns the first output of `appearance_model` for the crop (batch
        dimension removed), or None if the crop is empty.

        NOTE(review): the crop origin is computed as `x - x/2`, `y - y/2`
        rather than from the box width/height -- looks unintended; confirm
        before relying on this method.
        """
        x = round(self.mean[0])
        y = round(self.mean[1])
        a = self.mean[2]
        box_h = int(round(self.mean[3]))

        x1 = int(round(x - (x / 2)))
        y1 = int(round(y - (y / 2)))
        box_w = int(round(a * box_h))

        # assumes img is indexed (channel, row, column) -- TODO confirm
        patch = img[:, y1:y1+box_h, x1:x1+box_w]

        if patch is None or patch.nelement()==0:
            return None
        patch = patch.unsqueeze(0)

        with torch.no_grad():
            feature ,_ = appearance_model(patch)

            return feature.squeeze(0)

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        if self.model_probabilities is None:
            self.mean, self.covariance = kf.predict(self.mean, self.covariance, self.last_detection, self.next_to_last_detection)
        else:
            self.mean, self.covariance, self.model_probabilities = kf.predict(self.mean, self.covariance, self.model_probabilities)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection, detections_3d=None,
                marginalization=None, detection_idx=None, JPDA=False,
                cur_frame = None, appearance_model = None, lstm = None,
                only_feature=False):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : List[Detection]
            All detections of the current frame; `detection_idx` selects the
            one associated with this track.
        marginalization : ndarray, optional
            JPDA association weights; entry 0 is treated as the
            "no detection" hypothesis. Only used when `JPDA` is True.
        detection_idx : int
            Index of the associated detection. In JPDA mode a negative value
            means no detection was associated.
        JPDA : bool
            Selects the JPDA update path.
        lstm : callable or None
            Optional recurrent model applied to cached features.

        `detections_3d`, `cur_frame`, `appearance_model` and `only_feature`
        are accepted for interface compatibility and not used here.

        """
        if JPDA:
            detections = [det.to_xywh() for det in detection]
            if self.model_probabilities is None:
                self.mean, self.covariance = kf.update(
                    self.mean, self.covariance, detections, marginalization, JPDA)
            else:
                self.mean, self.covariance, self.model_probabilities = kf.update(self.mean, self.covariance, detections, self.model_probabilities, marginalization, JPDA)
            # NOTE(review): this runs before the negative-index check below,
            # so an unassociated track (detection_idx < 0) still caches the
            # feature of detection[detection_idx] -- confirm this is intended.
            self.feature_update(detection, detection_idx, lstm)
            if np.argmax(marginalization) != 0:
                self.matched=True
            else:
                self.matched=False
            if detection_idx < 0:
                self.last_2d_det = None
                return
            self.hits += 1
            self.time_since_update = 0
            detection = detection[detection_idx]
            self.last_2d_det = detection

        else:
            detection = detection[detection_idx]
            if self.model_probabilities is None:
                self.mean, self.covariance = kf.update(
                    self.mean, self.covariance, detection.to_xywh())
            else:
                self.mean, self.covariance, self.model_probabilities = kf.update(self.mean, self.covariance, detection.to_xyah(), self.model_probabilities)
            # The detection is already selected, so cache its raw features
            # directly (feature_update() expects a list plus an index).
            self._append_features(detection.feature, detection.appearance_feature, lstm)
            self.hits += 1
            self.time_since_update = 0
        if detection.box_3d is not None:
            self.next_to_last_detection = self.last_detection
            self.last_detection = detection
        if self.age==2:
            # Second frame of the track: seed the velocity from the
            # displacement since the first detection.
            self.update_velocity(detection.to_xywh())
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def delete_track(self):
        """Unconditionally mark this track as deleted."""
        self.state = TrackState.Deleted

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).

        Tentative tracks are deleted immediately; other tracks are deleted
        once `time_since_update` exceeds `max_age`.
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def update_velocity(self, new_detection):
        """Overwrite the velocity part of the state with
        `new_detection - first_detection` and shrink its covariance.

        NOTE(review): the IMM branch scales the velocity/velocity covariance
        block twice (by 16 overall) while the single-model branch scales it
        once (by 4) -- confirm which is intended.
        """
        if self.model_probabilities is not None:
            for kf_n in range(2):
                velocity_estimate = new_detection - self.first_detection
                self.mean[kf_n,4:] = velocity_estimate[kf_n,:4]
                # Reduce covariance of velocity by 4 times (half the standard deviation)
                self.covariance[kf_n,:,4:] /= 4
                self.covariance[kf_n,4:,:] /= 4
        else:
            velocity_estimate = new_detection - self.first_detection
            self.mean[4:] = velocity_estimate[:4]
            # Reduce covariance of velocity by 4 times (half the standard deviation)
            self.covariance[:,4:] /= 4
            self.covariance[4:,:4] /= 4

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted

    def is_exiting(self):
        """Returns True if this track has been flagged as exiting."""
        return self.exiting

    def mark_exiting(self):
        """Flag this track as exiting."""
        self.exiting = True

    def _append_features(self, feature, appearance_feature, lstm):
        """Cache one raw feature / appearance feature pair.

        A non-None `feature` is appended to `self.features`, first passed
        through `lstm` (carrying `self.hidden` between calls) when one is
        given. A non-None `appearance_feature` is appended to
        `self.features_2d` unchanged.
        """
        if feature is not None:
            if lstm is not None:
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
        """Update the feature caches from a list of detections.

        Parameters
        ----------
        detections : List[Detection]
            Detections of the current frame.
        detection_idx : int
            Index of the associated detection (non-JPDA mode only).
        lstm : callable or None
            Optional recurrent model (non-JPDA mode only).
        JPDA : bool
            When True, cache the `marginalization[1:]`-weighted sum of all
            detection features instead of a single detection's features.
            A combined feature is only cached if no detection's feature is
            None.
        marginalization : ndarray, optional
            JPDA weights; entry 0 is skipped.
        """
        if JPDA:
            features=[d.feature for d in detections]
            appearance_features=[d.appearance_feature for d in detections]
            if len([i for i in features if i is None])==0:
                combined_feature=np.sum(np.array(features).reshape(len(features), -1)
                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features.append(combined_feature)
            if len([i for i in appearance_features if i is None])==0:
                combined_feature=np.sum(
                                np.array(appearance_features).reshape(len(appearance_features), -1)
                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features_2d.append(combined_feature)
        else:
            chosen = detections[detection_idx]
            self._append_features(chosen.feature, chosen.appearance_feature, lstm)


================================================
FILE: paper_experiments/utils/track_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch

class TrackState:
    """
    Lifecycle markers for a single target track, stored as plain integers.

    Tracks begin as `Tentative`, are promoted to `Confirmed` after enough
    evidence has been collected, and are set to `Deleted` when they are no
    longer alive so that they can be dropped from the active set.

    """

    Tentative, Confirmed, Deleted = 1, 2, 3


class Track_3d:
    """
    A single target track filtered in 3D.

    The first seven state entries are treated as the 3D box (see
    `to_tlwh3d`); the remaining entries are presumably the associated
    velocities -- TODO confirm against the filter in use.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of hits required before the track is confirmed. A tentative
        track is deleted on its first miss.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the originating detection.
    cuda : bool
        Selects `torch.cuda.FloatTensor` for LSTM inputs.
    lstm : callable or None
        Optional recurrent model applied to features before caching.

    Attributes
    ----------
    mean : ndarray
        Current state mean.
    covariance : ndarray
        Current state covariance.
    mean_post_3d : object or None
        Third value returned by the JPDA filter update; None until a JPDA
        update has run.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurance.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features.
    features_2d : List[ndarray]
        A cache of appearance features.

    """
    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, appearance_feature = None, cuda = False, lstm = None):

        self.mean = mean
        self.covariance = covariance
        # Defined up front so the attribute exists on non-JPDA tracks too.
        self.mean_post_3d = None
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        if lstm is None:
            # Without an LSTM the initial features are cached verbatim, even
            # when they are None.
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            self.feature_update(feature, appearance_feature, lstm)
        self.first_detection = mean[:7]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False

    def feature_update(self, feature, appearance_feature, lstm):
        """Cache one raw feature / appearance feature pair.

        A non-None `feature` is appended to `self.features`, first passed
        through `lstm` (carrying `self.hidden` between calls) when one is
        given. A non-None `appearance_feature` is appended to
        `self.features_2d` unchanged.

        This method backs the LSTM path of `__init__`, which previously
        called a method that did not exist on this class.
        """
        if feature is not None:
            if lstm is not None:
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def to_tlwh3d(self):
        """Return a copy of the first seven state entries (the 3D box).

        NOTE(review): the original description lists
        `(bottom-face centre [x, y, z], l, w, h)`, i.e. six values; the
        seventh entry is presumably the heading angle -- confirm.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.mean[[0,1,2,3,4,5,6]].copy()

        return ret

    def to_tlwh(self, kf):
        """Return the axis-aligned 2D box `(min x, min y, width, height)`
        enclosing the corner points reported by `kf.calculate_corners`.

        NOTE(review): `calculate_corners` is called with `kf` itself as its
        argument rather than with this track's state -- confirm against the
        filter's signature.

        Returns
        -------
        ndarray
            The bounding box.

        """
        corner_points, _ = kf.calculate_corners(kf)
        min_x, min_y = np.amin(corner_points, axis = 0)[:2]
        max_x, max_y = np.amax(corner_points, axis = 0)[:2]
        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection, compare_2d=False,
                marginalization=None, detection_idx=None, JPDA=False, lstm = None):
        """Perform Kalman filter measurement update step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : List[Detection]
            All detections of the current frame; `detection_idx` selects the
            one associated with this track.
        compare_2d : bool
            When True, no 3D boxes are passed to the filter (None is passed
            instead).
        marginalization : ndarray, optional
            JPDA association weights (JPDA mode only).
        detection_idx : int
            Index of the associated detection. In JPDA mode a negative value
            means no detection was associated; the filter is still updated
            but hit counters are left untouched.
        JPDA : bool
            Selects the JPDA update path.
        lstm : callable or None
            Accepted for interface compatibility; not used here.

        """

        if JPDA:

            detections_2d = [det.tlwh for det in detection]
            if compare_2d:
                detections_3d = None
            else:
                detections_3d = [det.box_3d for det in detection]
            self.mean, self.covariance, self.mean_post_3d = kf.update(
                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)

            if detection_idx < 0:
                return
            detection = detection[detection_idx]

        else:
            detection = detection[detection_idx]
            # Take the 3D box from the selected detection, mirroring the JPDA
            # branch (this used to read an unassigned local variable).
            box_3d = None if compare_2d else detection.box_3d
            self.mean, self.covariance = kf.update(
                self.mean, self.covariance, detection.tlwh, box_3d)

        self.hits += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).

        Tentative tracks are deleted immediately; other tracks are deleted
        once `time_since_update` exceeds `max_age`.
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted


================================================
FILE: paper_experiments/utils/tracker.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
from . import kf_2d, kf_3d, double_measurement_kf, imm
from . import linear_assignment
from . import iou_matching
from .track import Track
from . import JPDA_matching
from . import tracking_utils
import math
from nn_matching import NearestNeighborDistanceMetric
import cv2


class Tracker:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : EKF.KalmanFilter
        A Kalman filter to filter target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, max_age=5, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None,
                 kf_appearance_feature=None,
                 gate_full_state=False, lstm=None, cuda=False,
                 appearance_model=None, calib=None,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2),
                 dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2,
                 nn_budget=None, use_imm=False,
                 kf_walk_params=(1./20, 1./160, 1, 1, 2),
                 markov=(0.9, 0.7), uncertainty_limit=1.8,
                 optical_flow=False, gate_limit=400):
        """Store the tracker configuration and build its filter and metric.

        The appearance metric is always a ``NearestNeighborDistanceMetric``
        created with ``"euclidean"`` and ``nn_budget``.  The motion filter is
        ``imm.IMMFilter2D`` when ``use_imm`` is truthy (built from
        ``kf_vel_params``, ``kf_walk_params`` and ``markov``), otherwise
        ``kf_2d.KalmanFilter2D`` (built from ``kf_vel_params`` followed by
        ``gate_limit``).

        ``gate_full_state`` is stored inverted as ``gate_only_position``.
        ``calib`` is accepted but not referenced by this constructor.  All
        remaining arguments are stored on the instance under their own names.
        """
        # Track bookkeeping.
        self.max_age = max_age
        self.n_init = n_init
        self.tracks = []
        self._next_id = 1
        self.prev_frame = None

        # Association settings.
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.uncertainty_limit = uncertainty_limit
        self.optical_flow = optical_flow

        # Appearance handling.
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.kf_appearance_feature = kf_appearance_feature
        self.appearance_model = appearance_model
        self.lstm = lstm
        self.cuda = cuda

        # Motion model.
        self.use_imm = bool(use_imm)
        if self.use_imm:
            self.kf = imm.IMMFilter2D(kf_vel_params, kf_walk_params, markov=markov)
        else:
            self.kf = kf_2d.KalmanFilter2D(*kf_vel_params, gate_limit)

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d = False):
        """Build the stacked association cost and the gating mask.

        Returns ``(cost_matrix, gate_mask)`` where ``cost_matrix`` is the
        ``np.dstack`` of the appearance cost from
        ``self.metric.distance_torch`` and the IoU cost from
        ``iou_matching.iou_cost``, and ``gate_mask`` is the result of
        ``linear_assignment.gate_cost_matrix``.

        ``compare_2d`` is forced to ``True`` when it is falsy and
        ``self.metric.check_samples`` returns a truthy value for the selected
        track ids; it then selects ``appearance_feature`` instead of
        ``feature`` on every detection.
        """
        track_ids = np.array([tracks[t].track_id for t in track_indices])
        if not compare_2d:
            if self.metric.check_samples(track_ids):
                compare_2d = True

        feature_attr = "appearance_feature" if compare_2d else "feature"
        det_features = np.array(
            [getattr(dets[d], feature_attr) for d in detection_indices])

        appearance_cost = self.metric.distance_torch(det_features, track_ids, compare_2d)
        iou_cost = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices)
        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position)

        return np.dstack((appearance_cost, iou_cost)), gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, cur_frame, detections, compare_2d = False):
        """Perform measurement update and track management.

        Parameters
        ----------
        cur_frame :
            Current image.  It must support ``255*cur_frame``,
            ``.permute(1,2,0)`` and ``.cpu().numpy()``; the result is converted
            with ``cv2.COLOR_BGR2GRAY`` and kept as ``self.cur_frame``.
            Presumably a channel-first torch image tensor with values in
            [0, 1] -- TODO confirm against the caller.
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.
        compare_2d : bool
            Forwarded unchanged to `_match`.

        Side effects: updates or creates tracks, prunes deleted ones, feeds the
        collected track features to ``self.metric.partial_fit`` and stores the
        grayscale frame as ``self.prev_frame`` for the next call.
        """

        self.cur_frame = cv2.cvtColor((255*cur_frame).permute(1,2,0).cpu().numpy(), cv2.COLOR_BGR2GRAY)

        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections, compare_2d)

        # Update the filter for each assigned track.  Only needed without JPDA:
        # with JPDA the Kalman states are already updated inside `_match`.
        if not self.JPDA:
            for track_idx, detection_idx in matches:
                self.tracks[track_idx].update(self.kf, detections,
                        detection_idx=detection_idx, JPDA=self.JPDA,
                        cur_frame = self.cur_frame, appearance_model = self.appearance_model,
                        lstm = self.lstm)

        # Update track state for unmatched tracks.
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()

        # Drop dead tracks before new ones are created.
        self.prune_tracks()

        # Dense optical flow is only computed when a new track will actually be
        # started and a previous frame exists.
        # NOTE(review): OpenCV documents the Farneback inputs as 8-bit
        # single-channel images; confirm the dtype of self.cur_frame is
        # accepted when optical_flow is enabled.
        flow = None
        if unmatched_detections and self.optical_flow and self.prev_frame is not None:
            flow = cv2.calcOpticalFlowFarneback(self.prev_frame, self.cur_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx], flow)

        # Update distance metric with the features gathered on the tracks, then
        # clear the per-track buffers so each feature is handed over once.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []

        self.metric.partial_fit(
            np.asarray(features), np.asarray(features_2d), np.asarray(targets), np.asarray(targets_2d), active_targets)
        self.prev_frame = self.cur_frame

    # @profile
    def _match(self, detections, compare_2d):
        """Associate tracks with detections.

        With ``self.JPDA`` set, marginal association probabilities are computed
        by ``linear_assignment.JPDA`` for all tracks, hard matches are derived
        by ``JPDA_matching.Matcher`` and every track is updated here with its
        row of the marginalization matrix (unmatched tracks receive detection
        index ``-1``).  Otherwise only confirmed tracks are passed to
        ``linear_assignment.matching_cascade`` and no track is updated.

        Returns ``(matches, unmatched_tracks, unmatched_detections)``.
        """
        if not self.JPDA:
            confirmed = [idx for idx, trk in enumerate(self.tracks) if trk.is_confirmed()]
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, self.dummy_node_cost_iou, self.max_age,
                    self.tracks, detections, confirmed, compare_2d = compare_2d)
            return matches_a, unmatched_tracks_a, unmatched_detections

        # Run JPDA on all tracks.
        marginalizations = linear_assignment.JPDA(
            self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou,
            self.tracks, detections, m=self.m_best_sol, compare_2d = compare_2d)

        matcher = JPDA_matching.Matcher(
            detections, marginalizations, range(len(self.tracks)),
            self.matching_strategy, assignment_threshold=self.assn_thresh)
        matches_a, unmatched_tracks_a, unmatched_detections = matcher.match()

        # Track index -> detection index, with -1 meaning "no detection".
        assigned = {t: d for (t, d) in matches_a}
        assigned.update((t, -1) for t in unmatched_tracks_a)

        # Update the Kalman state of every track.
        if marginalizations.shape[0] > 0:
            for idx, trk in enumerate(self.tracks):
                trk.update(self.kf, detections,
                    marginalization=marginalizations[idx,:], detection_idx=assigned[idx],
                    JPDA=self.JPDA, cur_frame = self.cur_frame,
                    appearance_model = self.appearance_model, lstm = self.lstm)

        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection, flow=None):
        """Start a new track from ``detection`` and append it to ``self.tracks``.

        The filter is initialised from ``detection.to_xywh()`` and ``flow``.
        The IMM filter additionally yields model probabilities; for the plain
        filter they are ``None``.  The new track takes ``self._next_id``, which
        is then incremented.
        """
        initial_state = self.kf.initiate(detection.to_xywh(), flow)
        if self.use_imm:
            mean, covariance, model_probabilities = initial_state
        else:
            mean, covariance = initial_state
            model_probabilities = None

        new_track = Track(
            mean, covariance, model_probabilities, self._next_id, self.n_init, self.max_age,
            kf_appearance_feature = self.kf_appearance_feature,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm, last_det = detection)
        self.tracks.append(new_track)
        self._next_id += 1
    
    def prune_tracks(self):
        h, w = self.cur_frame.shape
        for track in self.tracks:
            # Check if track is leaving
            if self.use_imm:
                predicted_mean, predicted_cov = self.kf.combine_states(track.mean, track.covariance, track.model_probabilities) #TODO: This doesn't predict. Mean should def predict
            else:
                predicted_mean = self.kf.predict_mean(track.mean)
                predicted_cov = track.covariance
            predicted_pos = predicted_mean[:2]
            predicted_vel = predicted_mean[4:6]
            predicted_pos[0] -= w/2
            predicted_pos[1] -= h/2

            cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*
                                                    np.linalg.norm(predicted_vel) + 1e-6)
            predicted_pos[0] += w/2
            predicted_pos[1] += h/2
            # Thresholds for deciding whether track is outside image
            BORDER_VALUE = 0
            if (cos_theta > 0 and
                (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or
                predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):
                if track.is_exiting() and not track.matched:
                    track.delete_track()
                else:
                    track.mark_exiting()
            # Check if track is too uncertain
            # cov_axis,_ = np.linalg.eigh(predicted_cov)
            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):
            #    track.delete_track()
        self.tracks = [t for t in self.tracks if not t.is_deleted()]


================================================
FILE: paper_experiments/utils/tracker_3d.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
from . import double_measurement_kf
from . import linear_assignment
from . import iou_matching
from .track_3d import Track_3d
from . import JPDA_matching
from . import tracking_utils
import math
from nn_matching import NearestNeighborDistanceMetric

class Tracker_3d:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : EKF.KalmanFilter
        A Kalman filter to filter target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model=None,
                 gate_full_state=False, lstm=None, cuda=False,
                 calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2),
                 dummy_node_cost=0.2, nn_budget=None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8,
                 optical_flow=False, gate_limit=400):
        """Store the 3D tracker configuration and build its filter and metric.

        The appearance metric is a ``NearestNeighborDistanceMetric`` created
        with ``"euclidean"`` and ``nn_budget``; the motion filter is
        ``double_measurement_kf.KF_3D`` built from ``calib``,
        ``kf_vel_params`` and ``omni``.  ``gate_full_state`` is stored inverted
        as ``gate_only_position``.

        ``use_imm``, ``markov``, ``uncertainty_limit``, ``optical_flow`` and
        ``gate_limit`` are accepted but not referenced by this constructor.
        """
        # Track bookkeeping.
        self.max_age = max_age
        self.n_init = n_init
        self.tracks = []
        self._next_id = 1

        # Association settings.
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.dummy_node_cost = dummy_node_cost

        # Appearance handling.
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.appearance_model = appearance_model
        self.lstm = lstm
        self.cuda = cuda

        # Motion model.
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        """Build the stacked association cost and the gating mask.

        Returns ``(cost_matrix, gate_mask)``.  ``cost_matrix`` stacks the
        appearance cost from ``self.metric.distance_torch`` with an IoU cost.
        The IoU cost comes from ``iou_matching.iou_cost(..., use3d=True)`` when
        every selected detection has a ``box_3d``; otherwise it is an all-ones
        array of the appearance cost's shape.  The same 3D flag is forwarded
        to ``linear_assignment.gate_cost_matrix``.

        ``compare_2d`` is forced to ``True`` when it is falsy and
        ``self.metric.check_samples`` returns a truthy value; it then selects
        ``appearance_feature`` instead of ``feature`` on every detection.
        """
        track_ids = np.array([tracks[t].track_id for t in track_indices])
        if not compare_2d:
            if self.metric.check_samples(track_ids):
                compare_2d = True

        feature_attr = "appearance_feature" if compare_2d else "feature"
        det_features = np.array(
            [getattr(dets[d], feature_attr) for d in detection_indices])
        appearance_cost = self.metric.distance_torch(det_features, track_ids, compare_2d)

        # 3D IoU and 3D gating are only usable if no selected detection lacks a box.
        use_3d = all(dets[d].box_3d is not None for d in detection_indices)
        if use_3d:
            iou_cost = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            iou_cost = np.ones(appearance_cost.shape)

        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)

        return np.dstack((appearance_cost, iou_cost)), gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections, compare_2d):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.

        """

        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections, compare_2d)

        # update filter for each assigned track
        # Only do this for non-JPDA because in JPDA the kf states are updated
        # during the matching process
        
        if not self.JPDA:
            # Map matched tracks to detections
            track_detection_map = {t:d for (t,d) in matches}
            # Map unmatched tracks to -1 for no detection
            for t in unmatched_tracks:
                track_detection_map[t] = -1
            
            for track_idx, detection_idx in matches:
                self.tracks[track_idx].update(self.kf, detections,
                        detection_idx=detection_idx, JPDA=self.JPDA, 
                        cur_frame = self.cur_frame, appearance_model = self.appearance_model, 
                        lstm = self.lstm)
        # update track state for unmatched tracks
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        
        self.prune_tracks()
        # create new tracks
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

         # Update distance metric.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []

        self.metric.partial_fit(
            np.asarray(features), np.asarray(features_2d), np.asarray(targets), np.asarray(targets_2d), active_targets)

    # @profile
    def _match(self, detections, compare_2d):
        """Associate tracks with detections.

        With ``self.JPDA`` set, marginal association probabilities are computed
        by ``linear_assignment.JPDA`` for all tracks, hard matches are derived
        by ``JPDA_matching.Matcher`` and every track is updated here with its
        row of the marginalization matrix (unmatched tracks receive detection
        index ``-1``).  Otherwise confirmed tracks are passed to
        ``linear_assignment.matching_cascade`` and no track is updated.

        Returns ``(matches, unmatched_tracks, unmatched_detections)``.
        """
        if self.JPDA:
            # NOTE(review): Tracker._match calls linear_assignment.JPDA with two
            # dummy-node costs and m=self.m_best_sol, while this call passes a
            # single cost and no m -- confirm against the JPDA signature.
            marginalizations = \
            linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost, self.tracks, \
                detections, compare_2d=compare_2d)

            jpda_matcher = JPDA_matching.Matcher(
                detections, marginalizations, range(len(self.tracks)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

            # Map matched tracks to detections
            track_detection_map = {t:d for (t,d) in matches_a}

            # Map unmatched tracks to -1 for no detection
            for t in unmatched_tracks_a:
                track_detection_map[t] = -1

            # Update Kalman state
            if marginalizations.shape[0] > 0:
                for i in range(len(self.tracks)):
                    self.tracks[i].update(self.kf, detections,
                        marginalization=marginalizations[i,:], detection_idx=track_detection_map[i],
                        JPDA=self.JPDA, lstm = self.lstm)

        else:
            # Both names below were previously undefined here (NameError).
            # Confirmed-track selection mirrors Tracker._match.
            confirmed_tracks = [i for i, t in enumerate(self.tracks) if t.is_confirmed()]
            # NOTE(review): assumed to be the per-detection 3D boxes -- confirm
            # against what linear_assignment.matching_cascade expects for
            # its detections_3d argument.
            detections_3d = [det.box_3d for det in detections]
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, self.metric.matching_threshold, self.max_age,
                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)

        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm))
        self._next_id += 1
    
    def prune_tracks(self):

        # for track in self.tracks:
        #     # Check if track is leaving
        #     predicted_mean = self.kf.predict_mean(track.mean)
        #     predicted_cov = track.covariance
        #     predicted_pos = predicted_mean[:2]
        #     predicted_vel = predicted_mean[4:6]
        #     predicted_pos[0] -= w/2
        #     predicted_pos[1] -= h/2

        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*
        #                                             np.linalg.norm(predicted_vel) + 1e-6)
        #     predicted_pos[0] += w/2
        #     predicted_pos[1] += h/2
        #     # Thresholds for deciding whether track is outside image
        #     BORDER_VALUE = 0
        #     if (cos_theta > 0 and
        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or
        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):
        #         if track.is_exiting() and not track.matched:
        #             track.delete_track()
        #         else:
        #             track.mark_exiting()
            # Check if track is too uncertain
            # cov_axis,_ = np.linalg.eigh(predicted_cov)
            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):
            #    track.delete_track()
        self.tracks = [t for t in self.tracks if not t.is_deleted()]


================================================
FILE: paper_experiments/utils/tracking_utils.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from .aligned_reid_utils import load_state_dict
from models.yolo_models import Darknet
from .featurepointnet_model_util import rotate_pc_along_y
from .deep_sort_utils import non_max_suppression as deepsort_nms
import math
from .detection import Detection


def create_detector(config_path, weight_path, cuda):
    """Build a ``Darknet`` detector ready for inference.

    The model is created from ``config_path``, its weights are loaded from
    ``weight_path``, it is moved with ``.cuda()`` when ``cuda`` is truthy, and
    it is switched to evaluation mode before being returned.
    """
    model = Darknet(config_path)
    model.load_weights(weight_path)

    if cuda:
        model.cuda()

    model.eval()
    return model

def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Cut one fixed-size point patch out of ``point_cloud`` per 3D box.

    For each box whose ``ids_3d`` entry is not ``-1`` the points falling inside
    the box are selected and resampled to exactly ``num_point`` rows
    (subsampled without replacement when there are more, padded by sampling
    with replacement when there are fewer).  Boxes marked ``-1``, and boxes
    that contain no points, yield ``None``; in the latter case ``ids_3d[i]`` is
    also set to ``-1`` in place.

    Returns ``(depth_patches, ids_3d)`` with one list entry per box.

    NOTE(review): ``rotate_pc_along_y`` is called for its effect only (its
    return value is ignored), so it is presumably an in-place rotation --
    ``point_cloud`` is rotated and rotated back on every iteration.
    NOTE(review): ``get_center_view_point_set`` is not among the names this
    module imports in the visible import block -- confirm where it is defined,
    otherwise the final ``append`` raises NameError.
    """
    #print(ids_3d)
    depth_patches = []
    for i, box in enumerate(box_3d):
        if ids_3d[i] == -1:
            # No valid 3D box for this detection: keep the slot, emit no patch.
            depth_patches.append(None)
            continue
        # Rotate the box centre by (pi/2 + box[6]) and apply the same rotation
        # to the cloud, so the box test below is a set of per-axis range checks.
        box_center = np.asarray([ [box[0], box[1], box[2]] ])
        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))
        box_center = box_center[0]
        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))
        x = point_cloud[:, 0]
        y = point_cloud[:, 1]
        z = point_cloud[:, 2]
        # x and z are tested symmetrically around the centre (extents box[3]
        # and box[5]); y is tested from centre - box[4] up to centre + 0.1.
        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))
        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))
        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))
        idx = np.logical_and(idx_1, idx_2)
        idx = np.logical_and(idx, idx_3)
        # Boolean-mask indexing produces a new array holding the selected rows.
        depth_patch = point_cloud[idx, :]
        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration
        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))

        if depth_patch.size == 0:
            # Empty box: invalidate the 3D id in the caller's list as well.
            ids_3d[i] = -1
            depth_patches.append(None)
        else:
            if depth_patch.shape[0] > num_point:
                # Too many points: random subset without replacement.
                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)
            else:

                # Too few points: keep all and pad with random repeats.
                pc_in_box_fov = np.expand_dims(
                            np.vstack([depth_patch, 
                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])
                            , 0)
            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])

    return depth_patches, ids_3d


def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Run ``deepsort_nms`` on ``boxes_3d`` and blank out suppressed entries.

    Every index not returned by ``deepsort_nms`` gets ``ids_3d[i]`` and
    ``ids_2d[i]`` set to ``-1`` and ``boxes_3d[i]`` / ``detections[i]`` set to
    ``None``.  All four sequences are modified in place.

    ``confidence`` is squeezed before being used as scores; when it is ``None``
    it is now forwarded as ``None`` (previously ``np.squeeze(None)`` produced a
    non-None object array).

    Returns ``(detections, boxes_3d, ids_2d, ids_3d)`` -- note that the two id
    lists come back in the opposite order to the parameters.
    """
    scores = None if confidence is None else np.squeeze(confidence)
    # Plain ints in a set: constant-time membership for any integer-like index type.
    kept = {int(k) for k in deepsort_nms(boxes_3d, nms_thresh, scores)}
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d

def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Greedy non-maximum suppression over point patches using `iou_3d`.

    Patches are visited from the end of the index order (ascending
    ``confidence`` when given, list order otherwise).  A visited patch is kept
    and every remaining patch whose overlap with it exceeds ``nms_thresh`` is
    dropped; a visited patch that is ``None`` is dropped itself while other
    patches remain.  For every patch that is not kept, ``ids_3d[i]`` is set to
    ``-1``, and ``ids_2d[i]`` as well if the 3D id was still valid.  Both id
    lists are modified in place.

    Returns ``(depth_patches, ids_3d, ids_2d)`` in every case, including empty
    input (which previously returned a bare ``[]``).
    """
    # depth_patches: list of patches
    if len(depth_patches) == 0:
        return depth_patches, ids_3d, ids_2d

    pick = []

    if confidence is not None:
        idxs = np.argsort(confidence)
    else:
        idxs = list(range(len(depth_patches)))

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        # -inf is iou_3d's marker for "the reference patch is None".
        if np.any(overlap == -np.inf):
            idxs = np.delete(idxs, [last])
            continue
        pick.append(i)
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))

    kept = set(int(p) for p in pick)
    for i in range(len(depth_patches)):
        if i not in kept:
            if ids_3d[i]!=-1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d

def iou_3d(patch_1, patch_2):
    """Point-set overlap ratio between two patches.

    Both patches are arrays with one point per row (the original comment
    expects shape (N, 4) or (N, 3)).  Rows are de-duplicated first.  A point
    of ``patch_1`` counts as intersecting when some point of ``patch_2`` lies
    closer than 0.3 (Euclidean distance over all columns).  The result is
    ``intersecting / (n1 + n2 - intersecting)``.

    Sentinels: ``np.inf`` when ``patch_2`` is ``None`` (checked first),
    ``-np.inf`` when only ``patch_1`` is ``None``.
    """
    if patch_2 is None:
        return np.inf
    if patch_1 is None:
        return -np.inf

    # Unique points
    pts_a = np.unique(patch_1, axis = 0)
    pts_b = np.unique(patch_2, axis = 0)

    matched = sum(
        np.any(np.sqrt(np.sum((pt - pts_b)**2, axis = 1)) < 0.3)
        for pt in pts_a)

    total = pts_a.shape[0] + pts_b.shape[0] - matched
    return matched/total

def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw detector output into ``Detection`` objects.

    Each entry of ``detections`` must unpack into seven values, of which the
    first five are ``x1, y1, x2, y2, conf``; the 2D box is stored as
    ``[x1, y1, width, height]``.  Each non-None entry of ``detections_3d`` must
    unpack into seven values, stored as a list.  ``detections_3d=None`` means
    no detection has a 3D box.

    When ``feature`` is ``None`` the ``Detection`` is built without a 3D box
    even if one was supplied.  Appearance features (and features, when
    present) are moved with ``.cpu()``.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    converted = []
    rows = zip(detections, features, appearance_features, detections_3d)
    for detection, feature, appearance_feature, detection_3d in rows:
        x1, y1, x2, y2, conf, _, _ = detection
        box_2d = [x1, y1, x2-x1, y2-y1]

        box_3d = None
        if detection_3d is not None:
            x, y, z, l, w, h, theta = detection_3d
            box_3d = [x, y, z, l, w, h, theta]

        if feature is None:
            wrapped = Detection(box_2d, None, conf, appearance_feature.cpu(), feature)
        else:
            wrapped = Detection(box_2d, box_3d, conf, appearance_feature.cpu(), feature.cpu())
        converted.append(wrapped)

    return converted

def combine_features(features, depth_features, ids_3d, combination_model, depth_weight = 1):
    """Concatenate appearance and depth features per detection.

    For each pair the depth feature is scaled by ``depth_weight`` and appended
    to the appearance feature with ``torch.cat``.  Where ``ids_3d[i] == -1``
    the depth feature is replaced by a 512-element zero vector, created on the
    same device as the appearance feature (it was previously hard-wired to
    CUDA, which fails on CPU-only hosts and on device mismatch).

    If ``combination_model`` is given and there is at least one detection, the
    stacked features are passed through it in eval mode and the detached
    output is split back into a list.

    Returns ``(combined_features, appearance_features)``, both lists.
    """
    depth_dim = 512  # length of the zero vector that stands in for a missing depth feature

    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if ids_3d[i] == -1:
            depth_feature = torch.zeros(depth_dim, device=appearance_feature.device)
        combined_features.append(torch.cat([appearance_feature, depth_feature* depth_weight]))
        appearance_features.append(appearance_feature)

    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        combined_feature = torch.stack(combined_features)
        combined_features = combination_model(combined_feature).detach()
        combined_features = list(torch.unbind(combined_features))
    return combined_features, appearance_features

def filter(detections, img_width=1242, img_height=375, margin=10):
    """Blank out detections that sit in a corner region of the image.

    A detection ``(left, top, right, bottom, ...)`` is replaced by ``None``
    when it is within ``margin`` pixels of the left or right image edge AND
    within ``margin`` pixels of the top or bottom edge.  The defaults
    reproduce the previously hard-coded 1242 x 375 image with a 10 px margin.
    Entries that are already ``None`` are left alone.

    ``detections`` is modified in place and also returned.  Note that this
    function shadows the built-in ``filter`` inside this module.
    """
    right_limit = img_width - margin
    bottom_limit = img_height - margin
    for i, det in enumerate(detections):
        if det is None:
            # Already removed by an earlier stage.
            continue
        left, top, right, bottom = det[0], det[1], det[2], det[3]
        if (left < margin or right > right_limit) and (top < margin or bottom > bottom_limit):
            detections[i] = None
    return detections

================================================
FILE: paper_experiments/utils/visualise.py
================================================
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as plt_patches
import numpy as np
import utils.imm as imm
from PIL import Image
import pdb

def draw_track(bbox, track = None, bbox_colors = None, det = True,
               do_ellipse = False, axis = None, id_num = 0, do_velocity=False):
    """Draw a labelled bounding box for a track or a bare detection.

    ``bbox`` is indexed as ``(x, y, width, height)``.  With a ``track`` the
    colour is ``bbox_colors[track.track_id]`` and the label is the track id;
    without one the colour is entry 8 (``det`` truthy) or 6 of the ``tab20b``
    colormap and the label is ``id_num``.  The box is added to ``axis`` or, if
    that is ``None``, to the current axes.  ``do_ellipse`` / ``do_velocity``
    additionally call `draw_ellipse` / `draw_velocity` for the track.
    """
    ax = plt.gca() if axis is None else axis

    if track is not None:
        color = bbox_colors[track.track_id]
        id_num = track.track_id
    elif det:
        color = plt.get_cmap('tab20b')(8)
    else:
        color = plt.get_cmap('tab20b')(6)

    box_width, box_height = bbox[2], bbox[3]
    outline = plt_patches.Rectangle((bbox[0], bbox[1]), box_width, box_height,
                                    linewidth=2,
                                    edgecolor=color,
                                    facecolor='none')
    ax.add_patch(outline)
    ax.text(bbox[0], bbox[1], s = id_num, color='white', verticalalignment='top',
        bbox={'color': color, 'pad': 0})

    if do_ellipse:
        draw_ellipse(track, color)
    if do_velocity:
        draw_velocity(track, color)


def draw_detection(detection, color='k'):
    """Draw ``detection.tlwh`` on the current axes as a half-transparent,
    white-filled rectangle with an edge of the given colour."""
    left, top, box_width, box_height = (detection.tlwh[k] for k in range(4))
    patch = plt_patches.Rectangle((left, top), box_width, box_height,
                                  linewidth=2,
                                  edgecolor=color,
                                  facecolor = 'w',
                                  alpha = 0.5)
    plt.gca().add_patch(patch)
   
def draw_ellipse(track, color):
    """Draw the position-uncertainty ellipse of a 2D track on the current axes.

    The top-left 2x2 block of the covariance is eigen-decomposed; the ellipse
    is centred on ``mean[:2]``, its full axis lengths are
    ``2 * sqrt(5.99) * sqrt(eigenvalue)`` (largest first) and it is rotated to
    the direction of the leading eigenvector.  For tracks that carry model
    probabilities, mean and covariance are first merged with
    ``imm.IMMFilter2D.combine_states``.
    """
    ax = plt.gca()
    if track.model_probabilities is None:
        mean, cov = track.mean, track.covariance
    else:
        mean, cov = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)

    eigvals, eigvecs = np.linalg.eig(cov[:2, :2])
    axis_std = np.sqrt(eigvals)
    order = np.argsort(axis_std)[::-1]  # largest axis first
    axis_std = axis_std[order]
    eigvecs = eigvecs[:, order]

    nsigma = np.sqrt(5.99)
    tilt_deg = np.rad2deg(np.arctan2(eigvecs[1, 0], eigvecs[0, 0]))
    outline = plt_patches.Ellipse(xy=(mean[0], mean[1]),
                                  width=axis_std[0]*2*nsigma,
                                  height=axis_std[1]*2*nsigma,
                                  angle=tilt_deg,
                                  edgecolor=color,
                                  facecolor='none')
    ax.add_patch(outline)

def draw_velocity(track, color):
    """Draw an arrow from ``mean[0:2]`` with displacement ``mean[4:6]`` on the
    current axes.  For tracks that carry model probabilities the mean is first
    merged with ``imm.IMMFilter2D.combine_states``."""
    ax = plt.gca()
    if track.model_probabilities is None:
        mean = track.mean
    else:
        mean, _ = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)

    start_x, start_y = mean[0], mean[1]
    delta_x, delta_y = mean[4], mean[5]
    ax.arrow(start_x, start_y,
            delta_x, delta_y,
            edgecolor=color,
            head_width=5)

def draw_box3d(mu, color, alpha, facecolor='none', ax=None):
    """Draw a rotated rectangle for a 3D box, using only its x/z components.

    ``mu`` is indexed as: ``mu[0]``/``mu[2]`` centre, ``mu[3]``/``mu[5]``
    rectangle width/height, ``mu[6]`` rotation angle.  The rectangle is built
    around the origin, then translated to the centre and rotated about it by
    ``mu[6]``.  Nothing is drawn when ``mu`` contains a NaN.  The patch is
    added to ``ax`` or, if that is ``None``, to the current axes.
    """
    if np.any(np.isnan(mu)):
        return
    axes = plt.gca() if ax is None else ax

    x, z = mu[0], mu[2]
    l, w = mu[3], mu[5]
    theta = mu[6]

    # Offset from the centre to the rectangle's anchor corner.
    half_diag = np.sqrt(w**2 + l**2)/2
    psi = np.arctan2(w, l)
    dx = half_diag*np.cos(psi)
    dz = half_diag*np.sin(psi)

    rect = plt_patches.Rectangle((-dx, -dz), l, w, linewidth=2,
                        edgecolor=color,
                        alpha=alpha,
                        facecolor=facecolor)
    placement = matplotlib.transforms.Affine2D().translate(x, z)
    placement = placement.rotate_around(x, z, theta)
    rect.set_transform(placement + axes.transData)
    axes.add_patch(rect)

    
def draw_velocity_3d(track, color, ax=None):
    """Draw an arrow from ``(mean[0], mean[2])`` with displacement
    ``(mean[7], mean[8])`` of a 3D track.

    The arrow is drawn on ``ax`` or, if that is ``None``, on the current axes.
    It is created through ``ax.arrow`` so that it is added exactly once and to
    the requested axes; the earlier ``plt.arrow`` + ``ax.add_patch`` pair
    attached the same artist to the current axes first and then a second time.
    """
    mean = track.mean
    if ax is None:
        ax = plt.gca()
    x, z, vx, vz = mean[0], mean[2], mean[7], mean[8]
    ax.arrow(x, z, vx, vz,
             color=color,
             head_width=0.5,
             head_length=0.5)
                    
def draw_ellipse3d(covariance, x, y, color, ax=None):
    """Draw an uncertainty ellipse for state components 0 and 2.

    The 2x2 sub-covariance of rows/columns ``[0, 2]`` is eigen-decomposed;
    the ellipse axes are the square roots of the eigenvalues scaled by
    ``2 * sqrt(5.99)`` and the orientation follows the dominant eigenvector.
    Nothing is drawn when ``covariance`` contains a NaN.

    Parameters
    ----------
    covariance : ndarray, indexed at rows/columns 0 and 2
    x, y : centre of the ellipse in data coordinates
    color : edge color of the ellipse
    ax : target axes; the current axes are used when omitted
    """
    if np.isnan(covariance).any():
        return
    if ax is None:
        ax = plt.gca()
    planar_cov = covariance[np.ix_([0, 2], [0, 2])]
    eigvals, eigvecs = np.linalg.eig(planar_cov)
    axis_std = np.sqrt(eigvals)
    # Largest axis first so that width/angle describe the major axis.
    order = np.argsort(axis_std)[::-1]
    axis_std = axis_std[order]
    eigvecs = eigvecs[:, order]
    nsigma = np.sqrt(5.99)
    major_dir_deg = np.rad2deg(np.arctan2(eigvecs[1, 0], eigvecs[0, 0]))
    ellipse = plt_patches.Ellipse(
        xy=(x, y),
        width=axis_std[0]*2*nsigma,
        height=axis_std[1]*2*nsigma,
        angle=major_dir_deg,
        edgecolor=color,
        facecolor='none',
    )
    ax.add_patch(ellipse)

def draw_track3d(track, color, ax=None):
    """Draw a 3D track: box footprint, id label, covariance ellipse, velocity.

    Parameters
    ----------
    track : object exposing ``mean``, ``covariance`` and ``track_id``
    color : color shared by all drawn elements
    ax : target axes; the current axes are used when omitted
    """
    state = track.mean
    # The box is drawn first, with the caller's ``ax`` value passed through as-is.
    draw_box3d(state, color, 1, ax=ax)
    if ax is None:
        ax = plt.gca()
    x = state[0]
    z = state[2]
    label_box = {'color': color, 'pad': 0}
    ax.text(x, z, s=track.track_id, color='white',
            verticalalignment='top', bbox=label_box)

    draw_ellipse3d(track.covariance, x, z, color, ax)
    draw_velocity_3d(track, color, ax)

def draw_detection3d(det, color, ax=None):
    """Draw a detection's 3D box footprint, half-transparent and filled with ``color``."""
    draw_box3d(det.box_3d, color, alpha=0.5, facecolor=color, ax=ax)

================================================
FILE: paper_experiments/utils/yolo_utils/__init__.py
================================================


================================================
FILE: paper_experiments/utils/yolo_utils/datasets.py
================================================
import glob
import random
import os
import numpy as np

import torch

from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from skimage.transform import resize

import sys

class ImageFolder(Dataset):
    """Dataset over every file in a folder, yielding square, resized images.

    Each item is ``(img_path, tensor)`` where ``tensor`` is a float tensor in
    channels-first layout with spatial size ``img_size`` x ``img_size``.
    """

    def __init__(self, folder_path, img_size=416):
        pattern = '%s/*.*' % folder_path
        self.files = sorted(glob.glob(pattern))
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        # Indices wrap around the file list.
        img_path = self.files[index % len(self.files)]
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape

        # Pad the shorter side symmetrically so the image becomes square.
        dim_diff = np.abs(h - w)
        pad_before = dim_diff // 2
        pad_after = dim_diff - dim_diff // 2
        if h <= w:
            pad = ((pad_before, pad_after), (0, 0), (0, 0))
        else:
            pad = ((0, 0), (pad_before, pad_after), (0, 0))
        squared = np.pad(img, pad, 'constant', constant_values=127.5) / 255.

        # Resize, then move channels to the front for PyTorch.
        target_shape = self.img_shape + (3,)
        resized = resize(squared, target_shape, mode='reflect', anti_aliasing = True)
        chw = np.transpose(resized, (2, 0, 1))
        input_img = torch.from_numpy(chw).float()

        return img_path, input_img

    def __len__(self):
        return len(self.files)


class ListDataset(Dataset):
    """Dataset of images and label files listed, one path per line, in a text file.

    Label paths are derived from the image paths by replacing ``images`` with
    ``labels`` and the ``.png``/``.jpg`` extension with ``.txt``. Each item is
    ``(img_path, input_img, filled_labels)`` where ``filled_labels`` always has
    ``max_objects`` rows of 5 values (unused rows stay zero).
    """

    def __init__(self, list_path, img_size=416):
        with open(list_path, 'r') as file:
            # Lines keep their trailing newline here; it is stripped on access.
            self.img_files = file.readlines()
        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
        self.img_shape = (img_size, img_size)
        # Fixed number of label rows returned per image.
        self.max_objects = 50

    def __getitem__(self, index):
        """Return ``(img_path, input_img, filled_labels)`` for ``index`` (wraps around)."""

        #---------
        #  Image
        #---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        img = np.array(Image.open(img_path))

        # Skip forward past images whose array is not 3-dimensional
        # (e.g. single-channel images). ``index`` is advanced so the label
        # lookup further down refers to the image that is finally used.
        while len(img.shape) != 3:
            index += 1
            img_path = self.img_files[index % len(self.img_files)].rstrip()
            img = np.array(Image.open(img_path))

        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Pad rows when the image is wider than tall, columns otherwise
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding and scale pixel values by 1/255
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = input_img.shape
        # Resize to the configured square size
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        #---------
        #  Label
        #---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        labels = None
        if os.path.exists(label_path):
            # Each row has 5 values; columns 1-4 are used below as
            # (x_center, y_center, width, height) relative to the unpadded image.
            # NOTE(review): column 0 is presumably the class id — confirm against the label files.
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Adjust for added padding
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Re-express centers and sizes relative to the padded image
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        # Fill a fixed-size matrix; labels beyond max_objects are dropped
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)

        return img_path, input_img, filled_labels

    def __len__(self):
        """Number of listed image paths."""
        return len(self.img_files)


================================================
FILE: paper_experiments/utils/yolo_utils/parse_config.py
================================================


def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    Parameters
    ----------
    path : str
        Path to the ``.cfg`` file.

    Returns
    -------
    list of dict
        One dict per ``[section]`` in file order. Each dict has a ``'type'``
        key (the section name) plus one string entry per ``key=value`` line.
        ``convolutional`` sections get ``'batch_normalize': 0`` as a default
        that a later ``batch_normalize=`` line overrides.

    Raises
    ------
    ValueError
        If a non-comment, non-section line contains no ``=``.
    IndexError
        If a ``key=value`` line appears before the first section header.
    """
    # Use a context manager so the file handle is always released.
    with open(path, 'r') as file:
        raw_lines = file.read().split('\n')

    module_defs = []
    for raw_line in raw_lines:
        # Strip BEFORE filtering so whitespace-only lines and indented
        # comments are skipped instead of reaching the key/value split.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            # Split on the first '=' only, so values may contain '='.
            key, value = line.split("=", 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs

def parse_data_config(path):
    """Parses the data configuration file.

    Parameters
    ----------
    path : str
        Path to a file of ``key=value`` lines. Blank lines and lines starting
        with ``#`` are ignored.

    Returns
    -------
    dict
        String options. ``'gpus'`` and ``'num_workers'`` are pre-populated
        with defaults (``'0,1,2,3'`` and ``'10'``) that the file may override.

    Raises
    ------
    ValueError
        If a non-comment line contains no ``=``.
    """
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        # Split on the first '=' only so values (e.g. paths) may contain '='.
        key, value = line.split('=', 1)
        options[key.strip()] = value.strip()
    return options


================================================
FILE: paper_experiments/utils/yolo_utils/utils.py
================================================
from __future__ import division
import math
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.patches as patches


def load_classes(path):
    """
    Loads class labels at 'path'

    The file holds one label per line. Returns the labels as a list of
    strings in file order. A single trailing newline at the end of the file
    does not produce an empty final label, and a file WITHOUT a trailing
    newline no longer loses its last label.
    """
    # Context manager: the handle was previously never closed.
    with open(path, "r") as fp:
        names = fp.read().split("\n")
    # Drop only the empty string produced by a terminating newline, rather
    # than unconditionally discarding the last entry.
    if names and names[-1] == "":
        names.pop()
    return names


def weights_init_normal(m):
    """Re-initialise a module in place based on its class name.

    Modules whose class name contains ``"Conv"`` get weights drawn from
    N(0, 0.02); those containing ``"BatchNorm2d"`` get weights from
    N(1, 0.02) and zero bias. Any other module is left untouched.
    """
    classname = m.__class__.__name__
    if "Conv" in classname:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if "BatchNorm2d" in classname:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Pad both curves with sentinel values at each end.
    padded_recall = np.concatenate(([0.0], recall, [1.0]))
    padded_precision = np.concatenate(([0.0], precision, [0.0]))

    # Precision envelope: every entry becomes the maximum of itself and all
    # entries to its right (a running maximum taken from the right).
    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]

    # Indices at which recall changes value between neighbours.
    steps = np.where(padded_recall[1:] != padded_recall[:-1])[0]

    # Area under the curve: sum of (delta recall) * precision.
    recall_delta = padded_recall[steps + 1] - padded_recall[steps]
    return np.sum(recall_delta * envelope[steps + 1])


def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes

    ``box1`` and ``box2`` are 2-D tensors whose first four columns hold the
    box, either as corners ``(x1, y1, x2, y2)`` (default) or, when
    ``x1y1x2y2`` is False, as ``(center_x, center_y, width, height)``.
    Widths and heights are measured with a ``+ 1`` term.
    """
    if x1y1x2y2:
        # Columns already hold corner coordinates.
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
    else:
        # Convert centre/size to corner coordinates.
        b1_x1 = box1[:, 0] - box1[:, 2] / 2
        b1_x2 = box1[:, 0] + box1[:, 2] / 2
        b1_y1 = box1[:, 1] - box1[:, 3] / 2
        b1_y2 = box1[:, 1] + box1[:, 3] / 2
        b2_x1 = box2[:, 0] - box2[:, 2] / 2
        b2_x2 = box2[:, 0] + box2[:, 2] / 2
        b2_y1 = box2[:, 1] - box2[:, 3] / 2
        b2_y2 = box2[:, 1] + box2[:, 3] / 2

    # Corners of the overlap rectangle.
    left = torch.max(b1_x1, b2_x1)
    top = torch.max(b1_y1, b2_y1)
    right = torch.min(b1_x2, b2_x2)
    bottom = torch.min(b1_y2, b2_y2)

    # Overlap area, clamped to zero for disjoint boxes.
    overlap_w = torch.clamp(right - left + 1, min=0)
    overlap_h = torch.clamp(bottom - top + 1, min=0)
    inter_area = overlap_w * overlap_h

    area_1 = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    area_2 = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # The tiny epsilon guards against division by zero.
    return inter_area / (area_1 + area_2 - inter_area + 1e-16)


def bbox_iou_numpy(box1, box2):
    """Computes IoU between bounding boxes.
    Parameters
    ----------
    box1 : ndarray
        (N, 4) shaped array with bboxes
    box2 : ndarray
        (M, 4) shaped array with bboxes
    Returns
    -------
    : ndarray
        (N, M) shaped array with IoUs
    """
    # Areas: box2 as a row vector, box1 as a column vector for broadcasting.
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    area1 = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1)

    # Pairwise overlap extents, clipped at zero for disjoint pairs.
    right = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2])
    left = np.maximum(np.expand_dims(box1[:, 0], 1), box2[:, 0])
    bottom = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3])
    top = np.maximum(np.expand_dims(box1[:, 1], 1), box2[:, 1])
    iw = np.maximum(right - left, 0)
    ih = np.maximum(bottom - top, 0)

    # Union, floored at machine epsilon to avoid dividing by zero.
    union = np.maximum(area1 + area2 - iw * ih, np.finfo(float).eps)

    return (iw * ih) / union


def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)

    ``prediction`` is indexed as ``[image, box, attribute]`` with attributes
    ``(center x, center y, width, height, object_conf, class scores...)``.
    NOTE: the first four attributes of ``prediction`` are overwritten in
    place with corner coordinates.

    The result is a list with one entry per image: a 2-D tensor of kept
    detections, or ``None`` when no box passed the confidence threshold.
    Suppression is performed independently for each predicted class.
    """

    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold.
        # The comparison already yields a 1-D mask; squeezing it would turn a
        # single-row mask into a 0-d tensor, and indexing with that adds a
        # dimension instead of selecting rows.
        conf_mask = image_pred[:, 4] >= conf_thres
        image_pred = image_pred[conf_mask]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = (
                max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
            )

    return output


def build_targets(
    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
):
    """Build per-cell training targets for one detection layer.

    For every non-zero row of ``target`` the responsible grid cell and the
    best-matching anchor (by shape IoU) are determined, and the corresponding
    entries of the returned tensors are filled in.

    ``target`` is indexed as ``[batch, object, 5]``; index 0 is read as the
    class label and indices 1-4 are multiplied by ``grid_size`` to obtain
    ``(gx, gy, gw, gh)``.
    NOTE(review): this presumes indices 1-4 are normalised to [0, 1] — confirm
    against the dataset. ``img_dim`` is not used in this function.

    Returns
    -------
    (nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls)
        ``nGT`` counts processed ground-truth rows; ``nCorrect`` counts those
        whose best prediction has IoU > 0.5, the matching class and
        confidence > 0.5. The remaining tensors have leading shape
        ``(batch, num_anchors, grid_size, grid_size)``.
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    nG = grid_size
    # mask: 1 where an anchor/cell is responsible for an object.
    mask = torch.zeros(nB, nA, nG, nG)
    # conf_mask: starts at 1 everywhere; zeroed for anchors that overlap an
    # object above ignore_thres, then set back to 1 for the best anchor.
    conf_mask = torch.ones(nB, nA, nG, nG)
    tx = torch.zeros(nB, nA, nG, nG)
    ty = torch.zeros(nB, nA, nG, nG)
    tw = torch.zeros(nB, nA, nG, nG)
    th = torch.zeros(nB, nA, nG, nG)
    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target.shape[1]):
            # All-zero rows are treated as padding and skipped
            if target[b, t].sum() == 0:
                continue
            nGT += 1
            # Scale box centre and size to grid units
            gx = target[b, t, 1] * nG
            gy = target[b, t, 2] * nG
            gw = target[b, t, 3] * nG
            gh = target[b, t, 4] * nG
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Get shape of gt box (anchored at the origin, so only size matters)
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Get shape of anchor box
            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # Where the overlap is larger than threshold set mask to zero (ignore)
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            # Get ground truth box
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            # Get the best prediction
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
            # Masks
            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1
            # Coordinates: offset of the centre within its grid cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height: log-ratio to the anchor size (epsilon avoids log(0))
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # One-hot encoding of label
            target_label = int(target[b, t, 0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # Calculate iou between ground truth and best matching prediction
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls


def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor

    ``y`` indexes the rows of a ``num_classes`` x ``num_classes`` identity
    matrix; the selected rows are returned as a uint8 tensor.
    """
    identity = np.eye(num_classes, dtype="uint8")
    one_hot = identity[y]
    return torch.from_numpy(one_hot)


================================================
FILE: requirements.txt
================================================
cycler==0.10.0
kiwisolver==1.1.0
matplotlib==3.1.2
numpy==1.21.0
opencv-python==4.2.0.32
pyparsing==2.4.6
python-dateutil==2.8.1
six==1.14.0
tqdm==4.41.1


================================================
FILE: src/3d_detector.py
================================================
#!/home/sibot/anaconda2/bin/python
""" 3d_detector.py
    Subscribe to the Yolo 2D bboxes and the upper/lower Velodyne pointclouds, and publish 3D detections together with their FPointNet depth features
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import pdb
import time
import os
import cv2
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import PointCloud2, Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from featurepointnet_model_util import generate_detections_3d, convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from visualization_msgs.msg import MarkerArray, Marker
from cv_bridge import CvBridge, CvBridgeError
from geometry_msgs.msg import Pose, Vector3
from std_msgs.msg import ColorRGBA
from jpda_rospack.msg import detection3d_with_feature_array, detection3d_with_feature

class Detector_3d:
    """ROS node that lifts 2D person detections to 3D boxes with FPointNet.

    Subscribes (time-synchronised) to the YOLO bounding boxes and the upper and
    lower Velodyne point clouds, runs the depth model on the merged cloud and
    publishes one ``detection3d_with_feature`` per 2D person detection, plus
    visualisation markers for the valid boxes.
    """

    def __init__(self):
        self.node_name = "fpointnet_detector_plus_feature"

        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)
        fpointnet_config = \
            rospy.get_param('~fpointnet_config',
                            '~/jr2_catkin_workspace/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt')
        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
        self.depth_model = create_depth_model('FPointNet', fpointnet_config)
        self.calib = OmniCalibration(calibration_folder)
        self.velodyne_sub_upper = \
            message_filters.Subscriber("/upper_velodyne/velodyne_points", PointCloud2, queue_size=2)
        self.velodyne_sub_lower = \
            message_filters.Subscriber("/lower_velodyne/velodyne_points", PointCloud2, queue_size=2)
        self.yolo_bbox_sub = \
            message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)

        # Queue size 5, maximum stamp difference (slop) 0.06 s.
        self.time_sync = \
            message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub,
                                                         self.velodyne_sub_upper,
                                                         self.velodyne_sub_lower], 5, 0.06)
        self.time_sync.registerCallback(self.get_3d_feature)

        self.feature_3d_pub = rospy.Publisher("detection3d_with_feature", detection3d_with_feature_array, queue_size=10)
        self.pc_transform_pub = rospy.Publisher("/transformed_pointcloud", PointCloud2, queue_size=10)
        self.pc_pub = rospy.Publisher("/frustum", PointCloud2, queue_size=10)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        self.marker_box_pub = rospy.Publisher("/3d_detection_markers", MarkerArray, queue_size=10)
        rospy.loginfo("3D detector ready.")

    def get_3d_feature(self, y1_bboxes, pointcloud_upper, pointcloud_lower):
        """Synchronised callback: compute and publish 3D detections.

        Parameters
        ----------
        y1_bboxes : BoundingBoxes message with the 2D detections
        pointcloud_upper, pointcloud_lower : PointCloud2 messages
        """
        # Each point is re-cast to a 32-byte record of 32-bit floats; viewed
        # as float32 that is 8 values per point, of which columns
        # [0, 1, 2, 4] (x, y, z, intensity by the offsets below) are kept.
        point_dtype = {'names': ['x', 'y', 'z', 'intensity', 'ring'],
                       'formats': ['f4', 'f4', 'f4', 'f4', 'f4'],
                       'offsets': [0, 4, 8, 16, 20],
                       'itemsize': 32}
        pc_upper = ros_numpy.numpify(pointcloud_upper).astype(point_dtype)
        pc_lower = ros_numpy.numpify(pointcloud_lower).astype(point_dtype)
        pc_upper = torch.from_numpy(pc_upper.view(np.float32).reshape(pc_upper.shape + (-1,)))[:, [0,1,2,4]]
        pc_lower = torch.from_numpy(pc_lower.view(np.float32).reshape(pc_lower.shape + (-1,)))[:, [0,1,2,4]]
        # Move onto the GPU if one is available. Checking explicitly (instead
        # of a bare try/except) keeps both clouds on the same device and does
        # not hide unrelated errors.
        if torch.cuda.is_available():
            pc_upper = pc_upper.cuda()
            pc_lower = pc_lower.cuda()
        # translate and rotate into camera frame using calib object
        # in message pointcloud has x pointing forward, y pointing to the left and z pointing upward
        # need to transform this such that x is pointing to the right, y pointing downwards, z pointing forward
        # also done inside calib
        pc_upper = self.calib.move_lidar_to_camera_frame(pc_upper, upper=True)
        pc_lower = self.calib.move_lidar_to_camera_frame(pc_lower, upper=False)
        pc = torch.cat([pc_upper, pc_lower], dim = 0)
        # The fourth column is overwritten with a constant 1.
        pc[:, 3] = 1

        # Keep only 'person' boxes; frame_det_ids numbers them consecutively.
        detections_2d = []
        frame_det_ids = []
        count = 0
        for y1_bbox in y1_bboxes.bounding_boxes:
            if y1_bbox.Class == 'person':
                xmin = y1_bbox.xmin
                xmax = y1_bbox.xmax
                ymin = y1_bbox.ymin
                ymax = y1_bbox.ymax
                probability = y1_bbox.probability
                frame_det_ids.append(count)
                count += 1
                detections_2d.append([xmin, ymin, xmax, ymax, probability, -1, -1])
        features_3d = detection3d_with_feature_array()
        features_3d.header.stamp = y1_bboxes.header.stamp
        features_3d.header.frame_id = 'occam'
        boxes_3d_markers = MarkerArray()
        if not detections_2d:
            # Still publish (empty) messages so downstream nodes stay in step.
            self.marker_box_pub.publish(boxes_3d_markers)
            self.feature_3d_pub.publish(features_3d)
            return
        boxes_3d, valid_3d, rot_angles, _, depth_features, frustums = \
            generate_detections_3d(self.depth_model, detections_2d, pc,
                                   self.calib, (3, 480, 3760), omni=True,
                                   peds=True)
        depth_features = convert_depth_features(depth_features, valid_3d)

        for box, feature, i in zip(boxes_3d, depth_features, frame_det_ids):
            det_msg = detection3d_with_feature()
            det_msg.header.frame_id = 'occam'
            det_msg.header.stamp = features_3d.header.stamp
            # A value of -1 in valid_3d marks a detection without a 3D box.
            det_msg.valid = True if valid_3d[i] != -1 else False
            det_msg.frame_det_id = i
            if det_msg.valid:
                det_msg.x = box[0]
                det_msg.y = box[1]
                det_msg.z = box[2]
                det_msg.l = box[3]
                det_msg.h = box[4]
                det_msg.w = box[5]
                det_msg.theta = box[6]
                det_msg.feature = feature
                pose_msg = Pose()
                marker_msg = Marker()
                marker_msg.header.stamp = pointcloud_lower.header.stamp
                marker_msg.header.frame_id = 'occam'
                marker_msg.action = 0
                marker_msg.id = i
                marker_msg.lifetime = rospy.Duration(0.2)
                marker_msg.type = 1
                marker_msg.scale = Vector3(box[3], box[4], box[5])
                pose_msg.position.x = det_msg.x
                # The marker is shifted by half the box height along y.
                pose_msg.position.y = det_msg.y - det_msg.h/2
                pose_msg.position.z = det_msg.z
                marker_msg.pose = pose_msg
                marker_msg.color = ColorRGBA(g=1, a =0.5)
                boxes_3d_markers.markers.append(marker_msg)
            else:
                # Invalid detections are published with sentinel values.
                det_msg.y = -1
                det_msg.x = -1
                det_msg.z = -1
                det_msg.l = -1
                det_msg.w = -1
                det_msg.h = -1
                det_msg.theta = -1
                det_msg.feature = [-1]
            # Append exactly once per detection. Valid detections used to be
            # appended inside the branch above as well, so they appeared
            # twice in the published array.
            features_3d.detection3d_with_features.append(det_msg)

        self.marker_box_pub.publish(boxes_3d_markers)

        self.feature_3d_pub.publish(features_3d)

    def publish_pointcloud_from_array(self, pointcloud, publisher, frame = 'occam', header = None):
        """Publish an iterable of (x, y, z, intensity) rows as a PointCloud2.

        Parameters
        ----------
        pointcloud : iterable of 4-element rows
        publisher : rospy publisher for PointCloud2 messages
        frame : frame id written to the message header
        header : optional header whose stamp is copied to the message
        """
        list_pc = [tuple(j) for j in pointcloud]
        pc_output_msg = np.array(list_pc, dtype = [('x', 'f4'),('y', 'f4'),('z', 'f4'),('intensity', 'f4')])
        pc_msg = ros_numpy.msgify(PointCloud2, pc_output_msg)
        if header is not None:
            pc_msg.header.stamp = header.stamp
        # Honour the ``frame`` argument (its default matches the previously
        # hard-coded 'occam').
        pc_msg.header.frame_id = frame
        publisher.publish(pc_msg)

    def cleanup(self):
        """Shutdown hook registered with rospy."""
        print("Shutting down 3D-Detection node.")
    
def main(args):
    """Start the 3D detection node and block until ROS shuts down.

    ``args`` is accepted for symmetry with ``sys.argv`` but is not used.
    """
    try:
        # Constructing the node registers all subscribers and publishers.
        detector = Detector_3d()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D-Detection node.")


if __name__ == '__main__':
    main(sys.argv)


================================================
FILE: src/EKF.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import pdb

"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
# All tables below map degrees of freedom (1..9) to a chi-square quantile.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}

# 0.90 quantile.
chi2inv90 = {
    1: 2.706,
    2: 4.605,
    3: 6.251,
    4: 7.779,
    5: 9.236,
    6: 10.645,
    7: 12.017,
    8: 13.363,
    9: 14.684}

# 0.975 quantile.
chi2inv975 = {
    1: 5.025,
    2: 7.378,
    3: 9.348,
    4: 11.143,
    5: 12.833,
    6: 14.449,
    7: 16.013,
    8: 17.535,
    9: 19.023}

# 0.10 quantile (lower tail).
chi2inv10 = {
    1: .016,
    2: .221,
    3: .584,
    4: 1.064,
    5: 1.610,
    6: 2.204,
    7: 2.833,
    8: 3.490,
    9: 4.168}


# NOTE(review): despite the name, these values match the 0.005 quantile
# (i.e. upper-tail probability 0.995), not the 0.995 quantile.
chi2inv995 = {
    1: 0.0000393,
    2: 0.0100,
    3: .0717,
    4: .207,
    5: .412,
    6: .676,
    7: .989,
    8: 1.344,
    9: 1.735}


# 0.75 quantile.
chi2inv75 = {
    1: 1.323,
    2: 2.773,
    3: 4.108,
    4: 5.385,
    5: 6.626,
    6: 7.841,
    7: 9.037,
    8: 10.22,
    9: 11.39}

def squared_mahalanobis_distance(mean, covariance, measurements):
    """Squared Mahalanobis distance of each measurement to a Gaussian.

    Parameters
    ----------
    mean : ndarray
        Mean vector, broadcast against ``measurements``.
    covariance : ndarray
        Square covariance matrix.
    measurements : ndarray
        One measurement per row.

    Returns
    -------
    ndarray
        One value ``d_i * inv(covariance) * d_i^T`` per row, where
        ``d_i = measurements[i] - mean``.
    """
    d = measurements - mean
    # Evaluate the quadratic form row by row. The earlier formulation
    # multiplied out the full N x N matrix d * inv(cov) * d^T and then kept
    # only its diagonal, which wastes O(N^2) time and memory.
    weighted = np.dot(d, np.linalg.inv(covariance))
    squared_maha = np.sum(weighted * d, axis=1)
    return squared_maha


class EKF(object):
    """
    Generic extended kalman filter class

    This base class implements ``predict``, ``project`` and ``update`` in
    terms of hooks that concrete filters are expected to provide:
    ``predict_mean``, ``predict_covariance``, ``project_mean``,
    ``project_cov``, plus the attributes ``_observation_mat`` and (for JPDA
    updates) ``ndim``. The hook implementations here are empty placeholders.
    """

    def __init__(self):
        pass

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the new track. 
            Unobserved velocities are initialized to 0 mean.

        """
        pass


    def predict_mean(self, mean):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        pass

    def get_process_noise(self, mean, covariance):
        # Returns Rt the motion noise covariance
        pass

    def predict_covariance(self, mean, covariance):
        # Hook: returns the predicted covariance for the given state.
        pass

    def project_mean(self, mean):
        # Measurement prediction from state (function h)
        # Calculations sensor update Jacobian (Ht)
        # Returns (h(mean), Ht)
        pass

    def project_cov(self, mean, covariance):
        # Hook: returns the covariance projected into measurement space
        # (used as the innovation covariance in ``update``).
        pass

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.

        """
        # The covariance is propagated first, from the PREVIOUS mean; only
        # then is the mean itself advanced.
        covariance = self.predict_covariance(mean, covariance) 
        mean = self.predict_mean(mean)

        return mean, covariance

    def get_innovation_cov(self, covariance):
        pass

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector 
        covariance : ndarray
            The state's covariance matrix

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.

        """
        return self.project_mean(mean), self.project_cov(mean, covariance)

    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector.
        covariance : ndarray
            The predicted state's covariance matrix.
        measurement_t : ndarray
            With ``JPDA=False``: a single measurement vector.
            With ``JPDA=True``: an iterable of measurement vectors, all of
            which contribute to the update, weighted by ``marginalization``.
        marginalization : sequence of float, optional
            Only used with ``JPDA=True``. Entry 0 is the weight of the dummy
            (no-detection) hypothesis; entry ``k + 1`` is the weight of
            ``measurement_t[k]``.
        JPDA : bool
            Selects the probabilistic (JPDA) update instead of the standard
            single-measurement update.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """
        predicted_measurement, innovation_cov  = self.project(mean, covariance)
        # cholesky factorization used to solve for kalman gain since
        # K = covariance * update_mat.T * inv(innovation_cov)
        # so K is also the solution to 
        # innovation_cov * K = covariance * update_mat.T
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            kalman_gain = scipy.linalg.cho_solve(
                (chol_factor, lower), np.dot(covariance, self._observation_mat.T).T,
                check_finite=False).T
        except Exception:
            # in case cholesky factorization fails, revert to standard solver.
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed here.
            kalman_gain = np.linalg.solve(innovation_cov, np.dot(covariance, self._observation_mat.T).T).T

        if JPDA:
            # marginalization: probability-weighted innovation and its spread
            innovation = np.zeros((self.ndim)) 
            cov_soft = np.zeros((self.ndim, self.ndim))

            for measurement_idx, measurement in enumerate(measurement_t):

                p_ij = marginalization[measurement_idx + 1] # + 1 for dummy
                y_ij = measurement - predicted_measurement
                innovation += y_ij * p_ij
                cov_soft += p_ij * np.outer(y_ij, y_ij)

            cov_soft = cov_soft - np.outer(innovation, innovation)

            # Covariance of a standard update with a single measurement
            P_star = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

            # Blend with the un-updated covariance by the dummy probability
            p_0 = marginalization[0]
            P_0 = p_0 * covariance + (1 - p_0) * P_star

            new_covariance = P_0 + np.linalg.multi_dot((kalman_gain, cov_soft, kalman_gain.T))
            
        else:
            innovation = measurement_t - predicted_measurement

            new_covariance = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        return new_mean, new_covariance


================================================
FILE: src/JPDA_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from linear_assignment import min_marg_matching
import pdb


def get_unmatched(all_idx, matches, i, marginalization=None):
    """Return the entries of `all_idx` that no match refers to.

    Parameters
    ----------
    all_idx : iterable
        Candidate indices (tracks or detections).
    matches : iterable of tuple
        Match tuples; element `i` of every tuple is treated as assigned.
    i : int
        Position inside each match tuple to read (0 or 1 in this file).
    marginalization : ndarray or None
        When given, column 0 is skipped (dummy node) and every index whose
        remaining column has a non-zero sum is also dropped from the result.

    Returns
    -------
    list
        The indices that are left over.
    """
    taken = set(pair[i] for pair in matches)
    leftover = set(all_idx).difference(taken)
    if marginalization is None:
        return list(leftover)

    # Column 0 is the dummy node, so detection d is described by column d + 1.
    column_mass = np.sum(marginalization[:, 1:], axis=0)
    gated = np.nonzero(column_mass)[0].tolist()
    return [idx for idx in leftover if idx not in gated]


class Matcher:
    """Turn per-track marginal association probabilities into hard matches.

    `marginalizations` is read as a 2-D array in which row `k` belongs to
    `confirmed_tracks[k]`, column 0 is the dummy (no detection) hypothesis and
    column `d + 1` belongs to detection `d`.

    Parameters
    ----------
    detections : sequence
        Detections of the current frame; only its length is used here.
    marginalizations : ndarray
        Marginal probabilities laid out as described above.
    confirmed_tracks : sequence
        Track identifiers, one per row of `marginalizations`.
    matching_strategy : str
        One of "max_and_threshold", "hungarian", "max_match" or "none".
    assignment_threshold : float or None
        Minimum probability for a track/detection pair to be accepted.
        `None` disables the threshold in the argmax-based strategies.
    """

    def __init__(self, detections, marginalizations, confirmed_tracks,
                 matching_strategy,
                 assignment_threshold=None):
        self.detections = detections
        self.marginalizations = marginalizations
        self.confirmed_tracks = confirmed_tracks
        self.assignment_threshold = assignment_threshold
        self.detection_indices = np.arange(len(detections))
        self.matching_strategy = matching_strategy

    def match(self):
        """Run the configured strategy.

        Returns
        -------
        tuple
            (matches, unmatched_tracks, unmatched_detections) where matches
            is a list of (track, detection) tuples.
        """
        self.get_matches()
        self.get_unmatched_tracks()
        self.get_unmatched_detections()
        return self.matches, self.unmatched_tracks, self.unmatched_detections

    def get_matches(self):
        """Populate `self.matches` according to `self.matching_strategy`.

        Raises
        ------
        Exception
            If the strategy name is not recognised.
        """
        if self.matching_strategy == "max_and_threshold":
            self.max_and_threshold_matching()
        elif self.matching_strategy == "hungarian":
            self.hungarian()
        elif self.matching_strategy == "max_match":
            self.max_match()
        elif self.matching_strategy == "none":
            self.matches = []
        else:
            raise Exception('Unrecognized matching strategy: {}'.
                            format(self.matching_strategy))

    def get_unmatched_tracks(self):
        """Store the confirmed tracks that appear in no match."""
        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,
                                              self.matches, 0)

    def get_unmatched_detections(self):
        """Store the detections left over by `get_unmatched`."""
        self.unmatched_detections = get_unmatched(
            self.detection_indices, self.matches, 1, self.marginalizations)

    def _best_detection(self, row):
        """Pick the most probable hypothesis of one marginalization row.

        Returns
        -------
        tuple or None
            (detection_id, probability), or None when the dummy hypothesis
            wins or the probability is below `assignment_threshold`.
        """
        marginalization = self.marginalizations[row, :]
        detection_id = np.argmax(marginalization) - 1  # subtract one for dummy
        if detection_id < 0:
            return None

        probability = marginalization[detection_id + 1]
        # A missing threshold means "accept everything"; comparing a float
        # with None would raise on Python 3.
        if (self.assignment_threshold is not None
                and probability < self.assignment_threshold):
            return None
        return detection_id, probability

    def max_match(self):
        """Argmax matching in which every detection goes to at most one track.

        Each track proposes its most probable detection. When several tracks
        propose the same detection, the one with the larger marginal keeps it
        (the earlier track wins ties). The probabilities compared are taken
        from the proposing track's own row and the detection's own column.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        # detection_id -> (track_idx, probability of that pairing)
        winners = {}
        for row, track_idx in enumerate(self.confirmed_tracks):
            candidate = self._best_detection(row)
            if candidate is None:
                continue

            detection_id, probability = candidate
            incumbent = winners.get(detection_id)
            if incumbent is None or probability > incumbent[1]:
                winners[detection_id] = (track_idx, probability)

        for detection_id, (track_idx, _) in winners.items():
            self.matches.append((track_idx, detection_id))

    def max_and_threshold_matching(self):
        """Argmax matching without conflict resolution.

        Every track is paired with its most probable detection as long as the
        probability reaches `assignment_threshold`; two tracks may therefore
        end up with the same detection.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return

        for row, track_idx in enumerate(self.confirmed_tracks):
            candidate = self._best_detection(row)
            if candidate is None:
                continue
            self.matches.append((track_idx, candidate[0]))

    def hungarian(self):
        """Delegate to `min_marg_matching`, keeping only its first result."""
        self.matches, _, _ = min_marg_matching(self.marginalizations,
                                               self.confirmed_tracks,
                                               self.assignment_threshold)
                               

================================================
FILE: src/__init__.py
================================================


================================================
FILE: src/aligned_reid_model.py
================================================
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import os
import math


class Model(nn.Module):
  """Appearance feature extractor built on the pretrained `resnet50` backbone.

  The local-feature layers (`local_conv`, `local_bn`, `local_relu`) and the
  optional classifier `fc` are still constructed so that checkpoints
  containing them can be loaded, but `forward` only produces the globally
  pooled feature.

  Args:
    local_conv_out_channels: number of output channels of `local_conv`.
    num_classes: when not None, a linear classifier `fc` with that many
      outputs is created and initialised.
  """

  def __init__(self, local_conv_out_channels=128, num_classes=None):
    super(Model, self).__init__()
    self.base = resnet50(pretrained=True)
    planes = 2048
    self.local_conv = nn.Conv2d(planes, local_conv_out_channels, 1)
    self.local_bn = nn.BatchNorm2d(local_conv_out_channels)
    self.local_relu = nn.ReLU(inplace=True)

    if num_classes is not None:
      self.fc = nn.Linear(planes, num_classes)
      # In-place initialisers; the un-suffixed `init.normal` / `init.constant`
      # spellings are deprecated.
      init.normal_(self.fc.weight, std=0.001)
      init.constant_(self.fc.bias, 0)

  def forward(self, x):
    """
    Args:
      x: input batch passed straight to the backbone.
    Returns:
      global_feat: backbone output averaged over its spatial dimensions,
        shape [N, C, 1, 1] (the result is not flattened).
    """
    # shape [N, C, H, W]
    feat = self.base(x)
    # Pool with a kernel equal to the spatial size -> [N, C, 1, 1].
    global_feat = F.avg_pool2d(feat, feat.size()[2:])
    # The flattening of global_feat and the whole local-feature branch are
    # disabled in this version; only the pooled global feature is returned.
    return global_feat


# Names exported by `from aligned_reid_model import *`. Note that `Model` is
# not in this list and has to be imported by name.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

# Checkpoint URL for each ResNet variant; the resnetNN() constructors below
# download from here when called with pretrained=True.
model_urls = {
  'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
  'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
  'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
  'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
  'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
# Import-time side effect: sets TORCH_HOME for the whole process to a path
# relative to the current working directory.
# NOTE(review): presumably this is where model_zoo caches downloads - confirm.
os.environ["TORCH_HOME"] = "./ResNet_Model"

def conv3x3(in_planes, out_planes, stride=1):
  """Create a bias-free 3x3 `nn.Conv2d` with padding 1 and the given stride."""
  conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
  return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  When `downsample` is given it is applied to the block input before that
  input is added back onto the convolution path.
  """
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(BasicBlock, self).__init__()
    # Only the first convolution carries the stride.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return relu(conv path(x) + shortcut(x))."""
    y = self.relu(self.bn1(self.conv1(x)))
    y = self.bn2(self.conv2(y))

    shortcut = x if self.downsample is None else self.downsample(x)

    y += shortcut
    return self.relu(y)


class Bottleneck(nn.Module):
  """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

  The last convolution outputs `planes * 4` channels. When `downsample` is
  given it is applied to the block input before the residual addition.
  """
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(Bottleneck, self).__init__()
    widened = planes * 4
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    # The stride is applied by the 3x3 convolution.
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                           padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(widened)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return relu(conv path(x) + shortcut(x))."""
    y = self.relu(self.bn1(self.conv1(x)))
    y = self.relu(self.bn2(self.conv2(y)))
    y = self.bn3(self.conv3(y))

    shortcut = x if self.downsample is None else self.downsample(x)

    y += shortcut
    return self.relu(y)


class ResNet(nn.Module):
  """ResNet feature extractor: stem plus four residual stages.

  There is no pooling or classifier head; `forward` returns the output of the
  last stage.

  Args:
    block: residual block class exposing an `expansion` attribute and the
      constructor signature (inplanes, planes, stride=1, downsample=None).
    layers: four integers, the number of blocks in each stage.
  """

  def __init__(self, block, layers):
    super(ResNet, self).__init__()
    # Running channel count consumed and updated by _make_layer.
    self.inplanes = 64
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        # Normal init with std sqrt(2 / (kernel area * output channels)).
        kernel_h, kernel_w = module.kernel_size
        fan = kernel_h * kernel_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack `blocks` instances of `block` into one sequential stage.

    Only the first block receives `stride` and, when the shape changes, a
    1x1 convolution + batch norm applied to the shortcut.
    """
    out_planes = planes * block.expansion
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
      downsample = nn.Sequential(
        nn.Conv2d(self.inplanes, out_planes,
                  kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_planes),
      )

    stage = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = out_planes
    for _ in range(1, blocks):
      stage.append(block(self.inplanes, planes))

    return nn.Sequential(*stage)

  def forward(self, x):
    """Run the stem and the four stages; return the last stage's output."""
    x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
      x = stage(x)
    return x


def remove_fc(state_dict):
  """Return a shallow copy of `state_dict` without keys starting with 'fc.'.

  The input mapping is left untouched.
  """
  trimmed = state_dict.copy()
  fc_keys = [name for name in state_dict if name.startswith('fc.')]
  for name in fc_keys:
    trimmed.pop(name)
  return trimmed


def resnet18(pretrained=False):
  """Build a ResNet-18: `BasicBlock` stages of depth 2, 2, 2, 2.

  Args:
      pretrained (bool): If True, the ImageNet weights referenced by
        model_urls['resnet18'] are downloaded and loaded, minus the fc layer.
  """
  net = ResNet(BasicBlock, [2, 2, 2, 2])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet18'])
  net.load_state_dict(remove_fc(weights))
  return net


def resnet34(pretrained=False):
  """Build a ResNet-34: `BasicBlock` stages of depth 3, 4, 6, 3.

  Args:
      pretrained (bool): If True, the ImageNet weights referenced by
        model_urls['resnet34'] are downloaded and loaded, minus the fc layer.
  """
  net = ResNet(BasicBlock, [3, 4, 6, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet34'])
  net.load_state_dict(remove_fc(weights))
  return net


def resnet50(pretrained=False):
  """Build a ResNet-50: `Bottleneck` stages of depth 3, 4, 6, 3.

  Args:
      pretrained (bool): If True, the ImageNet weights referenced by
        model_urls['resnet50'] are downloaded into ./ResNet_Model (relative
        to the working directory) and loaded, minus the fc layer.
  """
  net = ResNet(Bottleneck, [3, 4, 6, 3])
  if not pretrained:
    return net
  # Unlike the sibling constructors, the download directory is passed
  # explicitly here.
  weights = model_zoo.load_url(model_urls['resnet50'],
                               model_dir="./ResNet_Model")
  net.load_state_dict(remove_fc(weights))
  return net


def resnet101(pretrained=False):
  """Build a ResNet-101: `Bottleneck` stages of depth 3, 4, 23, 3.

  Args:
      pretrained (bool): If True, the ImageNet weights referenced by
        model_urls['resnet101'] are downloaded and loaded, minus the fc layer.
  """
  net = ResNet(Bottleneck, [3, 4, 23, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet101'])
  net.load_state_dict(remove_fc(weights))
  return net


def resnet152(pretrained=False):
  """Build a ResNet-152: `Bottleneck` stages of depth 3, 8, 36, 3.

  Args:
      pretrained (bool): If True, the ImageNet weights referenced by
        model_urls['resnet152'] are downloaded and loaded, minus the fc layer.
  """
  net = ResNet(Bottleneck, [3, 8, 36, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet152'])
  net.load_state_dict(remove_fc(weights))
  return net


================================================
FILE: src/aligned_reid_utils.py
================================================
from __future__ import print_function
import os
import os.path as osp
import pickle
from scipy import io
import datetime
import time
from contextlib import contextmanager
import numpy as np
from PIL import Image
import torch
from torch.autograd import Variable
from aligned_reid_model import Model as aligned_reid_model
# from models.deep_sort_model import ImageEncoder as deep_sort_model

def time_str(fmt=None):
  """Format the current local time; `fmt` defaults to '%Y-%m-%d_%H:%M:%S'."""
  pattern = '%Y-%m-%d_%H:%M:%S' if fmt is None else fmt
  now = datetime.datetime.today()
  return now.strftime(pattern)


def load_pickle(path):
  """Unpickle and return the object stored at `path`.

  The path must exist; this is checked with an assert. Only use this on
  files from a trusted source, because unpickling can run arbitrary code.
  """
  assert osp.exists(path)
  handle = open(path, 'rb')
  try:
    return pickle.load(handle)
  finally:
    handle.close()


def save_pickle(obj, path):
  """Pickle `obj` to `path` (protocol 2), creating the parent directory."""
  parent_dir = osp.dirname(osp.abspath(path))
  may_make_dir(parent_dir)
  with open(path, 'wb') as sink:
    pickle.dump(obj, sink, protocol=2)


def save_mat(ndarray, path):
  """Write `ndarray` to a .mat file at `path` under the variable name 'ndarray'."""
  payload = {'ndarray': ndarray}
  io.savemat(path, payload)


def to_scalar(vt):
  """Return the first element of a pytorch Variable or Tensor as a numpy scalar.

  The value is moved to the CPU, converted to numpy and flattened, so a
  tensor of shape (1,) yields a scalar instead of a length-1 array.

  Raises:
    TypeError: if `vt` is neither a Variable nor a Tensor.
  """
  if isinstance(vt, Variable):
    host = vt.data.cpu()
  elif torch.is_tensor(vt):
    host = vt.cpu()
  else:
    raise TypeError('Input should be a variable or tensor')
  return host.numpy().flatten()[0]


def transfer_optim_state(state, device_id=-1):
  """Transfer an optimizer.state to cpu or specified gpu, which means
  transferring tensors of the optimizer.state to specified device.
  The modification is in place for the state.

  Nested dicts are processed recursively. Values that cannot be moved (for
  example integer step counters, which have no `.cpu()`/`.cuda()`) are left
  untouched.

  Args:
    state: An torch.optim.Optimizer.state
    device_id: gpu id, or -1 which means transferring to cpu
  Raises:
    RuntimeError: if a value is a `torch.nn.Parameter`, or a legacy
      (non-tensor) autograd `Variable`.
  """
  for key, val in state.items():
    if isinstance(val, dict):
      transfer_optim_state(val, device_id=device_id)
    elif isinstance(val, Variable) and not torch.is_tensor(val):
      # Since PyTorch 0.4 every tensor passes isinstance(val, Variable), so
      # the check is limited to legacy Variables that are not tensors;
      # otherwise every tensor in the state would be rejected.
      raise RuntimeError("Oops, state[{}] is a Variable!".format(key))
    elif isinstance(val, torch.nn.Parameter):
      raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
    else:
      try:
        if device_id == -1:
          state[key] = val.cpu()
        else:
          state[key] = val.cuda(device=device_id)
      except Exception:
        # Best effort: keep the value as is. Only `Exception` is caught so
        # that KeyboardInterrupt / SystemExit still propagate.
        pass


def may_transfer_optims(optims, device_id=-1):
  """Move the state of every optimizer in `optims` to the cpu or a gpu.

  The optimizers are modified in place; entries that are not
  `torch.optim.Optimizer` instances (e.g. None) are skipped.

  Args:
    optims: A list, which members are either torch.nn.optimizer or None.
    device_id: gpu id, or -1 which means transferring to cpu
  """
  real_optims = (entry for entry in optims
                 if isinstance(entry, torch.optim.Optimizer))
  for optimizer in real_optims:
    transfer_optim_state(optimizer.state, device_id=device_id)


def may_transfer_modules_optims(modules_and_or_optims, device_id=-1):
  """Move optimizers and modules to the cpu or the given gpu, in place.

  None entries are ignored silently; any other unsupported entry only
  produces a printed warning.

  Args:
    modules_and_or_optims: A list, which members are either torch.nn.optimizer
      or torch.nn.Module or None.
    device_id: gpu id, or -1 which means transferring to cpu
  """
  to_cpu = device_id == -1
  for entry in modules_and_or_optims:
    if entry is None:
      continue
    if isinstance(entry, torch.optim.Optimizer):
      transfer_optim_state(entry.state, device_id=device_id)
    elif isinstance(entry, torch.nn.Module):
      if to_cpu:
        entry.cpu()
      else:
        entry.cuda(device=device_id)
    else:
      print('[Warning] Invalid type {}'.format(entry.__class__.__name__))


class TransferVarTensor(object):
  """Callable that returns its argument moved to a fixed device.

  `device_id == -1` selects the cpu (`.cpu()`); any other value is passed to
  `.cuda(...)`.
  """

  def __init__(self, device_id=-1):
    self.device_id = device_id

  def __call__(self, var_or_tensor):
    if self.device_id == -1:
      return var_or_tensor.cpu()
    return var_or_tensor.cuda(self.device_id)


class TransferModulesOptims(object):
  """Callable wrapper around `may_transfer_modules_optims` with a fixed device.

  `device_id == -1` means cpu, any other value is a gpu id.
  """

  def __init__(self, device_id=-1):
    self.device_id = device_id

  def __call__(self, modules_and_or_optims):
    target = self.device_id
    may_transfer_modules_optims(modules_and_or_optims, target)


def set_devices(sys_device_ids):
  """
  Restrict the visible GPUs and build the matching transfer helpers.

  CUDA_VISIBLE_DEVICES is overwritten with the given ids, each followed by
  ', ' (so an empty tuple yields an empty string).
  Args:
    sys_device_ids: a tuple; which GPUs to use
      e.g.  sys_device_ids = (), only use cpu
            sys_device_ids = (3,), use the 4th gpu
            sys_device_ids = (0, 1, 2, 3,), use first 4 gpus
            sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
  Returns:
    TVT: a `TransferVarTensor` callable
    TMO: a `TransferModulesOptims` callable
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
    '{}, '.format(gpu) for gpu in sys_device_ids)
  # Models and user-defined tensors go to the first visible device, or to
  # the cpu (-1) when no GPU was requested.
  device_id = -1 if len(sys_device_ids) == 0 else 0
  return TransferVarTensor(device_id), TransferModulesOptims(device_id)


def set_devices_for_ml(sys_device_ids):
  """Multi-model (mutual learning) variant of `set_devices`.

  CUDA_VISIBLE_DEVICES is overwritten with the sorted, de-duplicated GPU ids
  of all models (each followed by ', '), and one pair of transfer helpers is
  built per model.

  Args:
    sys_device_ids: a tuple of tuples; which devices to use for each model,
      len(sys_device_ids) should be equal to number of models. Examples:

      sys_device_ids = ((-1,), (-1,))
        the two models both on CPU
      sys_device_ids = ((-1,), (2,))
        the 1st model on CPU, the 2nd model on GPU 2
      sys_device_ids = ((3,),)
        the only one model on the 4th gpu
      sys_device_ids = ((0, 1), (2, 3))
        the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3
      sys_device_ids = ((0,), (0,))
        the two models both on GPU 0
      sys_device_ids = ((0,), (0,), (1,), (1,))
        the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1

  Returns:
    TVTs: a list of `TransferVarTensor` callables, one for one model.
    TMOs: a list of `TransferModulesOptims` callables, one for one model.
    relative_device_ids: a list of lists; `sys_device_ids` transformed to
      relative ids; to be used in `DataParallel`
  """
  flat_ids = []
  for ids in sys_device_ids:
    flat_ids += ids
  # -1 stands for the cpu and must not end up in the visible-GPU list.
  gpu_ids = sorted(set(flat_ids) - {-1})

  os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
    '{}, '.format(gpu) for gpu in gpu_ids)

  relative_device_ids = []
  TVTs, TMOs = [], []
  for ids in sys_device_ids:
    # A system GPU id becomes its position among the visible GPUs; the cpu
    # marker -1 is kept unchanged.
    relative_ids = [sys_id if sys_id == -1 else find_index(gpu_ids, sys_id)
                    for sys_id in ids]
    relative_device_ids.append(relative_ids)

    # Each model and its tensors are placed on the model's first device.
    first_device = relative_ids[0]
    TVTs.append(TransferVarTensor(first_device))
    TMOs.append(TransferModulesOptims(first_device))
  return TVTs, TMOs, relative_device_ids


def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
  """Restore modules/optimizers from a checkpoint written by `save_ckpt`.

  The 'fc.weight' and 'fc.bias' entries of every stored state dict are
  dropped before it is handed to `load_state_dict`.

  Args:
    modules_optims: A list, which members are either torch.nn.optimizer
      or torch.nn.Module.
    ckpt_file: The file path.
    load_to_cpu: Boolean. Whether to transform tensors in modules/optimizers
      to cpu type.
    verbose: Boolean. Whether to print the file, epoch and scores.
  Returns:
    The epoch number and the scores stored in the checkpoint.
  """
  map_location = (lambda storage, loc: storage) if load_to_cpu else None
  ckpt = torch.load(ckpt_file, map_location=map_location)
  for target, stored in zip(modules_optims, ckpt['state_dicts']):
    for fc_key in ('fc.weight', 'fc.bias'):
      stored.pop(fc_key, None)
    load_state_dict(target, stored)
  epoch, scores = ckpt['ep'], ckpt['scores']
  if verbose:
    print('Resume from ckpt {}, \nepoch {}, \nscores {}'.format(
      ckpt_file, epoch, scores))
  return epoch, scores


def save_ckpt(modules_optims, ep, scores, ckpt_file):
  """Write the state dicts of modules/optimizers plus metadata to a file.

  The parent directory of `ckpt_file` is created when needed.

  Args:
    modules_optims: A list, which members are either torch.nn.optimizer
      or torch.nn.Module.
    ep: the current epoch number
    scores: the performance of current model
    ckpt_file: The file path.
  Note:
    torch.save() reserves device type and id of tensors to save, so when
    loading ckpt, you have to inform torch.load() to load these tensors to
    cpu or your desired gpu, if you change devices.
  """
  ckpt = {
    'state_dicts': [entry.state_dict() for entry in modules_optims],
    'ep': ep,
    'scores': scores,
  }
  target_dir = osp.dirname(osp.abspath(ckpt_file))
  may_make_dir(target_dir)
  torch.save(ckpt, ckpt_file)


def load_state_dict(model, src_state_dict):
  """Copy matching entries of `src_state_dict` into `model`, tolerantly.

  Keys of `src_state_dict` that the model does not have are skipped, keys
  missing from `src_state_dict` keep their current value, and a failing copy
  (e.g. a shape mismatch) only prints a warning.

  Arguments:
    model: A torch.nn.Module object.
    src_state_dict (dict): A dict containing parameters and persistent buffers.
  Note:
    This is modified from torch.nn.modules.module.load_state_dict(), to make
    the warnings and errors more detailed.
  """
  from torch.nn import Parameter
  targets = model.state_dict()
  for name, value in src_state_dict.items():
    # Unknown keys are ignored (changed for fine tuning).
    if name not in targets:
      continue
    # Serialized Parameters are unwrapped to their tensor.
    tensor = value.data if isinstance(value, Parameter) else value
    try:
      targets[name].copy_(tensor)
    except Exception as e:
      print("Warning: Error occurs when copying '{}': {}"
            .format(name, str(e)))

  # src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())
  # if len(src_missing) > 0:
  #   print("Keys not found in source state_dict: ")
  #   for n in src_missing:
  #     print('\t', n)

  # dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())
  # if len(dest_missing) > 0:
  #   print("Keys not found in destination state_dict: ")
  #   for n in dest_missing:
  #     print('\t', n)


def is_iterable(obj):
  """Return True when `obj` has a `__len__` attribute (sized containers)."""
  has_length = hasattr(obj, '__len__')
  return has_length


def may_set_mode(maybe_modules, mode):
  """Switch every torch module among `maybe_modules` to train or eval mode.

  Args:
    maybe_modules: an object or a list of objects; entries that are not
      `torch.nn.Module` instances are ignored.
    mode: 'train' or 'eval'.
  """
  assert mode in ['train', 'eval']
  if is_iterable(maybe_modules):
    candidates = maybe_modules
  else:
    candidates = [maybe_modules]
  for candidate in candidates:
    if not isinstance(candidate, torch.nn.Module):
      continue
    if mode == 'train':
      candidate.train()
    else:
      candidate.eval()


def may_make_dir(path):
  """Create directory `path` (with parents) unless it already exists.

  Args:
    path: a dir, or result of `osp.dirname(osp.abspath(file_path))`;
      None and '' are ignored.
  Note:
    `osp.exists('')` returns `False`, while `osp.exists('.')` returns `True`!
  """
  # A plain `if path is None or '':` would be wrong here: the right operand
  # is a constant, not a comparison.
  if path in (None, ''):
    return
  if osp.exists(path):
    return
  os.makedirs(path)


class AverageMeter(object):
  """Modified from Tong Xiao's open-reid.
  Tracks the latest value together with a weighted running average."""

  def __init__(self):
    self.val = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def reset(self):
    """Forget everything recorded so far."""
    self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

  def update(self, val, n=1):
    """Record `val` as the newest value, counted `n` times."""
    self.val = val
    self.sum += val * n
    self.count += n
    # The tiny epsilon avoids a division by zero while count is still 0.
    denominator = self.count + 1e-20
    self.avg = float(self.sum) / denominator


class RunningAverageMeter(object):
  """Keeps the latest value and an exponentially weighted running average.

  `hist` is the weight given to the previous average on every update.
  """

  def __init__(self, hist=0.99):
    self.val = None
    self.avg = None
    self.hist = hist

  def reset(self):
    """Forget the latest value and the average; `hist` is kept."""
    self.val = self.avg = None

  def update(self, val):
    """Blend `val` into the average; the first value seeds it."""
    if self.avg is not None:
      self.avg = self.avg * self.hist + val * (1 - self.hist)
    else:
      self.avg = val
    self.val = val


class RecentAverageMeter(object):
  """Averages the most recent `hist_size` values."""

  def __init__(self, hist_size=100):
    self.hist_size = hist_size
    self.fifo = []
    self.val = 0

  def reset(self):
    """Drop the stored history and the latest value."""
    self.fifo = []
    self.val = 0

  def update(self, val):
    """Append `val`, evicting the oldest entry once the window is exceeded."""
    self.val = val
    self.fifo.append(val)
    if len(self.fifo) > self.hist_size:
      self.fifo.pop(0)

  @property
  def avg(self):
    """Mean of the stored values; asserts that at least one exists."""
    stored = len(self.fifo)
    assert stored > 0
    return float(sum(self.fifo)) / stored


def get_model_wrapper(model, multi_gpu):
  """Return `model` wrapped in `DataParallel` when `multi_gpu` is true,
  otherwise return `model` itself."""
  from torch.nn.parallel import DataParallel
  return DataParallel(model) if multi_gpu else model


class ReDirectSTD(object):
  """Modified from Tong Xiao's `Logger` in open-reid.
  This class overwrites sys.stdout or sys.stderr, so that console logs can
  also be written to file.
  Args:
    fpath: file path
    console: one of ['stdout', 'stderr']
    immediately_visible: If `False`, the file is opened only once and closed
      after exiting. In this case, the message written to file may not be
      immediately visible (Because the file handle is occupied by the
      program?). If `True`, each writing operation of the console will
      open, write to, and close the file. If your program has tons of writing
      operations, the cost of opening and closing file may be obvious. (?)
  Usage example:
    `ReDirectSTD('stdout.txt', 'stdout', False)`
    `ReDirectSTD('stderr.txt', 'stderr', False)`
  NOTE: File will be deleted if already existing. Log dir and file is created
    lazily -- if no message is written, the dir and file will not be created.
  NOTE: `close()` (also reached through `with` and garbage collection) closes
    the log file and puts the original stream back in place. The original
    console stream itself is never closed, because it is the process-wide
    stdout/stderr and closing it would break every later print.
  """

  def __init__(self, fpath=None, console='stdout', immediately_visible=False):
    import sys
    import os
    import os.path as osp

    assert console in ['stdout', 'stderr']
    # Name of the sys attribute that is replaced, needed to undo it later.
    self._stream_name = console
    self.console = sys.stdout if console == 'stdout' else sys.stderr
    self.file = fpath
    self.f = None
    self.immediately_visible = immediately_visible
    if fpath is not None:
      # Remove existing log file.
      if osp.exists(fpath):
        os.remove(fpath)

    # Overwrite
    if console == 'stdout':
      sys.stdout = self
    else:
      sys.stderr = self

  def __del__(self):
    # Finalizers may run on a half-built object or during interpreter
    # shutdown; failures here must not surface.
    try:
      self.close()
    except Exception:
      pass

  def __enter__(self):
    # Returning self makes `with ReDirectSTD(...) as r:` usable.
    return self

  def __exit__(self, *args):
    self.close()

  def write(self, msg):
    """Write `msg` to the console and, when configured, to the log file."""
    self.console.write(msg)
    if self.file is not None:
      may_make_dir(os.path.dirname(osp.abspath(self.file)))
      if self.immediately_visible:
        with open(self.file, 'a') as f:
          f.write(msg)
      else:
        if self.f is None:
          self.f = open(self.file, 'w')
        self.f.write(msg)

  def flush(self):
    """Flush the console and, when open, the log file (including fsync)."""
    self.console.flush()
    if self.f is not None:
      self.f.flush()
      os.fsync(self.f.fileno())

  def close(self):
    """Close the log file and restore the stream this object replaced."""
    import sys
    if self.f is not None:
      self.f.close()
    # Undo the redirection only when this object is still the installed one.
    if getattr(sys, self._stream_name, None) is self:
      setattr(sys, self._stream_name, self.console)


def set_seed(seed):
  """Seed python's `random`, numpy and torch (cpu) with `seed`.

  cudnn is switched off as part of this call. Every step is reported with a
  print.
  """
  import random
  import numpy as np
  import torch

  random.seed(seed)
  print('setting random-seed to {}'.format(seed))

  np.random.seed(seed)
  print('setting np-random-seed to {}'.format(seed))

  torch.backends.cudnn.enabled = False
  print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))

  # Seeds the cpu generator.
  torch.manual_seed(seed)
  print('setting torch-seed to {}'.format(seed))


def print_array(array, fmt='{:.2f}', end=' '):
  """Print a 1-D tuple, list, or numpy array of numbers on one line.

  Every element is converted to float, formatted with `fmt` and followed by
  `end`. The printed text, which ends in a newline, is also returned.
  """
  line = ''.join(fmt.format(float(x)) + end for x in array) + '\n'
  print(line)
  return line


# Great idea from https://github.com/amdegroot/ssd.pytorch
def str2bool(v):
  """Return True for "yes", "true", "t" or "1" (case-insensitive), else False."""
  truthy = ("yes", "true", "t", "1")
  lowered = v.lower()
  return lowered in truthy


def tight_float_str(x, fmt='{:.4f}'):
  """Format `x` with `fmt`, then strip trailing zeros and a trailing dot."""
  text = fmt.format(x)
  without_zeros = text.rstrip('0')
  return without_zeros.rstrip('.')


def find_index(seq, item):
  """Return the position of the first element equal to `item`, or -1."""
  hits = (pos for pos, x in enumerate(seq) if item == x)
  return next(hits, -1)


def adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):
  """Exponentially decay the learning rate in the late phase of training.

  Before `start_decay_at_ep` nothing is changed. From then on every
  parameter group gets
  base_lr * 0.001 ** ((ep + 1 - start) / (total_ep + 1 - start)),
  and the new rate is printed.

  Args:
    optimizer: a pytorch `Optimizer` object
    base_lr: starting learning rate
    ep: current epoch, ep >= 1
    total_ep: total number of epochs to train
    start_decay_at_ep: start decaying at the BEGINNING of this epoch

  Example:
    base_lr = 2e-4
    total_ep = 300
    start_decay_at_ep = 201
    It means the learning rate starts at 2e-4 and begins decaying after 200
    epochs. And training stops after 300 epochs.

  NOTE:
    It is meant to be called at the BEGINNING of an epoch.
  """
  assert ep >= 1, "Current epoch number should be >= 1"

  if ep < start_decay_at_ep:
    return

  for group in optimizer.param_groups:
    elapsed = float(ep + 1 - start_decay_at_ep)
    span = total_ep + 1 - start_decay_at_ep
    group['lr'] = (base_lr * (0.001 ** (elapsed / span)))
  # Reports the rate of the last parameter group; all groups share it.
  print('=====> lr adjusted to {:.10f}'.format(group['lr']).rstrip('0'))


def adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):
  """Step the learning rate down at the listed epochs.

  Nothing happens unless `ep` is one of `decay_at_epochs`. At the k-th listed
  epoch (k starting at 1) every parameter group is set to
  base_lr * factor ** k, and the new rate is printed.

  Args:
    optimizer: a pytorch `Optimizer` object
    base_lr: starting learning rate
    ep: current epoch, ep >= 1
    decay_at_epochs: a list or tuple; learning rate is multiplied by a factor
      at the BEGINNING of these epochs
    factor: a number in range (0, 1)

  Example:
    base_lr = 1e-3
    decay_at_epochs = [51, 101]
    factor = 0.1
    It means the learning rate starts at 1e-3 and is multiplied by 0.1 at the
    BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at the
    BEGINNING of the 101'st epoch, then stays unchanged till the end of
    training.

  NOTE:
    It is meant to be called at the BEGINNING of an epoch.
  """
  assert ep >= 1, "Current epoch number should be >= 1"

  if ep not in decay_at_epochs:
    return

  step = find_index(decay_at_epochs, ep) + 1
  for group in optimizer.param_groups:
    group['lr'] = base_lr * factor ** step
  # Reports the rate of the last parameter group; all groups share it.
  print('=====> lr adjusted to {:.10f}'.format(group['lr']).rstrip('0'))


@contextmanager
def measure_time(enter_msg):
  """Context manager that prints `enter_msg` on entry and, after the body
  has finished normally, the elapsed wall-clock time in seconds."""
  started = time.time()
  print(enter_msg)
  yield
  elapsed = time.time() - started
  print('Done, {:.2f}s'.format(elapsed))

# @profile
def generate_features(appearance_model, patches):
    """Run `appearance_model` on each patch separately.

    Every patch gets a leading batch dimension, is passed through the model
    without gradient tracking, and the result (batch dimension removed) is
    returned as a numpy array on the cpu.

    Returns:
        list of numpy arrays, one per patch, in input order.
    """
    def _encode(single):
        with torch.no_grad():
            encoded = appearance_model(single)
            return encoded.squeeze(0).cpu().numpy()

    return [_encode(patch.unsqueeze(0)) for patch in patches]

# @profile
def generate_features_batched(appearance_model, patches, object_ids = None):
  """Run `appearance_model` once on all usable patches, zero-padded to a
  common size.

  Patches that are None or empty are skipped. The remaining patches are
  copied into the top-left corner of a zero batch whose last two dimensions
  are the largest size along dimension 1 and dimension 2 over all patches.

  The batch is created on the device of the model's first parameter, so a
  model living on the cpu or on any gpu works. When the model exposes no
  parameters the previous fixed device 'cuda:1' is used.

  Args:
    appearance_model: callable applied to the padded batch.
    patches: sequence of 3-D tensors (3 channels first) or None.
    object_ids: unused.

  Returns:
    A list as long as `patches` holding the model output row for every usable
    patch (a torch tensor) and None for every skipped one. When no patch is
    usable an empty list is returned instead.
  """
  # TODO: Fix batched appearance features. This currently gives bad features
  features = []
  valid_idxs = [i for i, patch in enumerate(patches)
                if patch is not None and patch.nelement() != 0]
  if not valid_idxs:
    return features

  max_x = max(patches[i].size()[1] for i in valid_idxs)
  max_y = max(patches[i].size()[2] for i in valid_idxs)

  # Follow the model's device instead of assuming a second gpu exists.
  device = 'cuda:1'
  parameters = getattr(appearance_model, 'parameters', None)
  if callable(parameters):
    first_param = next(iter(parameters()), None)
    if first_param is not None:
      device = first_param.device

  batch = torch.zeros(len(valid_idxs), 3, max_x, max_y).to(device)
  for row, idx in enumerate(valid_idxs):
    patch = patches[idx]
    batch[row, :, :patch.size()[1], :patch.size()[2]] = patch

  with torch.no_grad():
    features_torch = appearance_model(batch)
    # Scatter the batch rows back to the positions of their patches.
    features = [None] * len(patches)
    for row, idx in enumerate(valid_idxs):
      features[idx] = features_torch[row]

  return features
    # else:
    #     print("Critical Error! Attempted to batch appearance features but no model was selected")

def get_image_patches(input_img, detections):
    """Crop one patch per detection out of a channel-first image.

    Args:
        input_img: Array/tensor indexed as [channel, row, column].
        detections: Iterable of 7-element records whose first four entries are
            (x1, y1, x2, y2); the remaining three entries are ignored here.

    Returns:
        List of crops input_img[:, y1:y2, x1:x2], in detection order.
    """
    crops = []
    for left, top, right, bottom, _, _, _ in detections:
        height = bottom - top
        width = right - left
        crops.append(input_img[:, top:top + height, left:left + width])
    return crops

def create_appearance_model(alignreid_checkpoint, resnet_reid_checkpoint=None, cuda=True):
  """Build the aligned re-id model, load its checkpoint and switch it to eval mode.

  Args:
    alignreid_checkpoint: Path (or file object) handed to torch.load. The
      loaded object must provide ['state_dicts'][0].
    resnet_reid_checkpoint: Unused in this function.
    cuda: When true the model is moved to device 'cuda:1'.

  Returns:
    The model in evaluation mode.
  """
  model = aligned_reid_model()
  # Keep every tensor on the storage it was deserialized to (i.e. the CPU).
  checkpoint = torch.load(alignreid_checkpoint,
                          map_location=lambda storage, loc: storage)
  load_state_dict(model, checkpoint['state_dicts'][0])
  if cuda:
    model.to('cuda:1')
  model.eval()
  return model


================================================
FILE: src/calibration.py
================================================
import numpy as np
import cv2
import os
import yaml
import torch
import pdb

class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.
        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref
        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]
        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)
        velodyne coord:
        front x, left y, up z
        rect/ref camera coord:
        right x, down y, front z
        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
        TODO(rqi): do matrix multiplication only once for each projection.

        The torch copies of the matrices (P_torch, R0_torch, D_torch) live on
        the current CUDA device when CUDA is available, otherwise on the CPU.
    '''
    def __init__(self, calib_filepath):
        ''' Parse calib_filepath and cache the projection/transform matrices. '''
        calibs = self.read_calib_file(calib_filepath)
        # Fall back to the CPU so the class also works on machines without a GPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Projection matrix from rect camera coord to image2 coord
        self.P = np.reshape(calibs['P2'], [3,4])
        self.P_torch = torch.from_numpy(self.P).float().to(device)

        # Rigid transform from Velodyne coord to reference camera coord.
        # Two key spellings are accepted.
        try:
            self.V2C = calibs['Tr_velo_to_cam']
        except KeyError:
            self.V2C = calibs['Tr_velo_cam']
        self.V2C = np.reshape(self.V2C, [3,4])
        self.C2V = inverse_rigid_trans(self.V2C)

        # Rotation from reference camera coord to rect camera coord
        try:
            self.R0 = calibs['R0_rect']
        except KeyError:
            self.R0 = calibs['R_rect']
        self.R0 = np.reshape(self.R0,[3,3])
        self.R0_torch = torch.from_numpy(self.R0).float().to(device)

        # D = (P * [[R0, 0], [0, 1]])^T, so homogeneous ref points (nx4) can be
        # projected to the image with a single right-multiplication.
        RA = np.zeros((4,4))
        RA[:3,:3] = self.R0
        RA[3,3] = 1
        self.D = np.matmul(self.P,RA).T
        self.D_torch = torch.from_numpy(self.D).float().to(device)

        # Camera intrinsics and extrinsics
        self.c_u = self.P[0,2]
        self.c_v = self.P[1,2]
        self.f_u = self.P[0,0]
        self.f_v = self.P[1,1]
        self.b_x = self.P[0,3]/(-self.f_u) # relative
        self.b_y = self.P[1,3]/(-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py

        Each non-empty line is split at its first space into a key (a trailing
        ':' is stripped) and a whitespace-separated list of floats. Lines that
        have no value part or whose values are not all floats are skipped.
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f:
                line = line.rstrip()
                if not line:
                    continue
                key, sep, value = line.partition(' ')
                if not sep:
                    # A bare key without any value carries no calibration data.
                    continue
                if key.endswith(':'):
                    key = key[:-1]
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass

        return data

    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        Tr_velo_to_cam = np.zeros((3,4))
        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])
        Tr_velo_to_cam[:,3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Output: nx4 points in Homogeneous by appending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
        return pts_3d_hom

    def cart2hom_torch(self, pts_3d):
        ''' Torch version of cart2hom.
            Input: nx3 tensor. Output: nx4 tensor with a trailing column of ones,
            created with the dtype and on the device of the input.
        '''
        n = pts_3d.size()[0]
        ones = torch.ones(n, 1, dtype=pts_3d.dtype, device=pts_3d.device)
        return torch.cat((pts_3d, ones), 1)

    # ===========================
    # ------- 3d to 3d ----------
    # ===========================
    def project_velo_to_ref(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord. Output: nx3 points in ref camera coord. '''
        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        ''' Input: nx3 points in ref camera coord. Output: nx3 points in velodyne coord. '''
        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_ref_to_rect_torch(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return torch.transpose(torch.matmul(self.R0_torch, torch.transpose(pts_3d_ref,0,1)),0,1)

    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord. Output: nx3 points in rect camera coord. '''
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # ===========================
    # ------- 3d to 2d ----------
    # ===========================
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3
        # Perspective divide by the third (depth) component.
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_rect_to_image_torch(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom_torch(pts_3d_rect)
        pts_2d = torch.matmul(pts_3d_rect, torch.transpose(self.P_torch,0,1)) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_ref_to_image_torch(self, pts_3d_ref):
        ''' Input: nx3 points in ref camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_ref = self.cart2hom_torch(pts_3d_ref)
        pts_2d = torch.matmul(pts_3d_ref, self.D_torch) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # ===========================
    # ------- 2d to 3d ----------
    # ===========================
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
        pts_3d_rect = np.zeros((n,3))
        pts_3d_rect[:,0] = x
        pts_3d_rect[:,1] = y
        pts_3d_rect[:,2] = uv_depth[:,2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        ''' Input: nx3 (u, v, depth). Output: nx3 points in velodyne coord. '''
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)

def rotx(t):
    ''' Return the 3x3 matrix of a rotation by angle t (radians) about the x-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    row_x = [1, 0, 0]
    row_y = [0, cos_t, -sin_t]
    row_z = [0, sin_t, cos_t]
    return np.array([row_x, row_y, row_z])


def roty(t):
    ''' Return the 3x3 matrix of a rotation by angle t (radians) about the y-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    row_x = [cos_t, 0, sin_t]
    row_y = [0, 1, 0]
    row_z = [-sin_t, 0, cos_t]
    return np.array([row_x, row_y, row_z])


def rotz(t):
    ''' Return the 3x3 matrix of a rotation by angle t (radians) about the z-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    row_x = [cos_t, -sin_t, 0]
    row_y = [sin_t, cos_t, 0]
    row_z = [0, 0, 1]
    return np.array([row_x, row_y, row_z])


def transform_from_rot_trans(R, t):
    ''' Assemble the 4x4 homogeneous transform [[R, t], [0, 0, 0, 1]].

        R: array with 9 elements (reshaped to 3x3).
        t: array with 3 elements (reshaped to a 3x1 column).
    '''
    rotation = R.reshape(3, 3)
    translation = t.reshape(3, 1)
    upper_rows = np.hstack([rotation, translation])
    bottom_row = [0, 0, 0, 1]
    return np.vstack((upper_rows, bottom_row))


def inverse_rigid_trans(Tr):
    ''' Invert a rigid body transform given as a 3x4 matrix [R|t].

        The result has the same shape and dtype as Tr and holds [R'|-R't].
    '''
    rot_transposed = np.transpose(Tr[0:3, 0:3])
    inverse = np.zeros_like(Tr) # 3x4
    inverse[0:3, 0:3] = rot_transposed
    inverse[0:3, 3] = np.dot(-rot_transposed, Tr[0:3, 3])
    return inverse

def read_label(label_filename):
    ''' Parse a label file into a list with one Object3d per line.

        Trailing whitespace is stripped from each line before it is handed to
        Object3d. The file is closed before any object is constructed.

        NOTE(review): Object3d is neither defined nor imported in the visible
        part of this module; confirm where it is expected to come from.
    '''
    with open(label_filename) as label_file:
        lines = [line.rstrip() for line in label_file]
    return [Object3d(line) for line in lines]

def load_image(img_filename):
    ''' Read the image at img_filename with cv2.imread and return the result. '''
    image = cv2.imread(img_filename)
    return image

def load_velo_scan(velo_filename):
    ''' Load a binary file of float32 values and view it as rows of 4 values.

        Returns an (n, 4) float32 array.
    '''
    return np.fromfile(velo_filename, dtype=np.float32).reshape((-1, 4))

def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.
    Usage: pts_2d = projectToImage(pts_3d, P)
      input: pts_3d: nx3 matrix
             P:      3x4 projection matrix
      output: pts_2d: nx2 matrix
      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
      => normalize projected_pts_2d(2xn)
      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
          => normalize projected_pts_2d(nx2)

    The function no longer prints the shape of the extended point array on
    every call; that was leftover debug output.
    '''
    n = pts_3d.shape[0]
    # Homogeneous coordinates: append a column of ones.
    pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3
    # Perspective divide by the third component.
    pts_2d[:,0] /= pts_2d[:,2]
    pts_2d[:,1] /= pts_2d[:,2]
    return pts_2d[:,0:2]


def compute_box_3d(obj, P):
    ''' Build the 8 corners of an object's 3d box and project them with P.

        obj must expose ry (yaw), l, w, h and t (a 3-element position). The box
        is created around the origin with y spanning [-h, 0], rotated with
        roty(obj.ry) and then shifted by obj.t.

        Returns:
            corners_2d: (8,2) array in left image coord, or None when any
                        corner has a z value below 0.1.
            corners_3d: (8,3) array in rect camera coord.
    '''
    rotation = roty(obj.ry)

    half_l = obj.l/2
    half_w = obj.w/2
    height = obj.h

    # Corner offsets relative to the box origin, before rotation.
    x_corners = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    y_corners = [0, 0, 0, 0, -height, -height, -height, -height]
    z_corners = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]

    corners_3d = np.dot(rotation, np.vstack([x_corners, y_corners, z_corners]))
    for axis in range(3):
        corners_3d[axis, :] = corners_3d[axis, :] + obj.t[axis]

    corners_3d_rows = np.transpose(corners_3d)

    # Only project boxes that are entirely in front of the camera.
    if np.any(corners_3d[2, :] < 0.1):
        return None, corners_3d_rows

    return project_to_image(corners_3d_rows, P), corners_3d_rows


def compute_orientation_3d(obj, P):
    ''' Build an object's heading segment in 3d and project it with P.

        The segment runs from the object origin to a point obj.l along the
        object's local x-axis; it is rotated with roty(obj.ry) and shifted by
        obj.t.

        Returns:
            orientation_2d: (2,2) array in left image coord, or None when
                            either end point has a z value below 0.1.
            orientation_3d: (2,3) array in rect camera coord.
    '''
    rotation = roty(obj.ry)

    # Columns are the two end points in the object's own coordinate system.
    segment = np.array([[0.0, obj.l], [0, 0], [0, 0]])

    segment = np.dot(rotation, segment)
    for axis in range(3):
        segment[axis, :] = segment[axis, :] + obj.t[axis]

    segment_rows = np.transpose(segment)

    # Skip the projection when the vector reaches behind the image plane.
    if np.any(segment[2, :] < 0.1):
        return None, segment_rows

    return project_to_image(segment_rows, P), segment_rows

def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: array of 8 projected vertices (only columns 0 and 1 are used) for
            the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7

        The image is drawn on in place and also returned.
    '''
    qs = qs.astype(np.int32)

    # OpenCV 3+ names the anti-aliased line type LINE_AA; the old CV_AA
    # constant only exists in OpenCV 2.
    line_type = getattr(cv2, 'LINE_AA', None)
    if line_type is None:
        line_type = cv2.CV_AA

    def _vertex(idx):
        # Plain Python ints are accepted by every OpenCV binding version.
        return (int(qs[idx,0]), int(qs[idx,1]))

    for k in range(0,4):
       # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
       # Per k: one edge of face 0-3, one edge of face 4-7, one connecting edge.
       edges = ((k, (k+1)%4), (k+4, (k+1)%4 + 4), (k, k+4))
       for i, j in edges:
           cv2.line(image, _vertex(i), _vertex(j), color, thickness, line_type)
    return image


class OmniCalibration(Calibration):
    ''' Calibration driven by two YAML files, 'defaults.yaml' and 'cameras.yaml'.

        Unlike the base class, no projection matrices are loaded; the base
        class constructor is not called. Image x is obtained by mapping the
        horizontal angle atan2(x, z) of a point linearly onto the image width,
        image y uses the median focal length / optical centre of selected
        cameras.
    '''
    def __init__(self, calib_folder):
        ''' Load both YAML files from calib_folder and cache the median parameters. '''
        global_config = os.path.join(calib_folder, 'defaults.yaml')
        camera_config = os.path.join(calib_folder, 'cameras.yaml')

        with open(global_config) as f:
            self.global_config_dict = yaml.safe_load(f)

        with open(camera_config) as f:
            self.camera_config_dict = yaml.safe_load(f)

        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')
        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')
        # image shape is (color channels, height, width)
        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']

    def project_ref_to_image_torch(self, pointcloud):
        ''' Input: tensor whose first three columns are (x, y, z) in camera coord.
            Output: nx2 tensor of image coordinates (x, y).
        '''
        # Horizontal angle shifted into [0, 2*pi).
        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)
        horizontal_fraction = theta/ (2*np.pi)
        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]
        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y
        pts_2d = torch.stack([x, y], dim=1)

        return pts_2d


    def project_image_to_rect(self, uvdepth):
        ''' Input: nx3 array; columns 0/1 are image x/y, column 2 is used as a
                   range that is split into x and z through the angle theta.
            Output: nx3 array of (x, y, z).
        '''
        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi
        z = uvdepth[:, 2]*np.cos(theta)
        x = uvdepth[:, 2]*np.sin(theta)
        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))

        return np.stack([x,y,z], axis=1)

    def project_velo_to_ref(self, pointcloud):
        ''' Reorder lidar axes to (-y, -z, x). Works on a copy (advanced
            indexing), so the argument itself is not modified.
        '''
        pointcloud = pointcloud[:, [1, 2, 0]]
        pointcloud[:, 0] *= -1
        pointcloud[:, 1] *= -1

        return pointcloud

    def move_lidar_to_camera_frame(self, pointcloud, upper = True):
        ''' Shift, rotate and re-axis a lidar point cloud; modifies pointcloud in place.

            pointcloud: torch tensor whose first three columns are x, y, z.
            upper: selects the 'lidar_upper_to_rgb' entry of the calibrated
                   config when true, otherwise 'lidar_lower_to_rgb'.
            Returns the same tensor object.
        '''
        # assumed only rotation about z axis
        sensor_key = 'lidar_upper_to_rgb' if upper else 'lidar_lower_to_rgb'
        extrinsics = self.global_config_dict['calibrated'][sensor_key]

        pointcloud[:,:3] =  \
            pointcloud[:,:3] - torch.Tensor(extrinsics['translation']).type(pointcloud.type())
        # The last rotation entry is used as the angle of the planar rotation.
        theta = extrinsics['rotation'][-1]

        rotation_matrix = torch.Tensor([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]).type(pointcloud.type())
        # squeeze(2) drops only the trailing matmul axis, also for a single point.
        pointcloud[:, :2] = torch.matmul(rotation_matrix, pointcloud[:, :2].unsqueeze(2)).squeeze(2)
        pointcloud[:, :3] = self.project_velo_to_ref(pointcloud[:, :3])
        return pointcloud


    def calculate_median_param_value(self, param):
        ''' Median of one intrinsic parameter over the selected cameras.

            param: one of 'f_x', 'f_y', 't_x', 't_y', 's'. Each name selects a
                   position in the space-separated 'K' string of a camera.
            Raises ValueError for any other name.
        '''
        # Position of each parameter in the space-separated K string.
        param_to_idx = {'f_x': 0, 's': 1, 't_x': 2, 'f_y': 4, 't_y': 5}
        if param not in param_to_idx:
            raise ValueError('Wrong parameter!')
        idx = param_to_idx[param]

        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']
        parameter_list = []
        for sensor, camera_params in self.camera_config_dict['cameras'].items():
            if sensor not in omni_camera:
                continue
            K_matrix = camera_params['K'].split(' ')
            parameter_list.append(float(K_matrix[idx]))
        return np.median(parameter_list)


================================================
FILE: src/combination_model.py
================================================
import pdb

import numpy as np
import torch.nn as nn

class CombiNet(nn.Module):
	"""Three-layer fully connected network with a normalized skip connection.

	The two hidden layers are 2*hidden_units wide. The output is the
	normalized result of the last layer plus the normalized input, so in_dim
	and out_dim have to match for the addition to work.
	"""
	def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
		super().__init__()
		width = 2*hidden_units
		self.fc1 = nn.Linear(in_dim, width)
		self.fc2 = nn.Linear(width, width)
		self.fc3 = nn.Linear(width, out_dim)
		self.relu = nn.ReLU()
		self.apply(weight_init)

	def forward(self, x):
		"""Return normalize(fc3(relu(fc2(relu(fc1(x)))))) + normalize(x).

		Note that fc1 consumes the raw input x; only the skip branch uses the
		normalized input.
		"""
		skip = nn.functional.normalize(x)
		hidden = self.relu(self.fc1(x))
		hidden = self.relu(self.fc2(hidden))
		out = nn.functional.normalize(self.fc3(hidden))
		out += skip
		return out

class CombiLSTM(nn.Module):
	"""Linear -> Linear -> LSTM -> Linear -> Linear network with a normalized skip.

	The output is the normalized result of the last layer plus the normalized
	input, so in_dim and out_dim have to match for the addition to work.
	"""
	def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
		super().__init__()
		self.in_linear1 = nn.Linear(in_dim, hidden_units)
		self.in_linear2 = nn.Linear(hidden_units, hidden_units)
		self.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)
		self.out_linear1 = nn.Linear(hidden_units, hidden_units)
		self.out_linear2 = nn.Linear(hidden_units, out_dim)
		self.relu = nn.ReLU()
		self.apply(weight_init)

	def forward(self, x, hidden = None):
		"""Run the network on x and return (output, new LSTM state).

		hidden is the optional LSTM state from a previous call; None lets the
		LSTM start from its default initial state.
		"""
		skip = nn.functional.normalize(x)
		feat = self.relu(self.in_linear1(skip))
		# Insert a batch dimension of size 1 for the LSTM.
		feat = self.in_linear2(feat).unsqueeze(1)
		feat, hidden = self.rnn(feat, hidden)
		# Drop the batch dimension again.
		feat = self.relu(self.out_linear1(feat.squeeze(1)))
		out = nn.functional.normalize(self.out_linear2(feat))
		out += skip
		return out, hidden

def weight_init(m):
	"""Xavier-normal initialisation hook for use with nn.Module.apply.

	nn.Linear weights are initialised with gain sqrt(2); for nn.LSTM the
	layer-0 input-hidden and hidden-hidden weights use the default gain.
	Modules of any other exact type are left untouched.
	"""
	layer_type = type(m)
	if layer_type is nn.Linear:
		nn.init.xavier_normal_(m.weight, gain=np.sqrt(2))
	elif layer_type is nn.LSTM:
		for weights in (m.weight_ih_l0, m.weight_hh_l0):
			nn.init.xavier_normal_(weights)


================================================
FILE: src/deep_sort_utils.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2


def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : ndarray
        Array of ROIs (x, y, width, height).
    max_bbox_overlap : float
        ROIs that overlap more than this values are suppressed.
    scores : Optional[array_like]
        Detector confidence score. Without scores, boxes are ranked by their
        bottom coordinate (y + height).

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    Notes
    -----
    The overlap of a candidate with the currently picked box is the
    intersection area divided by the area of the candidate only (not the
    union of both boxes).

    """
    if len(boxes) == 0:
        return []

    # The `np.float` alias no longer exists in current NumPy; use the builtin.
    boxes = np.asarray(boxes, dtype=float)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        # The best remaining box sits at the end of the ascending ranking.
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        rest = idxs[:last]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[rest]

        # Drop the picked box and every box it overlaps too strongly.
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick


================================================
FILE: src/detection.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np


class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    box_3d : array_like | NoneType
        3D box with at least 5 entries. Entry 1 is shifted by half of entry 4
        on construction (see the note in `__init__`). The passed object is
        copied and never modified.
    confidence : float
        Detector confidence score.
    appearance_feature : object
        Stored unchanged on the instance.
    feature : array_like | NoneType
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    box_3d : ndarray | NoneType
        float32 copy of the 3D box after the shift, or None.
    confidence : float
        Detector confidence score.
    appearance_feature : object
        The value passed to the constructor.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):
        # The `np.float` alias no longer exists in current NumPy; use the builtin.
        self.tlwh = np.asarray(tlwh, dtype=float)
        # Note that detections format is centre of 3D box and dimensions (not bottom face)
        if box_3d is not None:
            # Work on a private copy so the caller's box is not shifted as a
            # side effect (building two Detections from one box would
            # otherwise apply the shift twice).
            shifted = np.array(box_3d, dtype=float)
            shifted[1] -= shifted[4]/2
            self.box_3d = shifted.astype(np.float32)
        else:
            self.box_3d = None
        self.confidence = float(confidence)
        self.appearance_feature = appearance_feature
        self.feature = feature


    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xywh(self):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        return ret

    def get_3d_distance(self):
        """Return sqrt(box_3d[0]**2 + box_3d[2]**2), or None without a 3D box."""
        if self.box_3d is None:
            return None
        return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)

================================================
FILE: src/distances.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.

Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""

import numpy as np
import pdb

def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Pairwise squared Euclidean distances between object and hypothesis points.

    Params
    ------
    objs : NxM array
        Object points, one M-dimensional point per row.
    hyps : KxM array
        Hypothesis points, one M-dimensional point per row.

    Kwargs
    ------
    max_d2 : float
        Largest squared distance that is kept. Entries above it are replaced
        by np.nan, which signals do-not-pair. Defaults to +inf.

    Returns
    -------
    C : NxK array
        Squared distances or np.nan. An empty (0, 0) array is returned when
        either input has no elements.
    """

    obj_pts = np.atleast_2d(objs).astype(float)
    hyp_pts = np.atleast_2d(hyps).astype(float)

    if obj_pts.size == 0 or hyp_pts.size == 0:
        return np.empty((0,0))

    assert hyp_pts.shape[1] == obj_pts.shape[1], "Dimension mismatch"

    dists = np.empty((obj_pts.shape[0], hyp_pts.shape[0]))
    for row, obj_pt in enumerate(obj_pts):
        for col, hyp_pt in enumerate(hyp_pts):
            delta = obj_pt - hyp_pt
            dists[row, col] = np.dot(delta, delta)

    dists[dists > max_d2] = np.nan
    return dists


def iou_matrix(objs, hyps, max_iou=1.):
    """Pairwise IoU distance between object and hypothesis rectangles.

    The distance is defined as

        d(a,b) = 1. - isect(a, b) / union(a, b)

    with isect(a,b) the intersection area and union(a, b) the union area of
    the two rectangles. It is 0 for a perfect overlap and 1 for no overlap.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Largest distance that is kept. Entries above it are replaced by
        np.nan, which signals do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also used when the union area is zero). An empty
        (0, 0) array is returned when either input has no elements.
    """

    obj_rects = np.atleast_2d(objs).astype(float)
    hyp_rects = np.atleast_2d(hyps).astype(float)

    if obj_rects.size == 0 or hyp_rects.size == 0:
        return np.empty((0,0))

    assert obj_rects.shape[1] == 4
    assert hyp_rects.shape[1] == 4

    # Bottom-right corners and areas, computed once per rectangle.
    obj_br = obj_rects[:, :2] + obj_rects[:, 2:]
    hyp_br = hyp_rects[:, :2] + hyp_rects[:, 2:]
    obj_area = obj_rects[:, 2]*obj_rects[:, 3]
    hyp_area = hyp_rects[:, 2]*hyp_rects[:, 3]

    dists = np.empty((obj_rects.shape[0], hyp_rects.shape[0]))

    for row in range(obj_rects.shape[0]):
        for col in range(hyp_rects.shape[0]):
            top_left = np.maximum(obj_rects[row, :2], hyp_rects[col, :2])
            extent = np.maximum(np.minimum(obj_br[row], hyp_br[col]) - top_left, 0)
            isect_a = extent[0]*extent[1]
            union_a = obj_area[row] + hyp_area[col] - isect_a
            dists[row, col] = (1. - isect_a / union_a) if union_a != 0 else np.nan

    dists[dists > max_iou] = np.nan
    return dists


def find_area(vertices):
    """Area of a polygon given as an ordered sequence of 2D vertices.

    Uses the shoelace formula in the form sum_i x_i * (y_{i+1} - y_{i-1}) / 2
    with indices wrapping around; the absolute value makes the result
    independent of the winding direction.
    """
    count = len(vertices)
    twice_area = 0
    for i, vertex in enumerate(vertices):
        next_y = vertices[(i+1)%count][1]
        prev_y = vertices[i-1][1]
        twice_area += vertex[0]*(next_y - prev_y)
    return 0.5*abs(twice_area)

def get_angle(p):
    """Polar angle of the 2D point p = (x, y), mapped from arctan2's range into [0, 2*pi)."""
    x, y = p
    raw_angle = np.arctan2(y,x)
    return raw_angle + np.pi*2 if raw_angle < 0 else raw_angle

def clip_polygon(box1, box2):
    """Clip the ground-plane footprint of box1 by the footprint of box2.

    Both boxes are 7-element sequences (x, y, z, l, h, w, theta); only x, z,
    l, w and theta are used. The footprint of box1 is clipped successively
    against the four half-planes of box2 (Sutherland-Hodgman clipping).

    Returns
    -------
    List of (x, z) vertices of the intersection polygon, expressed relative to
    the centre of box1 and ordered by angle around the polygon's own centroid.
    The list is empty when the footprints do not overlap.

    Notes
    -----
    Two fixes compared to the earlier implementation:
    * box1's corners are now listed in cyclic order. They used to be listed
      as (+l,+w), (+l,-w), (-l,+w), (-l,-w), i.e. as a self-intersecting
      bow-tie, which made the clipper cut along the diagonals and
      under-estimate the overlap.
    * the final angular ordering is done around the centroid of the clipped
      polygon rather than around the centre of box1, which can lie outside
      the intersection.
    """
    # Half-planes of box2: a point (px, pz) is inside when a*px + b*pz < c.
    x, y, z, l, h, w, theta = box2
    theta = -theta
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    box2_edges = np.asarray([(-cos_t, -sin_t, l/2-x*cos_t-z*sin_t),
                    (-sin_t, cos_t, w/2-x*sin_t+z*cos_t),
                    (cos_t, sin_t, l/2+x*cos_t+z*sin_t),
                    (sin_t, -cos_t, w/2+x*sin_t-z*cos_t)])

    x, y, z, l, h, w, theta = box1
    theta = -theta
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Corners in cyclic order: (+l,+w), (+l,-w), (-l,-w), (-l,+w).
    out_vertices = [(x+l/2*cos_t-w/2*sin_t, z+l/2*sin_t+w/2*cos_t),
                    (x+l/2*cos_t+w/2*sin_t, z+l/2*sin_t-w/2*cos_t),
                    (x-l/2*cos_t+w/2*sin_t, z-l/2*sin_t-w/2*cos_t),
                    (x-l/2*cos_t-w/2*sin_t, z-l/2*sin_t+w/2*cos_t)]

    for edge in box2_edges:
        vertex_list = out_vertices[:]
        out_vertices = []
        for idx, current_vertex in enumerate(vertex_list):
            # idx-1 wraps to the last vertex for idx == 0, closing the polygon.
            previous_vertex = vertex_list[idx-1]
            if point_inside_edge(current_vertex, edge):
                if not point_inside_edge(previous_vertex, edge):
                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))
                out_vertices.append(current_vertex)
            elif point_inside_edge(previous_vertex, edge):
                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))

    # Remove (near-)duplicate vertices, keeping the first occurrence.
    unique_vertices = []
    for vertex in out_vertices:
        is_duplicate = any(abs(vertex[0] - kept[0]) < 1e-6 and abs(vertex[1] - kept[1]) < 1e-6
                           for kept in unique_vertices)
        if not is_duplicate:
            unique_vertices.append(vertex)
    if not unique_vertices:
        return []

    # The centroid of a convex polygon's vertices lies inside it, so sorting by
    # angle around it yields a valid boundary order.
    centroid_x = sum(v[0] for v in unique_vertices)/len(unique_vertices)
    centroid_z = sum(v[1] for v in unique_vertices)/len(unique_vertices)
    unique_vertices.sort(key = lambda v: get_angle((v[0]-centroid_x, v[1]-centroid_z)))

    return [(v[0]-x, v[1]-z) for v in unique_vertices]

def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through pt1 and pt2 with line1.

    Lines are triples (a, b, c) describing a*x + b*y = c. line1 has to be a
    numpy array (it is scaled and subtracted as a whole in the general case).

    Returns the intersection as a tuple (x, y).
    """
    # Line through the two points: vertical lines get the form x = pt1[0].
    slope = np.inf if pt1[0] == pt2[0] else (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
    line2 = (1, 0, pt1[0]) if np.isinf(slope) else (slope, -1, pt1[0]*slope-pt1[1])

    a1, b1, c1 = line1[0], line1[1], line1[2]
    a2, b2, c2 = line2

    if b1 == 0:
        # line1 is vertical: x is fixed by line1, y follows from line2.
        x = c1/a1
        return (x, (c2 - a2*x)/b2)
    if a1 == 0:
        # line1 is horizontal: y is fixed by line1, x follows from line2.
        y = c1/b1
        return ((c2 - b2*y)/a2, y)
    if b2 == 0:
        # The segment is vertical: x is fixed by the points, y follows from line1.
        x = pt1[0]
        return (x, (c1-x*a1)/b1)

    # General case: eliminate y by subtracting a scaled line1 from line2.
    reduced = (line2 - line1*(b2/b1))
    x = reduced[2]/reduced[0]
    return (x, (c2 - a2*x)/b2)

def point_inside_edge(pt, edge):
    """Tell whether pt lies strictly inside the half-plane a*x + b*y < c.

    edge is the triple (a, b, c). A margin of 1e-6 is subtracted from c, so
    points on (or within the margin of) the boundary count as outside.
    """
    projection = pt[0]*edge[0] + pt[1]*edge[1]
    return bool(projection < edge[2] - 1e-6)


def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Computes the 'intersection over union (IoU)' distance matrix between 3D boxes.

    The distance is

        d(a,b) = 1. - isect(a, b) / union(a, b)

    where isect is the clipped footprint area (via ``clip_polygon`` and
    ``find_area``) multiplied by the overlap of the two boxes along the
    second coordinate, each box spanning ``[y - h, y]``, and union is the sum
    of both box volumes minus isect.

    Params
    ------
    objs : Nx7 array
        Object boxes in rows; columns 1, 3, 4, 5 are read here as y, l, h, w
    hyps : Kx7 array
        Hypothesis boxes in rows, same layout

    Kwargs
    ------
    max_iou : float
        Maximum tolerable distance. Entries with a larger distance are set to
        np.nan signalling do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distance matrix containing pairwise distances or np.nan. An empty
        (0, 0) array is returned when either input is empty.
    """

    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if 0 in (objs.size, hyps.size):
        return np.empty((0,0))
    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o, obj in enumerate(objs):
        obj_volume = obj[3]*obj[4]*obj[5]
        obj_y_minus_h = obj[1] - obj[4]
        for h, hyp in enumerate(hyps):
            footprint = find_area(clip_polygon(obj, hyp))
            overlap = min(obj[1], hyp[1]) - max(obj_y_minus_h, hyp[1]-hyp[4])
            intersect = footprint*overlap
            union = obj_volume + hyp[3]*hyp[4]*hyp[5] - intersect
            # A zero union cannot be normalised; mark the pair as unusable.
            C[o, h] = 1. - intersect / union if union != 0 else np.nan
    C[C > max_iou] = np.nan
    return C


================================================
FILE: src/double_measurement_kf.py
================================================
import random
import numpy as np
import scipy.linalg
import EKF
import pdb
import kf_2d
import os
import pickle
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
np.set_printoptions(precision=4, suppress=True)
from calibration import Calibration
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from evaluation.distances import iou_matrix

class KF_3D(kf_2d.KalmanFilter2D):
    """
    3D Kalman Filter that tracks objects in 3D space

        The 9-dimensional state space

            x, y, z, l, h, w, theta, vx, vz

        contains the bounding box position (x, y, z), the box dimensions l, h, w,
        the heading angle theta, and the x and z velocities.

        Object motion follows a constant velocity model in x and z. The first
        seven state components (x, y, z, l, h, w, theta) are taken as direct
        observation of the state space (linear observation model).
    """
    def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight,
                    std_process, std_measurement_2d, std_measurement_3d,
                    initial_uncertainty, omni = True, debug=True):
        self.ndim, self.dt = 9, 4.

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(self.ndim, self.ndim)
        self._motion_mat[0, 7] = self.dt
        self._motion_mat[2, 8] = self.dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(self.ndim - 2, self.ndim)
        if omni:
            self.x_constant = calib.img_shape[2]/(2*np.pi)
            self.y_constant = calib.median_focal_length_y
            self.calib = calib
        else:
            self.projection_matrix = calib.P

        self.omni = omni
        self._std_weight_pos_3d = pos_weight_3d
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._std_weight_theta= theta_weight

        self._std_weight_process = std_process
        self._initial_uncertainty = initial_uncertainty
        self._std_weight_measurement_2d = std_measurement_2d
        self._std_weight_measurement_3d = std_measurement_3d
        self.debug = debug

    def initiate(self, measurement_3d):

        mean_pos = measurement_3d
        mean_vel = np.zeros((2,))
        mean = np.r_[mean_pos, mean_vel]
        std = [
                self._std_weight_pos_3d,
                self._std_weight_pos_3d * 0.15,
                self._std_weight_pos_3d,
                self._std_weight_pos_3d * 0.015,
                self._std_weight_pos_3d * 0.015,
                self._std_weight_pos_3d * 0.015,
                self._std_weight_theta * 10,
                self._std_weight_vel*5,
                self._std_weight_vel*5]
        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2

        return mean, covariance

    def get_process_noise(self, mean):

        std_pos = [
                self._std_weight_pos_3d, # x
                self._std_weight_pos_3d * 0.15, # y
                self._std_weight_pos_3d, # z
                self._std_weight_pos_3d * 0.015, # l
                self._std_weight_pos_3d * 0.015, # h
                self._std_weight_pos_3d * 0.015, # w
                self._std_weight_theta # theta
            ]
        std_vel = [
            self._std_weight_vel, # x
            self._std_weight_vel, # z
            ]
        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2
        return motion_cov

    def get_2d_measurement_noise(self, measurement_2d):
        # Returns Qt the sensor noise covariance

        # Measurement uncertainty scaled by estimated height
        std = [
                self._std_weight_pos*measurement_2d[2],
                self._std_weight_pos*measurement_2d[3],
                self._std_weight_pos*measurement_2d[2],
                self._std_weight_pos*measurement_2d[3]]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_2d**2
        return innovation_cov

    def get_3d_measurement_noise(self, measurement):
        # Returns Qt the sensor noise covariance

        # Measurement uncertainty scaled by estimated height
        std = [
            self._std_weight_pos_3d, # x
            self._std_weight_pos_3d * 0.5, # y
            self._std_weight_pos_3d, # z
            self._std_weight_pos_3d, # l
            self._std_weight_pos_3d, # h
            self._std_weight_pos_3d, # w
            self._std_weight_theta * 25 # theta
            ]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_3d**2
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False,
                        use_3d=True):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        if not use_3d:
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            cov = self.project_cov_2d(mean, covariance, H_2d)
            mean = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        else:
            mean, cov = mean[:7], self.project_cov(mean, covariance)
        if only_position:
            if use_3d:
                mean, cov = mean[[0, 2]], np.reshape(cov[[0, 0, 2, 2], [0, 2, 0, 2]], (2,2))
                measurements = measurements[:, [0, 2]]
            else:
                mean, cov = mean[:2], cov[:2, :2]
                measurements = measurements[:, :2]
        self.LIMIT=0.3
        if np.amax(cov) > self.LIMIT:
            cov_2 = cov * self.LIMIT / np.amax(cov)
        return EKF.squared_mahalanobis_distance(mean, cov2, measurements)

    def project_cov(self, mean, covariance):
        # Returns S the innovation covariance (projected covariance)

        measurement_noise = self.get_3d_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                          self._observation_mat.T))
                     + measurement_noise)
        return innovation_cov

    def project_cov_2d(self, mean, covariance, H_2d):
        # Returns S the innovation covariance (projected covariance)

        measurement_noise = self.get_2d_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((H_2d, covariance,
                                          H_2d.T))
                     + measurement_noise)
        return innovation_cov
    # @profile
    def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        The 3D measurement (when usable) is applied first through the linear
        observation matrix. The 2D measurement (when given) is applied
        afterwards through the linearised box-projection matrix, evaluated at
        the mean obtained after the 3D step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (9 dimensional).
        covariance : ndarray
            The state's covariance matrix (9x9 dimensional).
        measurement_2d : ndarray
            The 2D box measurement; its first four entries are compared with
            (min_x, min_y, width, height) of the projected box. With
            ``JPDA=True`` this is iterated as a collection of such
            measurements.
        measurement_3d : ndarray
            The 7 dimensional measurement vector (x, y, z, l, h, w, theta),
            compared with the first seven state entries. With ``JPDA=True``
            this is iterated as a collection of such measurements. The 3D
            step is skipped when this is None or contains a None entry.
        marginalization : sequence
            Only used with ``JPDA=True``: association weights, where index 0
            belongs to the dummy (no detection) node and index i+1 to
            measurement i.
        JPDA : bool
            Selects the probabilistically weighted update over the plain
            single-measurement update.

        Returns
        -------
        (ndarray, ndarray) or (ndarray, ndarray, ndarray)
            The measurement-corrected mean and covariance. When
            ``self.debug`` is set, the mean after the 3D step is appended as
            a third element.

        """

        # NOTE(review): this early exit returns two values even when
        # self.debug is set, while the normal exit then returns three.
        if np.any(np.isnan(mean)):
            return mean, covariance
        out_cov = deepcopy(covariance)
        H_3d = self._observation_mat
        do_3d = True
        covariance_3d = None
        post_3d_mean = mean
        # The 3D step needs a measurement without any None entry.
        if measurement_3d is None:
            do_3d = False
        else:
            for meas in measurement_3d:
                if meas is None:
                    do_3d = False
                    break
        if do_3d:
            S_matrix = self.project_cov(mean, out_cov)
            try:
                chol_factor, lower = scipy.linalg.cho_factor(
                    S_matrix, lower=True, check_finite=False)
                kalman_gain = scipy.linalg.cho_solve(
                    (chol_factor, lower), np.dot(out_cov, H_3d.T).T,
                    check_finite=False).T
            except:
                # in case cholesky factorization fails, revert to standard solver
                kalman_gain = np.linalg.multi_dot((out_cov, H_3d.T, np.linalg.inv(S_matrix)))
            # Covariance reduction: P - K S K^T.
            out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))
            if JPDA:
                # Weighted innovation and spread-of-innovations over all
                # candidate measurements.
                innovation_3d = 0
                cov_uncertainty_3d = 0
                for i, detection_3d in enumerate(measurement_3d):
                    innovation_partial = detection_3d - mean[:7]
                    innovation_3d += innovation_partial * marginalization[i+1]
                    cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)
                # Blend the updated covariance with the prior according to the
                # weight of the dummy node, then add the innovation spread.
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                out_cov += marginalization[0]*covariance
            else:
                # NOTE(review): out_cov was already reduced by K S K^T above,
                # so this subtracts a second correction term - confirm intended.
                out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))
                innovation_3d = measurement_3d - mean[:7]
            mean = mean + np.dot(kalman_gain, innovation_3d)
            post_3d_mean = mean
            covariance_3d = deepcopy(out_cov)

        if measurement_2d is not None:
            # Linearise the box projection around the current mean.
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
            #update based on 2D
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            # Clamp the vertical box extent using calib.img_shape[1].
            # NOTE(review): self.calib is only assigned in __init__ when
            # omni is True.
            if min_y < 0:
                min_y = 0
            if max_y >= self.calib.img_shape[1]:
                max_y = self.calib.img_shape[1] - 1
            S_matrix = self.project_cov_2d(np.array([min_x, min_y, max_x - min_x, max_y - min_y]), out_cov, H_2d)
            try:
                chol_factor, lower = scipy.linalg.cho_factor(
                    S_matrix, lower=True, check_finite=False)
                kalman_gain = scipy.linalg.cho_solve(
                    (chol_factor, lower), np.dot(out_cov, H_2d.T).T,
                    check_finite=False).T
            except:
                # in case cholesky factorization fails, revert to standard solver
                kalman_gain = np.linalg.multi_dot((out_cov, H_2d.T, np.linalg.inv(S_matrix)))
            # Covariance update: (I - K H) P.
            out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)
            if JPDA:
                innovation_2d = 0
                cov_uncertainty_2d = 0
                for i, detection_2d in enumerate(measurement_2d):
                    innovation_partial = detection_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])
                    innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
                    cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                # The "no detection" share falls back to the covariance that
                # was valid before the 2D step.
                if covariance_3d is None:
                    out_cov += marginalization[0]*covariance
                else:
                    out_cov += marginalization[0]*covariance_3d
            else:
                innovation_2d = measurement_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])
            mean = mean + np.dot(kalman_gain, innovation_2d)

        if self.debug:
            return mean, out_cov, post_3d_mean
        return mean, out_cov

    # @profile
    def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):

        min_x = np.inf
        min_x_idx = None
        max_x = -np.inf
        max_x_idx = None
        min_y = np.inf
        min_y_idx = None
        max_y = -np.inf
        max_y_idx = None
        for idx, pt in enumerate(corner_points):
            if pt[0] < min_x:
                min_x_idx = idx
                min_x = pt[0]
            if pt[0] > max_x:
                max_x_idx = idx
                max_x = pt[0]
            if pt[1] < min_y:
                min_y_idx = idx
                min_y = pt[1]
            if pt[1] > max_y:
                max_y_idx = idx
                max_y = pt[1]
        if self.omni:
            jac_x = np.dot(self.jacobian_omni(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))
            jac_y = np.dot(self.jacobian_omni(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))
            jac_w = np.dot(self.jacobian_omni(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x
            jac_h = np.dot(self.jacobian_omni(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y
        else:
            jac_x = np.dot(self.jacobian(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))
            jac_y = np.dot(self.jacobian(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))
            jac_w = np.dot(self.jacobian(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x
            jac_h = np.dot(self.jacobian(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y
        jac = np.vstack([jac_x, jac_y, jac_w, jac_h])
        jac = np.hstack([jac, np.zeros((jac.shape[0], 2))])
        return jac
    # Jacobian for projective transformation
    def jacobian(self, pt_3d):
        pt_2d = self.project_2d(pt_3d[None, :])
        den = np.sum(self.projection_matrix[2] * pt_3d)
        dxy = (self.projection_matrix[0:2] - self.projection_matrix[2:3] * pt_2d.T)/den

        return dxy[:, :3]

    def jacobian_omni(self, pt_3d):
        jac = np.zeros((2, 3))
        x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]
        denominator = (x**2 + z**2)
        jac[0, 0] = self.x_constant*(z/denominator)
        jac[0, 2] = -self.x_constant*(x/denominator)

        jac[1, 0] = self.y_constant*x*y/denominator
        jac[1, 1] = -self.y_constant
        jac[1,2] = self.y_constant*z*y/denominator
        jac[1, :] /= np.sqrt(denominator)

        return jac

    def calculate_corners(self, box):
        x,y,z,l,h,w,theta = box[:7]
        pt_3d = []
        x_delta_1 = np.cos(theta)*l/2+np.sin(theta)*w/2
        x_delta_2 = np.cos(theta)*l/2 - np.sin(theta)*w/2
        z_delta_1 = np.sin(theta)*l/2-np.cos(theta)*w/2
        z_delta_2 = np.sin(theta)*l/2+np.cos(theta)*w/2
        pt_3d.append((x+x_delta_1, y + h/2, z+z_delta_1, 1))
        pt_3d.append((x+x_delta_2, y + h/2, z+z_delta_2, 1))
        pt_3d.append((x-x_delta_2, y + h/2, z-z_delta_2, 1))
        pt_3d.append((x-x_delta_1, y + h/2, z-z_delta_1, 1))
        pt_3d.append((x+x_delta_1, y - h/2, z+z_delta_1, 1))
        pt_3d.append((x+x_delta_2, y - h/2, z+z_delta_2, 1))
        pt_3d.append((x-x_delta_2, y - h/2, z-z_delta_2, 1))
        pt_3d.append((x-x_delta_1, y - h/2, z-z_delta_1, 1))
        pts_3d = np.vstack(pt_3d)
        pts_2d = self.project_2d(pts_3d)
        return pts_2d, pts_3d

    def corner_jacobian(self, pt_3d, corner_idx):
        _, _, _, l, _, w, theta = pt_3d[:7]
        jac = np.eye(3,7)

        jac[1, 4] = 0.5 if corner_idx < 4 else -0.5

        jac[0, 3] = 0.5*np.sin(theta) if corner_idx % 4 < 2 else -0.5*np.sin(theta)
        jac[0, 5] = 0.5*np.cos(theta) if corner_idx % 2 == 0 else -0.5*np.cos(theta)

        jac[2, 3] = 0.5*np.cos(theta) if corner_idx%4 < 2 else -0.5*np.cos(theta)
        jac[2, 5] = 0.5*np.sin(theta) if corner_idx%2 == 0 else -0.5*np.sin(theta)

        if corner_idx%4 == 0:
            jac[0, 6] = -np.sin(theta)*l/2 + np.cos(theta)*w/2
            jac[2, 6] = np.cos(theta)*l/2 + np.sin(theta)*w/2
        elif corner_idx%4==1:
            jac[0, 6] = -np.sin(theta)*l/2 - np.cos(theta)*w/2
            jac[2, 6] = np.cos(theta)*l/2 - np.sin(theta)*w/2
        elif corner_idx%4==2:
            jac[0, 6] = +np.sin(theta)*l/2 + np.cos(theta)*w/2
            jac[2, 6] = -np.cos(theta)*l/2 + np.sin(theta)*w/2
        else:
            jac[0, 6] = +np.sin(theta)*l/2 - np.cos(theta)*w/2
            jac[2, 6] = -np.cos(theta)*l/2 - np.sin(theta)*w/2

        return jac

    def project_2d(self, pts_3d):
        if self.omni:
            pts_2d = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))
        else:
            pts_2d = np.dot(pts_3d, self.projection_matrix.T)
            pts_2d /= np.expand_dims(pts_2d[:, 2], 1)
        for pt in pts_2d:
            if pt[1] > self.calib.img_shape[1]:
                pt[1] = self.calib.img_shape[1]
            elif pt[1] < 0:
                pt[1] = 0
        # min_x = np.argmin(pts_2d[:, 0])
        # max_x = np.argmax(pts_2d[:, 0])
        # if abs(min_x - max_x) > 1800:
        #     # wrap around!
        #     pts_2d[min_x], pts_2d[max_x] = pts_2d[max_x], pts_2d[min_x]
        #     pts_2d[max_x, 0] += self.calib.img_shape[2]
        return pts_2d[:, :2]


def swap(detections_3d, iou, idx, swap_prob = 0):
    """Return detection ``idx``, or occasionally its most-overlapping neighbour.

    With probability of roughly ``swap_prob`` the detection with the largest
    entry in row ``idx`` of ``iou`` (excluding ``idx`` itself) is returned
    instead, provided that entry exceeds 0.4.

    Parameters
    ----------
    detections_3d : sequence
        Detections, indexable by integer.
    iou : array-like
        Square matrix of pairwise overlap scores; it is not modified.
    idx : int
        Index of the detection being considered.
    swap_prob : float
        A swap is attempted when ``random.random()`` does not exceed this.

    Returns
    -------
    The selected element of ``detections_3d``.
    """
    if random.random() > swap_prob:
        return detections_3d[idx]
    # Copy the row: masking the self-overlap on a view would silently
    # overwrite the diagonal of the caller's matrix.
    iou_row = np.array(iou[idx])
    iou_row[idx] = -1
    max_idx = np.argmax(iou_row)
    if iou_row[max_idx] > 0.4:
        return detections_3d[max_idx]
    return detections_3d[idx]


================================================
FILE: src/evaluation/__init__.py
================================================


================================================
FILE: src/evaluation/distances 2.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.

Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""

import numpy as np
import pdb

def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Squared Euclidean distances between every object and hypothesis point.

    Params
    ------
    objs : NxM array
        Object points of dim M in rows
    hyps : KxM array
        Hypothesis points of dim M in rows

    Kwargs
    ------
    max_d2 : float
        Largest squared distance still accepted. Larger entries are replaced
        by np.nan to signal do-not-pair. Defaults to +inf.

    Returns
    -------
    C : NxK array
        Pairwise squared distances or np.nan; an empty (0, 0) array when
        either input is empty.
    """

    obj_pts = np.atleast_2d(objs).astype(float)
    hyp_pts = np.atleast_2d(hyps).astype(float)

    if 0 in (obj_pts.size, hyp_pts.size):
        return np.empty((0,0))

    assert hyp_pts.shape[1] == obj_pts.shape[1], "Dimension mismatch"

    C = np.empty((obj_pts.shape[0], hyp_pts.shape[0]))
    for row, obj in enumerate(obj_pts):
        for col, hyp in enumerate(hyp_pts):
            delta = obj - hyp
            C[row, col] = delta.dot(delta)

    C[C > max_d2] = np.nan
    return C


def iou_matrix(objs, hyps, max_iou=1.):
    """'Intersection over union (IoU)' distance matrix between rectangles.

    The distance is

        d(a,b) = 1. - isect(a, b) / union(a, b)

    with isect the overlap area and union the area of the union of the two
    rectangles: 0 for identical rectangles, 1 for disjoint ones.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Largest distance still accepted. Larger entries are replaced by
        np.nan to signal do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Pairwise distances or np.nan (also used when the union area is zero);
        an empty (0, 0) array when either input is empty.
    """

    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if 0 in (objs.size, hyps.size):
        return np.empty((0,0))

    assert objs.shape[1] == 4
    assert hyps.shape[1] == 4

    # Corner opposite to (x, y) for every rectangle.
    far_objs = objs[:, :2] + objs[:, 2:]
    far_hyps = hyps[:, :2] + hyps[:, 2:]

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o, obj in enumerate(objs):
        for h, hyp in enumerate(hyps):
            near = np.maximum(obj[:2], hyp[:2])
            extent = np.maximum(np.minimum(far_objs[o], far_hyps[h]) - near, 0)
            overlap_area = extent[0]*extent[1]
            union_area = obj[2]*obj[3] + hyp[2]*hyp[3] - overlap_area
            C[o, h] = 1. - overlap_area / union_area if union_area != 0 else np.nan

    C[C > max_iou] = np.nan
    return C


def find_area(vertices):
    """Area of the polygon whose vertices are given in order (shoelace formula)."""
    count = len(vertices)
    twice_signed = sum(
        vertices[i][0]*(vertices[(i+1)%count][1] - vertices[i-1][1])
        for i in range(count))
    return 0.5*abs(twice_signed)

def get_angle(p):
    """Polar angle of the 2D point ``p``, shifted into the range [0, 2*pi)."""
    x, y = p
    raw = np.arctan2(y,x)
    # arctan2 yields (-pi, pi]; move the negative half up by a full turn.
    return raw + np.pi*2 if raw < 0 else raw

def clip_polygon(box1, box2):
    """Clip the ground-plane footprint of ``box1`` by the edges of ``box2``.

    Both boxes are (x, y, z, l, h, w, theta); only x, z, l, w and theta are
    used. The footprint of box1 is clipped successively against the four
    half-planes bounding box2, near-duplicate vertices (closer than 1e-6 in
    both coordinates) are dropped, and the remaining vertices are returned
    relative to the centre of box1, sorted by polar angle.
    """
    # Half-planes a*X + b*Z < c describing the inside of box2.
    cx, _, cz, c_len, _, c_wid, c_theta = box2
    c_theta = -c_theta
    cos_c, sin_c = np.cos(c_theta), np.sin(c_theta)
    box2_edges = np.asarray([(-cos_c, -sin_c, c_len/2-cx*cos_c-cz*sin_c),
                    (-sin_c, cos_c, c_wid/2-cx*sin_c+cz*cos_c),
                    (cos_c, sin_c, c_len/2+cx*cos_c+cz*sin_c),
                    (sin_c, -cos_c, c_wid/2+cx*sin_c-cz*cos_c)])

    # Footprint corners of box1.
    x, _, z, length, _, width, theta = box1
    theta = -theta
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    box1_vertices = [(x+length/2*cos_t-width/2*sin_t, z+length/2*sin_t+width/2*cos_t),
                        (x+length/2*cos_t+width/2*sin_t, z+length/2*sin_t-width/2*cos_t),
                        (x-length/2*cos_t-width/2*sin_t, z-length/2*sin_t+width/2*cos_t),
                        (x-length/2*cos_t+width/2*sin_t, z-length/2*sin_t-width/2*cos_t)]
    polygon = sort_points(box1_vertices, (x, z))

    # Clip against one edge at a time; each vertex is paired with its
    # predecessor (the first vertex with the last one).
    for edge in box2_edges:
        ring, polygon = polygon, []
        for previous_vertex, current_vertex in zip(ring[-1:] + ring[:-1], ring):
            current_in = point_inside_edge(current_vertex, edge)
            previous_in = point_inside_edge(previous_vertex, edge)
            if current_in:
                if not previous_in:
                    polygon.append(compute_intersection_point(previous_vertex, current_vertex, edge))
                polygon.append(current_vertex)
            elif previous_in:
                polygon.append(compute_intersection_point(previous_vertex, current_vertex, edge))

    # Mark later vertices that coincide with an earlier kept vertex.
    duplicates = set()
    for i, kept in enumerate(polygon):
        if i in duplicates:
            continue
        for j in range(i+1, len(polygon)):
            other = polygon[j]
            if abs(kept[0] - other[0]) < 1e-6 and abs(kept[1] - other[1]) < 1e-6:
                duplicates.add(j)

    recentred = [(v[0]-x, v[1]-z) for i, v in enumerate(polygon) if i not in duplicates]
    return sorted(recentred, key = lambda p: get_angle((p[0],p[1])))

def sort_points(pts, center):
    """Return ``pts`` ordered by their polar angle around ``center``.

    Points with equal angle keep their original relative order.
    """
    cx, cz = center
    by_angle = sorted(
        [(get_angle((pt[0]-cx, pt[1]-cz)), position) for position, pt in enumerate(pts)],
        key = lambda entry: entry[0])
    _, order = zip(*by_angle)
    return [pts[position] for position in order]

def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through ``pt1`` and ``pt2`` with ``line1``.

    Lines are coefficient triples ``(a, b, c)`` read as ``a*x + b*y = c``.

    Params
    ------
    pt1, pt2 : sequences whose first two entries are the x and y coordinates
    line1 : array-like ``(a, b, c)``; must support element-wise arithmetic
        (e.g. a numpy array) when neither line is axis-aligned

    Returns
    -------
    (x, y) : tuple with the coordinates of the intersection point
    """
    # Put the line through the two points into (a, b, c) form.
    if pt1[0] == pt2[0]:
        through_pts = (1, 0, pt1[0])
    else:
        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
        if np.isinf(slope):
            through_pts = (1, 0, pt1[0])
        else:
            through_pts = (slope, -1, pt1[0]*slope-pt1[1])

    # line1 has no y term: x is fixed by line1, y follows from the other line.
    if line1[1] == 0:
        x = line1[2]/line1[0]
        y = (through_pts[2] - through_pts[0]*x)/through_pts[1]
        return (x,y)

    # line1 has no x term: y is fixed by line1, x follows from the other line.
    if line1[0] == 0:
        y = line1[2]/line1[1]
        x = (through_pts[2] - through_pts[1]*y)/through_pts[0]
        return (x,y)

    # The line through the points is vertical: x is known, y comes from line1.
    if through_pts[1] == 0:
        x = pt1[0]
        y = (line1[2]-x*line1[0])/line1[1]
        return (x,y)

    # General case: cancel the y coefficient, solve for x, back-substitute.
    eliminated = (through_pts - line1*(through_pts[1]/line1[1]))
    x = eliminated[2]/eliminated[0]
    y = (through_pts[2] - through_pts[0]*x)/through_pts[1]
    return (x,y)

def point_inside_edge(pt, edge):
    """Tell whether ``pt`` lies strictly inside the half-plane of ``edge``.

    ``edge`` is ``(a, b, c)``; the point counts as inside when
    ``a*x + b*y`` is smaller than ``c`` by more than 1e-6.
    Returns a plain ``bool``.
    """
    projected = pt[0]*edge[0] + pt[1]*edge[1]
    return bool(projected < edge[2] - 1e-6)


def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Computes the 'intersection over union (IoU)' distance matrix between 3D boxes.

    The distance is

        d(a,b) = 1. - isect(a, b) / union(a, b)

    where isect is the clipped footprint area (via ``clip_polygon`` and
    ``find_area``) multiplied by the overlap of the two boxes along the
    second coordinate, each box spanning ``[y - h, y]``, and union is the sum
    of both box volumes minus isect.

    Params
    ------
    objs : Nx7 array
        Object boxes in rows; columns 1, 3, 4, 5 are read here as y, l, h, w
    hyps : Kx7 array
        Hypothesis boxes in rows, same layout

    Kwargs
    ------
    max_iou : float
        Maximum tolerable distance. Entries with a larger distance are set to
        np.nan signalling do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distance matrix containing pairwise distances or np.nan. An empty
        (0, 0) array is returned when either input is empty.
    """

    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))
    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o in range(objs.shape[0]):
        for h in range(hyps.shape[0]):
            base_area = find_area(clip_polygon(objs[o], hyps[h]))
            # Overlap of the intervals [y - h, y]: smaller upper bound minus
            # larger lower bound. The previous max(..) - min(..) measured the
            # combined span of both boxes and inflated the intersection.
            height = min(objs[o][1], hyps[h][1]) - max(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4])
            intersect = base_area*height
            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect
            if union != 0:
                C[o, h] = 1. - intersect / union
            else:
                C[o, h] = np.nan
    C[C > max_iou] = np.nan
    return C


================================================
FILE: src/evaluation/distances.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.

Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""

import numpy as np
import pdb

def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Squared Euclidean distances between every object and hypothesis point.

    Params
    ------
    objs : NxM array
        Object points of dim M in rows
    hyps : KxM array
        Hypothesis points of dim M in rows

    Kwargs
    ------
    max_d2 : float
        Largest squared distance still accepted. Larger entries are replaced
        by np.nan to signal do-not-pair. Defaults to +inf.

    Returns
    -------
    C : NxK array
        Pairwise squared distances or np.nan; an empty (0, 0) array when
        either input is empty.
    """

    obj_pts = np.atleast_2d(objs).astype(float)
    hyp_pts = np.atleast_2d(hyps).astype(float)

    if 0 in (obj_pts.size, hyp_pts.size):
        return np.empty((0,0))

    assert hyp_pts.shape[1] == obj_pts.shape[1], "Dimension mismatch"

    C = np.empty((obj_pts.shape[0], hyp_pts.shape[0]))
    for row, obj in enumerate(obj_pts):
        for col, hyp in enumerate(hyp_pts):
            delta = obj - hyp
            C[row, col] = delta.dot(delta)

    C[C > max_d2] = np.nan
    return C


def iou_matrix(objs, hyps, max_iou=1.):
    """Computes the IoU *distance* matrix between object and hypothesis rectangles.

    Each entry is

        d(a,b) = 1. - isect(a, b) / union(a, b)

    where isect(a,b) is the area of intersection of the two rectangles and
    union(a,b) the area of their union. The distance is 0 when the rectangles
    coincide and 1 when they do not overlap at all.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows; (x,y) is the corner with the
        smallest coordinates, the opposite corner is (x+w, y+h).
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Maximum tolerable overlap distance. Pairs with a strictly larger
        distance are set to np.nan signalling do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distance matrix containing pairwise distances or np.nan. Pairs whose
        union area is zero are np.nan as well. An empty (0, 0) array is
        returned when either input has no elements.
    """

    obj_rects = np.atleast_2d(objs).astype(float)
    hyp_rects = np.atleast_2d(hyps).astype(float)

    if obj_rects.size == 0 or hyp_rects.size == 0:
        return np.empty((0,0))

    assert obj_rects.shape[1] == 4
    assert hyp_rects.shape[1] == 4

    # Far corners: near corner plus (w, h).
    obj_far = obj_rects[:, :2] + obj_rects[:, 2:]
    hyp_far = hyp_rects[:, :2] + hyp_rects[:, 2:]

    C = np.empty((obj_rects.shape[0], hyp_rects.shape[0]))

    for o, obj in enumerate(obj_rects):
        obj_area = obj[2]*obj[3]
        for h, hyp in enumerate(hyp_rects):
            near_corner = np.maximum(obj[:2], hyp[:2])
            far_corner = np.minimum(obj_far[o], hyp_far[h])
            # Negative extents mean no overlap along that axis.
            overlap_wh = np.maximum(far_corner - near_corner, 0)
            isect_a = overlap_wh[0]*overlap_wh[1]
            union_a = obj_area + hyp[2]*hyp[3] - isect_a
            C[o, h] = 1. - isect_a / union_a if union_a != 0 else np.nan

    C[C > max_iou] = np.nan
    return C


def find_area(vertices):
    """Return the area of a polygon given as an ordered list of 2D vertices.

    Uses the shoelace formula: each vertex's first coordinate is weighted by
    the difference between the second coordinates of its successor and its
    predecessor (both taken cyclically). An empty list yields 0.0.
    """
    count = len(vertices)
    twice_signed_area = 0
    for idx, vertex in enumerate(vertices):
        next_second = vertices[(idx + 1) % count][1]
        prev_second = vertices[idx - 1][1]  # idx - 1 == -1 wraps to the last vertex
        twice_signed_area += vertex[0]*(next_second - prev_second)
    return 0.5*abs(twice_signed_area)

def get_angle(p):
    """Return the polar angle (radians) of the 2D point ``p`` = (x, y).

    np.arctan2 results that are negative are shifted by 2*pi so the returned
    angle is never negative.
    """
    x, y = p
    angle = np.arctan2(y,x)
    return angle + np.pi*2 if angle < 0 else angle

def clip_polygon(box1, box2):
    """Clip the ground-plane footprint of box1 against the footprint of box2.

    Both boxes are 7-element sequences unpacked as (x, y, z, l, h, w, theta);
    only x, z, l, w and theta are used here. The four corners of box1 are
    successively clipped by the four half-planes bounding box2
    (Sutherland-Hodgman style), near-duplicate vertices (closer than 1e-6 in
    both coordinates) are merged, and the surviving vertices are returned
    relative to box1's centre (x, z).

    The result is ordered by polar angle around the centroid of the clipped
    polygon. Ordering around box1's centre instead produces a
    self-intersecting vertex sequence whenever the overlap region does not
    contain that centre, which makes find_area() report a too small (often
    zero) area.

    Returns
    -------
    list of (x, z) tuples; empty when nothing of box1 survives the clipping.
    """
    #clips box 1 by the edges in box2
    x,_,z,l,_,w,theta = box2
    theta = -theta

    # Each row (a, b, c) encodes the half-plane a*px + b*pz < c that contains
    # the interior of box2 (see point_inside_edge).
    box2_edges = np.asarray([(-np.cos(theta), -np.sin(theta), l/2-x*np.cos(theta)-z*np.sin(theta)),
                    (-np.sin(theta), np.cos(theta), w/2-x*np.sin(theta)+z*np.cos(theta)),
                    (np.cos(theta), np.sin(theta), l/2+x*np.cos(theta)+z*np.sin(theta)),
                    (np.sin(theta), -np.cos(theta), w/2+x*np.sin(theta)-z*np.cos(theta))])
    x,_,z,l,_,w,theta = box1
    theta = -theta

    box1_vertices = [(x+l/2*np.cos(theta)-w/2*np.sin(theta), z+l/2*np.sin(theta)+w/2*np.cos(theta)),
                        (x+l/2*np.cos(theta)+w/2*np.sin(theta), z+l/2*np.sin(theta)-w/2*np.cos(theta)),
                        (x-l/2*np.cos(theta)-w/2*np.sin(theta), z-l/2*np.sin(theta)+w/2*np.cos(theta)),
                        (x-l/2*np.cos(theta)+w/2*np.sin(theta), z-l/2*np.sin(theta)-w/2*np.cos(theta))]
    out_vertices = sort_points(box1_vertices, (x, z))
    for edge in box2_edges:
        vertex_list = list(out_vertices)
        out_vertices = []
        for idx, current_vertex in enumerate(vertex_list):
            previous_vertex = vertex_list[idx-1]
            if point_inside_edge(current_vertex, edge):
                if not point_inside_edge(previous_vertex, edge):
                    # Entering the half-plane: add the crossing point first.
                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))
                out_vertices.append(current_vertex)
            elif point_inside_edge(previous_vertex, edge):
                # Leaving the half-plane: keep only the crossing point.
                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))

    # Merge vertices that coincide up to 1e-6, keeping the first occurrence.
    unique_vertices = []
    for vertex in out_vertices:
        is_duplicate = any(abs(vertex[0] - kept[0]) < 1e-6 and abs(vertex[1] - kept[1]) < 1e-6
                           for kept in unique_vertices)
        if not is_duplicate:
            unique_vertices.append(vertex)
    if not unique_vertices:
        return []

    # The centroid of a convex polygon's vertices lies inside the polygon, so
    # an angular sort around it always gives a valid boundary order.
    centroid_x = sum(v[0] for v in unique_vertices) / len(unique_vertices)
    centroid_z = sum(v[1] for v in unique_vertices) / len(unique_vertices)
    unique_vertices.sort(key = lambda v: get_angle((v[0]-centroid_x, v[1]-centroid_z)))
    return [(v[0]-x, v[1]-z) for v in unique_vertices]

def sort_points(pts, center):
    """Return ``pts`` reordered by increasing polar angle around ``center``.

    ``center`` is a pair of coordinates; each point's offset from it is passed
    to get_angle(). Points with equal angles keep their relative input order.
    """
    cx, cz = center
    indexed_offsets = []
    for position, vertex in enumerate(pts):
        indexed_offsets.append((position, (vertex[0]-cx, vertex[1]-cz)))
    indexed_offsets.sort(key = lambda entry: get_angle((entry[1][0], entry[1][1])))
    order, _ = zip(*indexed_offsets)
    return [pts[position] for position in order]

def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through ``pt1`` and ``pt2`` with ``line1``.

    ``line1`` is a numpy array (a, b, c) describing a*x + b*y = c. The line
    through the two points is put into the same form, (1, 0, x1) when it is
    vertical and (slope, -1, slope*x1 - y1) otherwise, and the 2x2 system is
    solved by elimination, with dedicated branches for axis-aligned lines.

    Returns the intersection as an (x, y) tuple.
    """
    same_x = pt1[0] == pt2[0]
    slope = np.inf if same_x else (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
    line2 = (1, 0, pt1[0]) if np.isinf(slope) else (slope, -1, pt1[0]*slope-pt1[1])

    a1, b1, c1 = line1[0], line1[1], line1[2]
    a2, b2, c2 = line2
    if b1 == 0:
        # line1 is vertical: x is fixed by line1, y follows from line2.
        x = c1/a1
        y = (c2 - a2*x)/b2
    elif a1 == 0:
        # line1 is horizontal: y is fixed by line1, x follows from line2.
        y = c1/b1
        x = (c2 - b2*y)/a2
    elif b2 == 0:
        # The segment's line is vertical: x is known, y follows from line1.
        x = pt1[0]
        y = (c1-x*a1)/b1
    else:
        # General case: eliminate y by subtracting a multiple of line1.
        tmp_line = (line2 - line1*(b2/b1))
        x = tmp_line[2]/tmp_line[0]
        y = (c2 - a2*x)/b2
    return (x,y)

def point_inside_edge(pt, edge):
    """Tell whether ``pt`` lies strictly inside the half-plane ``edge``.

    ``edge`` is (a, b, c); the point counts as inside when
    a*pt[0] + b*pt[1] is smaller than c by more than 1e-6.
    """
    projection = pt[0]*edge[0] + pt[1]*edge[1]
    return bool(projection < edge[2] - 1e-6)


def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Computes the 3D IoU *distance* matrix between object and hypothesis boxes.

    Each entry is

        d(a,b) = 1. - isect(a, b) / union(a, b)

    where isect(a,b) is the volume shared by the two boxes and union(a,b) the
    volume of their union. The shared volume is the area of the overlapping
    ground-plane footprint (clip_polygon + find_area) times the overlap of the
    two boxes along the y axis.

    Params
    ------
    objs : Nx7 array
        Object boxes (x,y,z,l,h,w,theta) in rows
    hyps : Kx7 array
        Hypothesis boxes (x,y,z,l,h,w,theta) in rows

    Kwargs
    ------
    max_iou : float
        Maximum tolerable overlap distance. Pairs with a strictly larger
        distance are set to np.nan signalling do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distance matrix containing pairwise distances or np.nan. Pairs whose
        union volume is zero are np.nan as well. An empty (0, 0) array is
        returned when either input has no elements.
    """

    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))
    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o in range(objs.shape[0]):
        for h in range(hyps.shape[0]):
            base_area = find_area(clip_polygon(objs[o], hyps[h]))
            # Each box spans [y - h, y] along the y axis. The shared extent is
            # bounded by the smaller "y" and the larger "y - h"; taking
            # max(y) - min(y - h) would give the enclosing extent and
            # over-estimate the intersection. Disjoint spans overlap by 0.
            overlap_end = min(objs[o][1], hyps[h][1])
            overlap_start = max(objs[o][1] - objs[o][4], hyps[h][1] - hyps[h][4])
            height = max(overlap_end - overlap_start, 0.)
            intersect = base_area*height
            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect
            if union != 0:
                C[o, h] = 1. - intersect / union
            else:
                C[o, h] = np.nan
    C[C > max_iou] = np.nan
    return C


================================================
FILE: src/featurepointnet_model.py
================================================
import os, pdb
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser

import featurepointnet_tf_util as tf_util
import featurepointnet_model_util as model_util
from calibration import Calibration, OmniCalibration

# Fixed batch dimension: FPointNet.__init__ passes it to
# model_util.placeholder_inputs, and FPointNet.__call__ pads its input to a
# multiple of this value before running the session.
batch_size = 6 #TODO: Update if needed?

class FPointNet():
    def __init__(self, config_path):
        ''' Build the Frustum PointNet graph, open a session and restore weights.

        Input:
            config_path: path to an INI-style config file whose [general]
                section provides `num_point` (int) and `model_path` (the
                checkpoint handed to tf.train.Saver.restore)

        The placeholders, the loss and the tensors needed at inference time
        are stored in self.ops.
        '''
        # SafeConfigParser is a deprecated alias of ConfigParser that was
        # removed in Python 3.12; ConfigParser is the supported spelling.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        self.num_point = parser.getint('general', 'num_point')
        self.model_path = parser.get('general', 'model_path')

        with tf.device('/gpu:0'):
            pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \
            heading_class_label_pl, heading_residual_label_pl, \
            size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)
            # NOTE(review): this placeholder is not connected to any op built
            # here (the get_depth_feature_op call below is commented out).
            self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))
            #depth_feature = self.get_depth_feature_op(is_training_pl)
            loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points)
            self.saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        self.sess = tf.Session(config=config)

        #Initialize variables
        self.sess.run(tf.global_variables_initializer())
        # Restore variables from disk.
        self.saver.restore(self.sess, self.model_path)
        self.ops = {'pointclouds_pl': pointclouds_pl,
               'one_hot_vec_pl': one_hot_vec_pl,
               'labels_pl': labels_pl,
               'centers_pl': centers_pl,
               'heading_class_label_pl': heading_class_label_pl,
               'heading_residual_label_pl': heading_residual_label_pl,
               'size_class_label_pl': size_class_label_pl,
               'size_residual_label_pl': size_residual_label_pl,
               'is_training_pl': is_training_pl,
               'logits': end_points['mask_logits'],
               'center': end_points['center'],
               'end_points': end_points,
               'depth_feature':depth_feature,
               'loss': loss}

    # @profile
    def __call__(self, input_point_cloud, rot_angle, peds=False):
        ''' Run inference for frustum pointnets in batch mode.

        Input:
            input_point_cloud: numpy array whose first axis indexes frustums;
                it is padded with all-zero frustums of shape
                (self.num_point, 4) up to a multiple of `batch_size`
            rot_angle: indexable of per-frustum rotation angles, forwarded to
                model_util.from_prediction_to_label_format
            peds: if True, index 1 of the one-hot class vector is set;
                otherwise the vector is left all-zero
        Output:
            boxes: one row per input frustum, (tx,ty,tz,l,h,w,ry)
            scores: zeros of shape (num_inputs,); no box score is computed
            features: array of shape (num_inputs, 512)
        '''
        num_inputs = input_point_cloud.shape[0]
        if num_inputs == 0:
            # Nothing to run; np.vstack below would fail on an empty list.
            return np.zeros((0, 7)), np.zeros((0,)), np.zeros((0, 512))

        one_hot_vec = np.zeros((batch_size,3))
        if peds:
            one_hot_vec[:, 1] = 1

        # Pad to a whole number of batches: the placeholders have a fixed
        # batch dimension.
        remainder = num_inputs % batch_size
        if remainder != 0:
            input_point_cloud = np.vstack([input_point_cloud, np.zeros((batch_size - remainder, self.num_point, 4))])
        num_padded = input_point_cloud.shape[0]
        num_batches = num_padded // batch_size

        centers = np.zeros((num_padded, 3))
        heading_logits = np.zeros((num_padded, model_util.NUM_HEADING_BIN))
        heading_residuals = np.zeros((num_padded, model_util.NUM_HEADING_BIN))
        size_logits = np.zeros((num_padded, model_util.NUM_SIZE_CLUSTER))
        size_residuals = np.zeros((num_padded, model_util.NUM_SIZE_CLUSTER, 3))
        scores = np.zeros((num_padded,)) # 3D box score
        features = np.zeros((num_padded, 512))

        ep = self.ops['end_points']
        fetches = [self.ops['center'],
                   ep['heading_scores'], ep['heading_residuals'],
                   ep['size_scores'], ep['size_residuals'], self.ops['depth_feature']]
        for i in range(num_batches):
            batch = slice(i*batch_size, (i+1)*batch_size)
            feed_dict = {\
                self.ops['pointclouds_pl']: input_point_cloud[batch],
                self.ops['one_hot_vec_pl']: one_hot_vec,
                self.ops['is_training_pl']: False}

            batch_centers, \
            batch_heading_scores, batch_heading_residuals, \
            batch_size_scores, batch_size_residuals, batch_features = \
                self.sess.run(fetches, feed_dict=feed_dict)

            centers[batch] = batch_centers
            heading_logits[batch] = batch_heading_scores
            heading_residuals[batch] = batch_heading_residuals
            size_logits[batch] = batch_size_scores
            size_residuals[batch] = batch_size_residuals
            features[batch] = batch_features[:,0,:]
        heading_cls = np.argmax(heading_logits, 1) # B
        size_cls = np.argmax(size_logits, 1) # B
        # Keep only the residual belonging to the winning heading / size class.
        heading_res = np.vstack([heading_residuals[i, heading_cls[i]] for i in range(heading_cls.shape[0])])
        size_res = np.vstack([size_residuals[i, size_cls[i], :] for i in range(size_cls.shape[0])])

        #TODO: Make this accept batches if wanted
        boxes = []
        for i in range(num_inputs):
            # NOTE(review): heading_res[i] has shape (1,), so the heading entry
            # looks like a length-1 array inside the tuple; confirm np.array
            # handles this with the numpy version in use.
            box = np.array(model_util.from_prediction_to_label_format(centers[i], heading_cls[i], heading_res[i], size_cls[i], size_res[i], rot_angle[i]))
            box[6] = np.squeeze(box[6])
            # Swap entries 4 and 5: (.., l, w, h, ry) -> (.., l, h, w, ry).
            swp = box[5]
            box[5] = box[4]
            box[4] = swp
            boxes.append(box)
        boxes = np.vstack(boxes)
        return boxes, scores[:num_inputs], features[:num_inputs]


    def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):
        ''' 3D instance segmentation PointNet v1 network.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
            end_points: dict (returned unchanged)
        Output:
            logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object
            end_points: dict
        '''
        num_point = point_cloud.get_shape()[1].value

        def hidden_conv(inputs, channels, scope):
            # Every hidden layer is the same [1,1] tf_util.conv2d call; only
            # the channel count and the variable scope differ.
            return tf_util.conv2d(inputs, channels, [1,1],
                                  padding='VALID', stride=[1,1],
                                  bn=True, is_training=is_training,
                                  scope=scope, bn_decay=bn_decay)

        net = tf.expand_dims(point_cloud, 2)
        net = hidden_conv(net, 64, 'conv1')
        net = hidden_conv(net, 64, 'conv2')
        # Per-point features, re-used below next to the global feature.
        point_feat = hidden_conv(net, 64, 'conv3')
        net = hidden_conv(point_feat, 128, 'conv4')
        net = hidden_conv(net, 1024, 'conv5')

        # Pool over all points, append the class vector, then copy the result
        # to every point.
        global_feat = tf_util.max_pool2d(net, [num_point,1],
                                         padding='VALID', scope='maxpool')
        one_hot_expanded = tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)
        global_feat = tf.concat([global_feat, one_hot_expanded], axis=3)
        global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
        concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])

        net = hidden_conv(concat_feat, 512, 'conv6')
        net = hidden_conv(net, 256, 'conv7')
        net = hidden_conv(net, 128, 'conv8')
        net = hidden_conv(net, 128, 'conv9')
        net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)

        logits = tf_util.conv2d(net, 2, [1,1],
                             padding='VALID', stride=[1,1], activation_fn=None,
                             scope='conv10')
        logits = tf.squeeze(logits, [2]) # BxNxC
        return logits, end_points
     
    def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):
        ''' 3D Box Estimation PointNet v1 network.
        Input:
            object_point_cloud: TF tensor in shape (B,M,C)
                point clouds in object coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
            end_points: dict (returned unchanged)
        Output:
            output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
                including box centers, heading bin class scores and residuals,
                and size cluster scores and residuals
            end_points: dict
            features: max over axis 1 of the last convolution's output
        '''
        num_point = object_point_cloud.get_shape()[1].value
        net = tf.expand_dims(object_point_cloud, 2)
        conv_layers = ((128, 'conv-reg1'), (128, 'conv-reg2'),
                       (256, 'conv-reg3'), (512, 'conv-reg4'))
        for channels, scope in conv_layers:
            net = tf_util.conv2d(net, channels, [1,1],
                                 padding='VALID', stride=[1,1],
                                 bn=True, is_training=is_training,
                                 scope=scope, bn_decay=bn_decay)

        # Exposed to the caller as the per-object feature.
        features = tf.reduce_max(net, axis = 1)

        pooled = tf_util.max_pool2d(net, [num_point,1],
            padding='VALID', scope='maxpool2')
        pooled = tf.squeeze(pooled, axis=[1,2])
        hidden = tf.concat([pooled, one_hot_vec], axis=1)
        hidden = tf_util.fully_connected(hidden, 512, scope='fc1', bn=True,
            is_training=is_training, bn_decay=bn_decay)
        hidden = tf_util.fully_connected(hidden, 256, scope='fc2', bn=True,
            is_training=is_training, bn_decay=bn_decay)

        # The first 3 numbers: box center coordinates (cx,cy,cz),
        # the next NUM_HEADING_BIN*2:  heading bin class scores and bin residuals
        # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals
        output_width = 3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4
        output = tf_util.fully_connected(hidden,
            output_width, activation_fn=None, scope='fc3')
        return output, end_points, features

    def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):
        ''' Frustum PointNets model. The model predicts 3D object masks and
        amodal bounding boxes for objects in frustum point clouds.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
        Output:
            end_points: dict (map from name strings to TF tensors)
            features: feature tensor returned by get_3d_box_estimation_v1_net
        '''
        end_points = {}

        # Stage 1: 3D instance segmentation PointNet.
        mask_logits, end_points = self.get_instance_seg_v1_net(
            point_cloud, one_hot_vec, is_training, bn_decay, end_points)
        end_points['mask_logits'] = mask_logits

        # Stage 2: select masked points and translate to masked points' centroid.
        masked_xyz, mask_xyz_mean, end_points = model_util.point_cloud_masking(
            point_cloud, mask_logits, end_points)

        # Stage 3: T-Net center regression and coordinate translation.
        center_delta, end_points = model_util.get_center_regression_net(
            masked_xyz, one_hot_vec, is_training, bn_decay, end_points)
        stage1_center = center_delta + mask_xyz_mean # Bx3
        end_points['stage1_center'] = stage1_center
        # Object point cloud in object coordinate.
        centered_xyz = masked_xyz - tf.expand_dims(center_delta, 1)

        # Stage 4: amodal box estimation PointNet.
        box_output, end_points, features = self.get_3d_box_estimation_v1_net(
            centered_xyz, one_hot_vec, is_training, bn_decay, end_points)

        # Parse output to 3D box parameters; the final center is the box-net
        # offset added to the stage-1 center.
        end_points = model_util.parse_output_to_tensors(box_output, end_points)
        end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3

        return end_points, features
    
    def get_depth_feature_op(self, is_training):
        ''' Build a feature op on top of the self.object_pointcloud placeholder.

        Applies four [1,1] tf_util.conv2d layers under the scopes
        'conv-reg1' .. 'conv-reg4' (128, 128, 256 and 512 output channels,
        bn_decay=None) and returns the max over axis 1 of the result.
        '''
        conv_layers = ((128, 'conv-reg1'), (128, 'conv-reg2'),
                       (256, 'conv-reg3'), (512, 'conv-reg4'))
        net = tf.expand_dims(self.object_pointcloud, 2)
        for channels, scope in conv_layers:
            net = tf_util.conv2d(net, channels, [1,1],
                                 padding='VALID', stride=[1,1],
                                 bn=True, is_training=is_training,
                                 scope=scope, bn_decay=None)
        return tf.reduce_max(net, axis = 1)

    def get_depth_feature(self, object_pointcloud):
        
        feed_dict = {self.object_pointcloud:object_pointcloud, self.ops['is_training_pl']:False}
        depth_feature = self.sess.run([self.ops['depth_feature']], feed_dict = feed_dict)
        return depth_feature

    def softmax(self, x):
        ''' Numpy function for softmax'''
        shape = x.shape
        probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True))
        probs /= np.sum(probs, axis=len(shape)-1, keepdims=True)
        return probs

def create_depth_model(model, config_path):
    ''' Instantiate the depth model called `model` from `config_path`.

    'FPointNet' and 'PointNet' are recognised; any other name yields None.
    Note that folder path must be the folder containing the config.yaml file
    if omni_camera is True.

    NOTE(review): no PointNet class is defined or imported in the part of
    this file that is visible here - confirm the 'PointNet' branch can run.
    '''
    if model == 'FPointNet':
        return FPointNet(config_path)
    if model == 'PointNet':
        return PointNet(config_path)
    return None


================================================
FILE: src/featurepointnet_model_util.py
================================================
# import open3d as o3d
import numpy as np
import tensorflow as tf
import os
import sys
import torch
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
import featurepointnet_tf_util as tf_util

# -----------------
# Global Constants
# -----------------

# Number of heading bins; each bin covers 2*pi/NUM_HEADING_BIN radians
# (see angle2class / class2angle).
NUM_HEADING_BIN = 12
NUM_SIZE_CLUSTER = 8 # one cluster for each type
NUM_OBJECT_POINT = 512
# Object type name -> size cluster index, and the inverse mapping.
g_type2class={'Car':0, 'Van':1, 'Truck':2, 'Pedestrian':3,
              'Person_sitting':4, 'Cyclist':5, 'Tram':6, 'Misc':7}
g_class2type = {g_type2class[t]:t for t in g_type2class}
# Object type name -> position in the length-3 one-hot class vector.
g_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}
# Mean box size per type; class2size unpacks these as (l, w, h).
#Added 0.5m and 0.2m for car and pedestrian to make boxes slightly bigger
# NOTE(review): confirm the offsets mentioned above are really included in
# the numbers below.
g_type_mean_size = {'Car': np.array([3.88311640418,1.62856739989,1.52563191462]),
                    'Van': np.array([5.06763659,1.9007158,2.20532825]),
                    'Truck': np.array([10.13586957,2.58549199,3.2520595]),
                    'Pedestrian': np.array([0.84422524,0.66068622,1.76255119]),
                    'Person_sitting': np.array([0.80057803,0.5983815,1.27450867]),
                    'Cyclist': np.array([1.76282397,0.59706367,1.73698127]),
                    'Tram': np.array([16.17150617,2.53246914,3.53079012]),
                    'Misc': np.array([3.64300781,1.54298177,1.92320313])}
# Same mean sizes as a (NUM_SIZE_CLUSTER, 3) array indexed by cluster id.
g_mean_size_arr = np.zeros((NUM_SIZE_CLUSTER, 3)) # size clusters
for i in range(NUM_SIZE_CLUSTER):
    g_mean_size_arr[i,:] = g_type_mean_size[g_class2type[i]]

# -----------------
# TF Functions Helpers
# -----------------

def tf_gather_object_pc(point_cloud, mask, npoints=512):
    ''' Gather object point clouds according to predicted masks.
    Input:
        point_cloud: TF tensor in shape (B,N,C)
        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)
        npoints: int scalar, number of points gathered per sample (default: 512)
    Output:
        object_pc: TF tensor in shape (B,npoint,C)
        indices: TF int tensor in shape (B,npoint,2)
    '''
    def mask_to_indices(mask):
        # Numpy side of the op: turns each mask row into exactly `npoints`
        # (batch index, point index) pairs.
        num_samples = mask.shape[0]
        indices = np.zeros((num_samples, npoints, 2), dtype=np.int32)
        for sample in range(num_samples):
            indices[sample,:,0] = sample
            picked = np.where(mask[sample,:]>0.5)[0]
            num_picked = len(picked)
            if num_picked == 0:
                # Nothing selected: the point indices stay at 0.
                continue
            if num_picked > npoints:
                # Too many points: subsample without replacement.
                choice = np.random.choice(num_picked,
                    npoints, replace=False)
            else:
                # Too few points: keep them all and pad with repeats.
                padding = np.random.choice(num_picked,
                    npoints-num_picked, replace=True)
                choice = np.concatenate((np.arange(num_picked), padding))
            np.random.shuffle(choice)
            indices[sample,:,1] = picked[choice]
        return indices

    indices = tf.py_func(mask_to_indices, [mask], tf.int32)
    object_pc = tf.gather_nd(point_cloud, indices)
    return object_pc, indices


def get_box3d_corners_helper(centers, headings, sizes):
    """ TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3)

    Builds the 8 corners of each box from its size (l,w,h), rotates them about
    the y axis by the heading and translates them to the box center.
    """
    N = centers.get_shape()[0].value
    l = tf.slice(sizes, [0,0], [-1,1]) # (N,1)
    w = tf.slice(sizes, [0,1], [-1,1]) # (N,1)
    h = tf.slice(sizes, [0,2], [-1,1]) # (N,1)

    # Signed half extents, shared by the corner lists below.
    l_pos, l_neg = l/2, -l/2
    h_pos, h_neg = h/2, -h/2
    w_pos, w_neg = w/2, -w/2
    x_corners = tf.concat([l_pos,l_pos,l_neg,l_neg,l_pos,l_pos,l_neg,l_neg], axis=1) # (N,8)
    y_corners = tf.concat([h_pos,h_pos,h_pos,h_pos,h_neg,h_neg,h_neg,h_neg], axis=1) # (N,8)
    z_corners = tf.concat([w_pos,w_neg,w_neg,w_pos,w_pos,w_neg,w_neg,w_pos], axis=1) # (N,8)
    corners = tf.stack([x_corners, y_corners, z_corners], axis=1) # (N,3,8)

    # Rotation about the y axis, one matrix per box.
    cos_h = tf.cos(headings)
    sin_h = tf.sin(headings)
    ones = tf.ones([N], dtype=tf.float32)
    zeros = tf.zeros([N], dtype=tf.float32)
    rotation = tf.stack([
        tf.stack([cos_h,zeros,sin_h], axis=1),
        tf.stack([zeros,ones,zeros], axis=1),
        tf.stack([-sin_h,zeros,cos_h], axis=1),
    ], axis=1) # (N,3,3)

    corners_3d = tf.matmul(rotation, corners) # (N,3,8)
    corners_3d += tf.tile(tf.expand_dims(centers,2), [1,1,8]) # (N,3,8)
    return tf.transpose(corners_3d, perm=[0,2,1]) # (N,8,3)

def get_box3d_corners(center, heading_residuals, size_residuals):
    """ TF layer.

    Builds the corners of every (heading bin, size cluster) box hypothesis:
    heading = bin center + heading residual, size = cluster mean size + size
    residual.

    Inputs:
        center: (B,3)
        heading_residuals: (B,NH)
        size_residuals: (B,NS,3)
    Outputs:
        box3d_corners: (B,NH,NS,8,3) tensor
    """
    batch_size = center.get_shape()[0].value
    heading_bin_centers = tf.constant(np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    headings = heading_residuals + tf.expand_dims(heading_bin_centers, 0) # (B,NH)

    # The residual must be added exactly once; mean_sizes holds only the
    # per-cluster means.
    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    sizes = mean_sizes + size_residuals # (B,NS,3)
    sizes = tf.tile(tf.expand_dims(sizes,1), [1,NUM_HEADING_BIN,1,1]) # (B,NH,NS,3)
    headings = tf.tile(tf.expand_dims(headings,-1), [1,1,NUM_SIZE_CLUSTER]) # (B,NH,NS)
    centers = tf.tile(tf.expand_dims(tf.expand_dims(center,1),1), [1,NUM_HEADING_BIN, NUM_SIZE_CLUSTER,1]) # (B,NH,NS,3)

    # Flatten all hypotheses so the helper can treat them as N independent boxes.
    N = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER
    corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N,3]), tf.reshape(headings, [N]), tf.reshape(sizes, [N,3]))

    return tf.reshape(corners_3d, [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])


def huber_loss(error, delta):
    ''' Mean Huber loss of `error`: quadratic for |error| up to `delta`,
    linear (slope `delta`) beyond it. '''
    magnitude = tf.abs(error)
    # Part of the magnitude that falls inside the quadratic zone.
    within_delta = tf.minimum(magnitude, delta)
    beyond_delta = magnitude - within_delta
    per_element = 0.5 * within_delta**2 + delta * beyond_delta
    return tf.reduce_mean(per_element)


def parse_output_to_tensors(output, end_points):
    ''' Parse batch output to separate tensors (added to end_points)
    Input:
        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)
        end_points: dict
    Output:
        end_points: dict (updated)

    Column layout of `output`: 3 center values, NUM_HEADING_BIN heading
    scores, NUM_HEADING_BIN normalized heading residuals, NUM_SIZE_CLUSTER
    size scores, NUM_SIZE_CLUSTER*3 normalized size residuals.
    '''
    batch_size = output.get_shape()[0].value

    # Starting column of each group.
    heading_scores_at = 3
    heading_res_at = heading_scores_at + NUM_HEADING_BIN
    size_scores_at = heading_res_at + NUM_HEADING_BIN
    size_res_at = size_scores_at + NUM_SIZE_CLUSTER

    end_points['center_boxnet'] = tf.slice(output, [0,0], [-1,3])

    heading_scores = tf.slice(output, [0,heading_scores_at], [-1,NUM_HEADING_BIN])
    heading_res_norm = tf.slice(output, [0,heading_res_at], [-1,NUM_HEADING_BIN])
    end_points['heading_scores'] = heading_scores # BxNUM_HEADING_BIN
    end_points['heading_residuals_normalized'] = heading_res_norm # BxNUM_HEADING_BIN (-1 to 1)
    # Scale normalized residuals by half a bin width.
    end_points['heading_residuals'] = heading_res_norm * (np.pi/NUM_HEADING_BIN) # BxNUM_HEADING_BIN

    size_scores = tf.slice(output, [0,size_scores_at], [-1,NUM_SIZE_CLUSTER]) # BxNUM_SIZE_CLUSTER
    size_res_norm = tf.slice(output, [0,size_res_at], [-1,NUM_SIZE_CLUSTER*3])
    size_res_norm = tf.reshape(size_res_norm, [batch_size, NUM_SIZE_CLUSTER, 3]) # BxNUM_SIZE_CLUSTERx3
    end_points['size_scores'] = size_scores
    end_points['size_residuals_normalized'] = size_res_norm
    # Scale normalized residuals by the mean size of their cluster.
    end_points['size_residuals'] = size_res_norm * \
        tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0)

    return end_points

# -----------------
# Box Parsing Helpers
# -----------------

def from_prediction_to_label_format(center, angle_class, angle_res,\
                                    size_class, size_res, rot_angle):
    ''' Convert predicted box parameters to label format.

    Returns the tuple (tx,ty,tz,l,w,h,ry): the center rotated about the y
    axis by -rot_angle with h/2 added to its y component, the decoded size,
    and the decoded heading shifted by rot_angle.
    '''
    length, width, height = class2size(size_class, size_res)
    heading = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle
    rotated_center = rotate_pc_along_y(np.expand_dims(center,0),-rot_angle).squeeze()
    tx, ty, tz = rotated_center
    return tx, ty + height/2.0, tz, length, width, height, heading

def size2class(size, type_name):
    ''' Encode a 3D box size as (template class, residual to the template).
    todo (rqi): support multiple size clusters per type.

    Input:
        size: numpy array of shape (3,) for (l,w,h)
        type_name: string, key into g_type2class / g_type_mean_size
    Output:
        size_class: int scalar
        size_residual: numpy array of shape (3,), size minus the mean
            size registered for type_name
    '''
    return g_type2class[type_name], size - g_type_mean_size[type_name]

def class2size(pred_cls, residual):
    ''' Inverse function to size2class: template mean size plus residual. '''
    type_name = g_class2type[pred_cls]
    return g_type_mean_size[type_name] + residual

def angle2class(angle, num_class):
    ''' Discretize a continuous angle into a bin index plus a residual.

    Bin centers sit at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N); each
    bin spans half a bin width on either side of its center.

    Input:
        angle: rad scalar, from 0-2pi (or -pi~pi); wrapped into [0, 2pi]
        num_class: int scalar, number of classes N
    Output:
        class_id, int, among 0,1,...,N-1
        residual_angle: float, a number such that
            class*(2pi/N) + residual_angle = angle
    '''
    full_turn = 2*np.pi
    angle = angle % full_turn
    assert(0 <= angle <= full_turn)
    bin_width = full_turn/float(num_class)
    half_bin = bin_width/2
    # Shift by half a bin so that integer division lands on the bin whose
    # center is nearest to the angle.
    shifted = (angle + half_bin) % full_turn
    class_id = int(shifted/bin_width)
    residual_angle = shifted - (class_id * bin_width + half_bin)
    return class_id, residual_angle

def class2angle(pred_cls, residual, num_class, to_label_format=True):
    ''' Inverse function to angle2class: bin center plus residual.

    If to_label_format is set, angles above pi are shifted down by 2pi
    to match the range used in labels.
    '''
    bin_width = 2*np.pi/float(num_class)
    angle = pred_cls * bin_width + residual
    needs_wrap = to_label_format and angle>np.pi
    return angle - 2*np.pi if needs_wrap else angle

def rotate_pc_along_y(pc, rot_angle):
    ''' Rotate the X and Z channels of a point cloud by rot_angle.

    The rotation is written back into pc (in place) and pc is returned.

    Input:
        pc: numpy array (N,C), first 3 channels are XYZ
            z is facing forward, x is left ward, y is downward
        rot_angle: rad scalar
    Output:
        pc: updated pc with XYZ rotated
    '''
    c = np.cos(rot_angle)
    s = np.sin(rot_angle)
    rotation = np.array([[c, -s],[s, c]])
    xz = pc[:,[0,2]]
    pc[:,[0,2]] = np.dot(xz, rotation.T)
    return pc


def rotate_pc_along_y_torch(pc, rot_angle):
    ''' Batched torch counterpart of rotate_pc_along_y.

    Each batch element's X and Z channels are rotated by its own angle;
    the result is written back into pc (in place) and pc is returned.

    Input:
        pc: torch tensor (B,N,C), channels 0 and 2 are the X and Z
            coordinates that get rotated
        rot_angle: iterable of B rad scalars, one per batch element
    Output:
        pc: updated pc with X and Z rotated
    '''
    tensor_type = pc.type()

    def _rotation(angle):
        # Already transposed relative to rotate_pc_along_y, because points
        # are right-multiplied by this matrix.
        c = np.cos(angle)
        s = np.sin(angle)
        return torch.Tensor([[c, s],[-s, c]]).type(tensor_type)

    rotations = torch.stack([_rotation(angle) for angle in rot_angle], dim=0)
    xz = pc[:, :,[0,2]]
    pc[:, :,[0,2]] = torch.bmm(xz, rotations)
    return pc

# --------------------------------------
# Shared subgraphs for v1 and v2 models
# --------------------------------------

def placeholder_inputs(batch_size, num_point):
    ''' Create the input and ground-truth placeholders.

    Input:
        batch_size: scalar int
        num_point: scalar int
    Output:
        TF placeholders, in this order: point clouds (B,N,4),
        one-hot vector (B,3), segmentation labels (B,N), centers (B,3),
        heading class (B,), heading residual (B,), size class (B,),
        size residual (B,3)
    '''
    def _float_pl(*dims):
        return tf.placeholder(tf.float32, shape=dims)

    def _int_pl(*dims):
        return tf.placeholder(tf.int32, shape=dims)

    # Placeholders are created in the same order as they are returned.
    pointclouds_pl = _float_pl(batch_size, num_point, 4)
    one_hot_vec_pl = _float_pl(batch_size, 3)

    # labels_pl is for segmentation label
    labels_pl = _int_pl(batch_size, num_point)
    centers_pl = _float_pl(batch_size, 3)
    heading_class_label_pl = _int_pl(batch_size)
    heading_residual_label_pl = _float_pl(batch_size)
    size_class_label_pl = _int_pl(batch_size)
    size_residual_label_pl = _float_pl(batch_size, 3)

    return (pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl,
            heading_class_label_pl, heading_residual_label_pl,
            size_class_label_pl, size_residual_label_pl)


def point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):
    ''' Select point cloud with predicted 3D mask,
    translate coordinates to the masked points centroid.
    
    Input:
        point_cloud: TF tensor in shape (B,N,C)
        logits: TF tensor in shape (B,N,2)
        end_points: dict; the key 'mask' is set to the (B,N) float mask
        xyz_only: boolean, if True only return XYZ channels, otherwise
            the channels from index 3 onwards are appended unchanged
    Output:
        object_point_cloud: TF tensor in shape (B,M,3) when xyz_only,
            otherwise (B,M,C)
            M = NUM_OBJECT_POINT as a hyper-parameter
        mask_xyz_mean: TF tensor in shape (B,3)
        end_points: dict (updated)
    '''
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    # A point is masked in when its channel-1 logit exceeds its channel-0 logit.
    mask = tf.slice(logits,[0,0,0],[-1,-1,1]) < \
        tf.slice(logits,[0,0,1],[-1,-1,1])
    mask = tf.to_float(mask) # BxNx1
    mask_count = tf.tile(tf.reduce_sum(mask,axis=1,keep_dims=True),
        [1,1,3]) # Bx1x3
    point_cloud_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3]) # BxNx3
    mask_xyz_mean = tf.reduce_sum(tf.tile(mask, [1,1,3])*point_cloud_xyz,
        axis=1, keep_dims=True) # Bx1x3
    mask = tf.squeeze(mask, axis=[2]) # BxN
    end_points['mask'] = mask
    # Clamp the count to at least 1 so an empty mask does not divide by zero.
    mask_xyz_mean = mask_xyz_mean/tf.maximum(mask_count,1) # Bx1x3

    # Translate to masked points' centroid
    point_cloud_xyz_stage1 = point_cloud_xyz - \
        tf.tile(mask_xyz_mean, [1,num_point,1])

    if xyz_only:
        point_cloud_stage1 = point_cloud_xyz_stage1
    else:
        # Keep the non-XYZ channels as they are and re-attach them.
        point_cloud_features = tf.slice(point_cloud, [0,0,3], [-1,-1,-1])
        point_cloud_stage1 = tf.concat(\
            [point_cloud_xyz_stage1, point_cloud_features], axis=-1)
    num_channels = point_cloud_stage1.get_shape()[2].value

    # tf_gather_object_pc is defined elsewhere; presumably it picks
    # NUM_OBJECT_POINT masked points per batch element -- confirm there.
    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,
        mask, NUM_OBJECT_POINT)
    # Restore the static shape on the gathered tensor.
    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])

    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points


def get_center_regression_net(object_point_cloud, one_hot_vec,
                              is_training, bn_decay, end_points):
    ''' Regression network for center delta. a.k.a. T-Net.

    Three 1x1 convolutions, a max pool over all points, concatenation
    with the one-hot type vector, then three fully connected layers.

    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in 3D mask coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: passed through to the batch-norm layers
        bn_decay: passed through to the batch-norm layers
        end_points: dict, returned unchanged
    Output:
        predicted_center: TF tensor in shape (B,3)
        end_points: dict
    '''
    num_point = object_point_cloud.get_shape()[1].value
    net = tf.expand_dims(object_point_cloud, 2)

    # Per-point feature extraction: (output channels, variable scope).
    conv_layers = [(128, 'conv-reg1-stage1'),
                   (128, 'conv-reg2-stage1'),
                   (256, 'conv-reg3-stage1')]
    for channels, layer_scope in conv_layers:
        net = tf_util.conv2d(net, channels, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope=layer_scope, bn_decay=bn_decay)

    # Global feature: max over the point dimension.
    net = tf_util.max_pool2d(net, [num_point,1],
        padding='VALID', scope='maxpool-stage1')
    net = tf.squeeze(net, axis=[1,2])
    net = tf.concat([net, one_hot_vec], axis=1)

    fc_layers = [(256, 'fc1-stage1'), (128, 'fc2-stage1')]
    for units, layer_scope in fc_layers:
        net = tf_util.fully_connected(net, units, scope=layer_scope, bn=True,
            is_training=is_training, bn_decay=bn_decay)

    # Final linear layer, no activation and no batch norm.
    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,
        scope='fc3-stage1')
    return predicted_center, end_points

def softmax(x):
    ''' Numpy softmax over the last axis of x.

    The per-row maximum is subtracted before exponentiating, which keeps
    np.exp from overflowing without changing the result.
    '''
    last_axis = len(x.shape)-1
    shifted = x - np.max(x, axis=last_axis, keepdims=True)
    probs = np.exp(shifted)
    probs /= np.sum(probs, axis=last_axis, keepdims=True)
    return probs


def get_loss(mask_label, center_label, \
             heading_class_label, heading_residual_label, \
             size_class_label, size_residual_label, \
             end_points, \
             corner_loss_weight=10.0, \
             box_loss_weight=1.0):
    ''' Loss functions for 3D object detection.

    Builds the segmentation, center, heading, size and corner losses,
    logs each as a scalar summary and returns their weighted sum.

    Input:
        mask_label: TF int32 tensor in shape (B,N)
        center_label: TF tensor in shape (B,3)
        heading_class_label: TF int32 tensor in shape (B,)
        heading_residual_label: TF tensor in shape (B,)
        size_class_label: TF tensor int32 in shape (B,)
        size_residual_label: TF tensor in shape (B,3)
        end_points: dict, outputs from our model; the keys read here are
            'mask_logits', 'center', 'stage1_center', 'heading_scores',
            'heading_residuals_normalized', 'heading_residuals',
            'size_scores', 'size_residuals_normalized', 'size_residuals'
        corner_loss_weight: float scalar
        box_loss_weight: float scalar
    Output:
        total_loss: TF scalar tensor
            the total_loss is also added to the losses collection
    '''
    # 3D Segmentation loss
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
        logits=end_points['mask_logits'], labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)

    # Center regression losses
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - \
        end_points['stage1_center'], axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)

    # Heading loss
    heading_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)

    # Only the residual predicted for the ground-truth heading bin is
    # penalized; the one-hot mask selects it.
    hcls_onehot = tf.one_hot(heading_class_label,
        depth=NUM_HEADING_BIN,
        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi/NUM_HEADING_BIN)
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
        heading_residual_normalized_loss)

    # Size loss
    size_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)

    # As for heading: select the residual of the ground-truth size cluster.
    scls_onehot = tf.one_hot(size_class_label,
        depth=NUM_SIZE_CLUSTER,
        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims( \
        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3
    predicted_size_residual_normalized = tf.reduce_sum( \
        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3

    # The label residual is normalized by the mean size of its own cluster.
    mean_size_arr_expand = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3
    mean_size_label = tf.reduce_sum( \
        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm( \
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
        size_residual_normalized_loss)

    # Corner loss
    # We select the predicted corners corresponding to the 
    # GT heading bin and size cluster.
    corners_3d = get_box3d_corners(end_points['center'],
        end_points['heading_residuals'],
        end_points['size_residuals']) # (B,NH,NS,8,3)
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum( \
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,
        axis=[1,2]) # (B,8,3)

    # Rebuild the ground-truth heading angle and box size from the
    # class and residual labels.
    heading_bin_centers = tf.constant( \
        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    heading_label = tf.expand_dims(heading_residual_label,1) + \
        tf.expand_dims(heading_bin_centers, 0) # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)
    mean_sizes = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum( \
        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)
    corners_3d_gt = get_box3d_corners_helper( \
        center_label, heading_label, size_label) # (B,8,3)
    # Second ground-truth box with the heading turned by pi.
    corners_3d_gt_flip = get_box3d_corners_helper( \
        center_label, heading_label+np.pi, size_label) # (B,8,3)

    # Per corner, take the smaller of the distances to the two boxes, so a
    # prediction flipped by pi is not penalized.
    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0) 
    tf.summary.scalar('corners loss', corners_loss)

    # Weighted sum of all losses
    total_loss = mask_loss + box_loss_weight * (center_loss + \
        heading_class_loss + size_class_loss + \
        heading_residual_normalized_loss*20 + \
        size_residual_normalized_loss*20 + \
        stage1_center_loss + \
        corner_loss_weight*corners_loss)
    tf.add_to_collection('losses', total_loss)

    return total_loss

def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
                           clip_distance=40.0):
    ''' Project lidar points into the image and flag those inside a window.

    Input:
        pc_velo: lidar points, passed to calib.project_ref_to_image_torch
        calib: calibration object providing project_ref_to_image_torch
        xmin, ymin, xmax, ymax: window in image coordinates; the lower
            bounds are inclusive, the upper bounds exclusive
        clip_distance: currently unused
    Output:
        pts_2d: projected points, columns 0 and 1 are the image x and y
        fov_inds: boolean mask, True for points inside the window
    '''
    pts_2d = calib.project_ref_to_image_torch(pc_velo)

    u = pts_2d[:,0]
    v = pts_2d[:,1]
    inside_x = (u<xmax) & (u>=xmin)
    inside_y = (v<ymax) & (v>=ymin)
    return pts_2d, inside_x & inside_y

# @profile
def preprocess_pointcloud(detections, point_cloud, pc_image_coord,
                            calib, num_point = 1024, 
                            lidar_point_threshold=5,
                            omni=False):
    ''' Extract point clouds in frustums extruded from 2D detection boxes.
        Update: Lidar points and 3d boxes are in *rect camera* coord system
            (as that in 3d box label files)
        
    Input:
        detections: iterable of 7-element detections whose first four
            entries are xmin, ymin, xmax, ymax of the 2D box
        point_cloud: torch tensor of lidar points, one row per point
            (presumably 4 channels, matching the zero padding below --
            confirm against caller)
        pc_image_coord: image coordinates of the same points, row-aligned
            with point_cloud; columns 0 and 1 are x and y
        calib: calibration object; img_shape is read when omni is set,
            project_image_to_rect otherwise
        num_point: int, number of points every frustum is resampled to
        lidar_point_threshold: int, neglect frustum with too few points.
        omni: boolean, selects how the frustum angle is derived
    Output:
        point_clouds: torch tensor (len(detections), num_point, C),
            each frustum rotated by its own angle
        rot_angles: list with one frustum angle per detection
        ids_3d: numpy array of length len(detections); -1 marks a
            frustum with fewer than lidar_point_threshold points, 0 otherwise
    '''
    
    point_clouds = [] # channel number = 4, xyz,intensity in rect camera coord
    rot_angles = []
    ids_3d = np.zeros((len(detections)))
    for i, detection in enumerate(detections):

        xmin,ymin,xmax,ymax,_,_,_ = detection
        # Points whose projection falls inside the 2D box form the frustum.
        box_fov_inds = (pc_image_coord[:,0]<xmax) & \
            (pc_image_coord[:,0]>=xmin) & \
            (pc_image_coord[:,1]<ymax) & \
            (pc_image_coord[:,1]>=ymin)
        pc_in_box_fov = point_cloud[box_fov_inds,:]
        if omni:
            # Angle taken from the horizontal box center as a fraction of
            # calib.img_shape[2], scaled to a full turn.
            frustum_angle = ((xmin+xmax)/2.0) /calib.img_shape[2] * (2 * np.pi)  - np.pi/2
        else:
            # Back-project the box center at an arbitrary depth; only the
            # direction of the resulting ray is used.
            box_center = np.array([xmax+xmin, ymin+ymax])/2
            uvdepth = np.zeros((1,3))
            uvdepth[0,0:2] = box_center
            uvdepth[0,2] = 20 # some random depth
            box2d_center_rect = calib.project_image_to_rect(uvdepth)
            frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],
                box2d_center_rect[0,0])
        rot_angles.append(frustum_angle)
        if len(pc_in_box_fov)<lidar_point_threshold:
            # Too few points: mark as invalid and insert an all-zero frustum
            # so the batch keeps one entry per detection.
            ids_3d[i] = -1
            point_clouds.append(torch.zeros((1, num_point, 4)).type(pc_in_box_fov.type()))
        else:
            if pc_in_box_fov.shape[0] > num_point:
                # Subsample without replacement down to num_point.
                idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point), replace=False)
                pc_in_box_fov = pc_in_box_fov[idx].unsqueeze(0)
            else:
                # Pad up to num_point by repeating randomly chosen points.
                idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point-pc_in_box_fov.shape[0]), replace=True)
                pc_in_box_fov = torch.cat([pc_in_box_fov, pc_in_box_fov[idx]], dim=0).unsqueeze(0)
            point_clouds.append(pc_in_box_fov)
    # NOTE(review): torch.cat is given an empty list when detections is
    # empty -- confirm callers never pass zero detections.
    point_clouds = torch.cat(point_clouds, dim=0)
    point_clouds = rotate_pc_along_y_torch(point_clouds, rot_angles)

    return point_clouds, rot_angles, ids_3d
# @profile
def generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False, omni=False):
    ''' Run the 3D detector on the frustums of a set of 2D detections.

    Lidar points are first restricted to those projecting inside the
    image, then one frustum per 2D detection is extracted and passed to
    detector. Detections whose frustum was rejected, or whose score is
    NaN, get a None box and an id of -1.

    Input:
        detector: callable taking (frustums, rot_angles, peds) and
            exposing num_point
        detections_2d: 2D detections, forwarded to preprocess_pointcloud
        point_cloud: lidar points, first three columns are XYZ
        calib: calibration object
        img_shape: 3-element shape whose last two entries are the image
            height and width
        peds: forwarded to detector
        omni: forwarded to preprocess_pointcloud
    Output:
        boxes_3d, ids_3d, rot_angles, scores_3d, depth_features,
        point_cloud_frustrums (numpy array of the extracted frustums)
    '''
    _, img_height, img_width = img_shape
    projected, in_image = get_lidar_in_image_fov(
        point_cloud[:,:3], calib, 0, 0, img_width, img_height)
    projected = projected[in_image,:]
    point_cloud = point_cloud[in_image,:]

    frustums, rot_angles, ids_3d = preprocess_pointcloud(
        detections_2d, point_cloud, projected, calib,
        num_point = detector.num_point, omni=omni)
    frustums = frustums.cpu().numpy()

    boxes_3d, scores_3d, depth_features = detector(
        frustums, np.asarray(rot_angles), peds)

    for i in range(len(ids_3d)):
        if not (ids_3d[i] == -1 or np.isnan(scores_3d[i])):
            continue
        boxes_3d[i] = None
        ids_3d[i] = -1
    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features, frustums

def convert_depth_features(depth_features_orig, ids_3d):
    ''' Blank out the depth features of invalid 3D detections.

    Returns a new list in which entry i is None when the original
    feature is None or ids_3d[i] is -1, and the original feature
    otherwise.
    '''
    return [
        None if (feature is None or ids_3d[i] == -1) else feature
        for i, feature in enumerate(depth_features_orig)
    ]


================================================
FILE: src/featurepointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.

Author: Charles R. Qi
Date: November 2017
"""

import numpy as np
import tensorflow as tf

def _variable_on_cpu(name, shape, initializer, use_fp16=False):
  """Create (or fetch) a variable placed on the CPU device.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
    use_fp16: bool, use tf.float16 instead of tf.float32 as the dtype
  Returns:
    Variable Tensor
  """
  var_dtype = tf.float32
  if use_fp16:
    var_dtype = tf.float16
  with tf.device("/cpu:0"):
    return tf.get_variable(name, shape, initializer=initializer,
                           dtype=var_dtype)

def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
  """Create an initialized variable and optionally register weight decay.

  The variable is initialized with the Xavier initializer when use_xavier
  is set, otherwise with a truncated normal of the given stddev. When wd
  is given, an L2 penalty scaled by wd is added to the 'losses'
  collection.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    use_xavier: bool, whether to use xavier initializer

  Returns:
    Variable Tensor
  """
  initializer = (tf.contrib.layers.xavier_initializer() if use_xavier
                 else tf.truncated_normal_initializer(stddev=stddev))
  var = _variable_on_cpu(name, shape, initializer)
  if wd is None:
    return var
  penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
  tf.add_to_collection('losses', penalty)
  return var


def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 1D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 3-D tensor variable BxLxC
    num_output_channels: int
    kernel_size: int
    scope: string, variable scope holding 'weights' and 'biases'
    stride: int
    padding: 'SAME' or 'VALID'
    data_format: 'NHWC' or 'NCHW'; decides which axis holds the channels
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    assert(data_format=='NHWC' or data_format=='NCHW')
    # Channels are last for 'NHWC' and directly after the batch for 'NCHW'.
    channel_axis = -1 if data_format == 'NHWC' else 1
    in_channels = inputs.get_shape()[channel_axis].value

    weights = _variable_with_weight_decay(
        'weights',
        shape=[kernel_size, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    conv = tf.nn.conv1d(inputs, weights,
                        stride=stride,
                        padding=padding,
                        data_format=data_format)
    bias = _variable_on_cpu('biases', [num_output_channels],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(conv, bias, data_format=data_format)

    if bn:
      result = batch_norm_for_conv1d(result, is_training,
                                     bn_decay=bn_decay, scope='bn',
                                     data_format=data_format)

    if activation_fn is None:
      return result
    return activation_fn(result)


def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 2D convolution with non-linear operation.

  Applies the convolution, adds a bias, then optionally batch norm and
  the activation function.

  Args:
    inputs: 4-D tensor variable BxHxWxC ('NHWC') or BxCxHxW ('NCHW')
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string, variable scope holding 'weights' and 'biases'
    stride: a list of 2 ints (height stride, width stride)
    padding: 'SAME' or 'VALID'
    data_format: 'NHWC' or 'NCHW'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
      kernel_h, kernel_w = kernel_size
      assert(data_format=='NHWC' or data_format=='NCHW')
      if data_format == 'NHWC':
        num_in_channels = inputs.get_shape()[-1].value
      elif data_format=='NCHW':
        num_in_channels = inputs.get_shape()[1].value
      kernel_shape = [kernel_h, kernel_w,
                      num_in_channels, num_output_channels]
      kernel = _variable_with_weight_decay('weights',
                                           shape=kernel_shape,
                                           use_xavier=use_xavier,
                                           stddev=stddev,
                                           wd=weight_decay)
      stride_h, stride_w = stride
      # tf.nn.conv2d expects the strides in the same dimension order as
      # data_format, so the spatial strides move to the last two slots
      # for 'NCHW'.
      if data_format == 'NHWC':
        strides = [1, stride_h, stride_w, 1]
      else:
        strides = [1, 1, stride_h, stride_w]
      outputs = tf.nn.conv2d(inputs, kernel,
                             strides,
                             padding=padding,
                             data_format=data_format)
      biases = _variable_on_cpu('biases', [num_output_channels],
                                tf.constant_initializer(0.0))
      outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

      if bn:
        outputs = batch_norm_for_conv2d(outputs, is_training,
                                        bn_decay=bn_decay, scope='bn',
                                        data_format=data_format)

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return outputs


def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=None,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
  """ 2D convolution transpose with non-linear operation.

  Applies the transposed convolution, adds a bias, then optionally batch
  norm and the activation function. Input dimensions that are not
  statically known (e.g. a variable batch size) are taken from the
  runtime shape of inputs.

  Args:
    inputs: 4-D tensor variable BxHxWxC
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string, variable scope holding 'weights' and 'biases'
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor

  Note: intended as the shape-level inverse of conv2d, i.e.
    conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride)
    has the same shape as a.
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
      kernel_h, kernel_w = kernel_size
      num_in_channels = inputs.get_shape()[-1].value
      kernel_shape = [kernel_h, kernel_w,
                      num_output_channels, num_in_channels] # reversed to conv2d
      kernel = _variable_with_weight_decay('weights',
                                           shape=kernel_shape,
                                           use_xavier=use_xavier,
                                           stddev=stddev,
                                           wd=weight_decay)
      stride_h, stride_w = stride

      def input_dim(axis):
          # Prefer the static size; fall back to the runtime size only when
          # the static one is unknown, so fully-defined shapes build
          # exactly the same graph as before.
          static_size = inputs.get_shape()[axis].value
          if static_size is not None:
              return static_size
          return tf.shape(inputs)[axis]

      # from slim.convolution2d_transpose
      def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
          # dim_size is either a Python int or a scalar int32 tensor; both
          # support the arithmetic below.
          dim_size = dim_size * stride_size

          if padding == 'VALID':
            dim_size = dim_size + max(kernel_size - stride_size, 0)
          return dim_size

      # calculate output shape
      batch_size = input_dim(0)
      height = input_dim(1)
      width = input_dim(2)
      out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
      out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
      output_shape = [batch_size, out_height, out_width, num_output_channels]

      outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                             [1, stride_h, stride_w, 1],
                             padding=padding)
      biases = _variable_on_cpu('biases', [num_output_channels],
                                tf.constant_initializer(0.0))
      outputs = tf.nn.bias_add(outputs, biases)

      if bn:
        outputs = batch_norm_for_conv2d(outputs, is_training,
                                        bn_decay=bn_decay, scope='bn')

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return outputs

   
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 3D convolution followed by bias, optional batch norm and activation.

  Args:
    inputs: 5-D tensor variable BxDxHxWxC
    num_output_channels: int
    kernel_size: a list of 3 ints
    scope: string, variable scope holding 'weights' and 'biases'
    stride: a list of 3 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_d, k_h, k_w = kernel_size
    in_channels = inputs.get_shape()[-1].value
    weights = _variable_with_weight_decay(
        'weights',
        shape=[k_d, k_h, k_w, in_channels, num_output_channels],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    s_d, s_h, s_w = stride
    conv = tf.nn.conv3d(inputs, weights,
                        [1, s_d, s_h, s_w, 1],
                        padding=padding)
    bias = _variable_on_cpu('biases', [num_output_channels],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(conv, bias)

    if bn:
      result = batch_norm_for_conv3d(result, is_training,
                                     bn_decay=bn_decay, scope='bn')

    if activation_fn is None:
      return result
    return activation_fn(result)

def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=None,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
  """ Fully connected layer: matmul, bias, optional batch norm, activation.

  Args:
    inputs: 2-D tensor BxN
    num_outputs: int
    scope: string, variable scope holding 'weights' and 'biases'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function, or None to skip the activation
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor of size B x num_outputs.
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    in_units = inputs.get_shape()[-1].value
    weight_matrix = _variable_with_weight_decay(
        'weights',
        shape=[in_units, num_outputs],
        use_xavier=use_xavier,
        stddev=stddev,
        wd=weight_decay)
    product = tf.matmul(inputs, weight_matrix)
    bias = _variable_on_cpu('biases', [num_outputs],
                            tf.constant_initializer(0.0))
    result = tf.nn.bias_add(product, bias)

    if bn:
      result = batch_norm_for_fc(result, is_training, bn_decay, 'bn')

    if activation_fn is None:
      return result
    return activation_fn(result)


def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D max pooling over the two middle dimensions of a 4-D tensor.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    scope: string, variable scope whose name is given to the op
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    k_h, k_w = kernel_size
    s_h, s_w = stride
    window = [1, k_h, k_w, 1]
    step = [1, s_h, s_w, 1]
    return tf.nn.max_pool(inputs,
                          ksize=window,
                          strides=step,
                          padding=padding,
                          name=sc.name)

def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ Average-pool 2D feature maps with tf.nn.avg_pool.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints, the pooling window (height, width)
    scope: variable scope the op is built in; the scope's name is also
      passed on as the op name
    stride: a list of 2 ints, the window step (height, width)
    padding: padding mode handed to tf.nn.avg_pool

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
    (window_h, window_w), (step_h, step_w) = kernel_size, stride
    # Batch and channel dimensions are never pooled, hence the 1s.
    window = [1, window_h, window_w, 1]
    step = [1, step_h, step_w, 1]
    return tf.nn.avg_pool(inputs,
                          ksize=window,
                          strides=step,
                          padding=padding,
                          name=pool_scope.name)


def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ Max-pool 3D feature volumes with tf.nn.max_pool3d.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints, the pooling window (depth, height, width)
    scope: variable scope the op is built in; the scope's name is also
      passed on as the op name
    stride: a list of 3 ints, the window step (depth, height, width)
    padding: padding mode handed to tf.nn.max_pool3d

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
    window_d, window_h, window_w = kernel_size
    step_d, step_h, step_w = stride
    # Batch and channel dimensions are never pooled, hence the 1s.
    window = [1, window_d, window_h, window_w, 1]
    step = [1, step_d, step_h, step_w, 1]
    return tf.nn.max_pool3d(inputs,
                            ksize=window,
                            strides=step,
                            padding=padding,
                            name=pool_scope.name)

def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
  """ Average-pool 3D feature volumes with tf.nn.avg_pool3d.

  Args:
    inputs: 5-D tensor BxDxHxWxC
    kernel_size: a list of 3 ints, the pooling window (depth, height, width)
    scope: variable scope the op is built in; the scope's name is also
      passed on as the op name
    stride: a list of 3 ints, the window step (depth, height, width)
    padding: padding mode handed to tf.nn.avg_pool3d

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
    window_d, window_h, window_w = kernel_size
    step_d, step_h, step_w = stride
    # Batch and channel dimensions are never pooled, hence the 1s.
    window = [1, window_d, window_h, window_w, 1]
    step = [1, step_d, step_h, step_w, 1]
    return tf.nn.avg_pool3d(inputs,
                            ksize=window,
                            strides=step,
                            padding=padding,
                            name=pool_scope.name)


def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
  """ NOTE: this is an older version of the util func. It is deprecated.
  Batch normalization on convolutional maps and beyond...
  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
  
  Args:
      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:   boolean tf.Variable, true indicates training phase
      scope:         string, variable scope
      moments_dims:  a list of ints, indicating dimensions for moments calculation
      bn_decay:      float or float tensor variable, controlling moving average weight
                     (0.9 is used when None)
  Return:
      normed:        batch-normalized maps
  """
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
    # One learnable shift (beta, init 0) and scale (gamma, init 1) per channel,
    # where the channel count is the size of the last input dimension.
    num_channels = inputs.get_shape()[-1].value
    beta = _variable_on_cpu(name='beta',shape=[num_channels],
                            initializer=tf.constant_initializer(0))
    gamma = _variable_on_cpu(name='gamma',shape=[num_channels],
                            initializer=tf.constant_initializer(1.0))
    # Statistics of the current batch over the requested dimensions.
    batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
    decay = bn_decay if bn_decay is not None else 0.9
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    # Operator that maintains moving averages of variables.
    # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist
    # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        # The moving averages are only updated while training; otherwise no-op.
        ema_apply_op = tf.cond(is_training,
                               lambda: ema.apply([batch_mean, batch_var]),
                               lambda: tf.no_op())
    
    # Update moving average and return current batch's avg and var.
    def mean_var_with_update():
      # The control dependency forces the EMA update to run before the batch
      # statistics are handed out.
      with tf.control_dependencies([ema_apply_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)
    
    # ema.average returns the Variable holding the average of var.
    # Training: normalize with batch statistics; otherwise: with the moving averages.
    mean, var = tf.cond(is_training,
                        mean_var_with_update,
                        lambda: (ema.average(batch_mean), ema.average(batch_var)))
    # 1e-3 is the variance epsilon passed to tf.nn.batch_normalization.
    normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
  return normed


def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
  """ Batch normalization on convolutional maps and beyond, delegated to
  tf.contrib.layers.batch_norm.
  Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
  
  Args:
      inputs:               Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:          boolean tf.Variable, true indicates training phase
      scope:                string, variable scope
      moments_dims_unused:  ignored; accepted so callers of the older template
                            keep working
      bn_decay:             float or float tensor variable, controlling moving
                            average weight (0.9 is used when None)
      data_format:          'NHWC' or 'NCHW'
  Return:
      normed:        batch-normalized maps
  """
  decay = 0.9 if bn_decay is None else bn_decay
  layer_options = dict(center=True,
                       scale=True,
                       is_training=is_training,
                       decay=decay,
                       updates_collections=None,
                       scope=scope,
                       data_format=data_format)
  return tf.contrib.layers.batch_norm(inputs, **layer_options)


def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
  """ Batch normalization for fully connected activations.
  
  Args:
      inputs:      Tensor, 2D BxC input
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  # The moments dimensions are forwarded only for signature compatibility;
  # the template names that parameter `moments_dims_unused`.
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0],
                             bn_decay=bn_decay)


def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):
  """ Batch normalization for 1D convolutional feature maps.
  
  Args:
      inputs:      Tensor, 3D BLC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
      data_format: 'NHWC' or 'NCHW'
  Return:
      normed:      batch-normalized maps
  """
  # The moments dimensions are forwarded only for signature compatibility;
  # the template names that parameter `moments_dims_unused`.
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0, 1],
                             bn_decay=bn_decay,
                             data_format=data_format)


def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format):
  """ Batch normalization for 2D convolutional feature maps.
  
  Args:
      inputs:      Tensor, 4D BHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
      data_format: 'NHWC' or 'NCHW'
  Return:
      normed:      batch-normalized maps
  """
  # The moments dimensions are forwarded only for signature compatibility;
  # the template names that parameter `moments_dims_unused`.
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0, 1, 2],
                             bn_decay=bn_decay,
                             data_format=data_format)


def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
  """ Batch normalization for 3D convolutional feature maps.
  
  Args:
      inputs:      Tensor, 5D BDHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
  Return:
      normed:      batch-normalized maps
  """
  # The moments dimensions are forwarded only for signature compatibility;
  # the template names that parameter `moments_dims_unused`.
  return batch_norm_template(inputs=inputs,
                             is_training=is_training,
                             scope=scope,
                             moments_dims_unused=[0, 1, 2, 3],
                             bn_decay=bn_decay)


def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
  """ Dropout that is only active while training.

  Args:
    inputs: tensor
    is_training: boolean tf.Variable; selects the dropout branch when true,
      otherwise `inputs` is passed through unchanged
    scope: string
    keep_prob: float in [0,1]
    noise_shape: list of ints

  Returns:
    tensor variable
  """
  def _apply_dropout():
    return tf.nn.dropout(inputs, keep_prob, noise_shape)

  def _pass_through():
    return inputs

  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    return tf.cond(is_training, _apply_dropout, _pass_through)


================================================
FILE: src/iou_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import linear_assignment
import pdb


def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate. Pairs whose union has zero area (both
        boxes degenerate) score 0 instead of NaN.

    """
    bbox = np.asarray(bbox, dtype=float)
    candidates = np.asarray(candidates, dtype=float)

    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates_tl + candidates[:, 2:]

    # Corners of the overlap rectangle; `bbox` broadcasts against every row.
    overlap_tl = np.maximum(bbox_tl, candidates_tl)
    overlap_br = np.minimum(bbox_br, candidates_br)
    # Disjoint boxes give a negative extent, which is clipped to 0.
    overlap_wh = np.maximum(0., overlap_br - overlap_tl)

    area_intersection = overlap_wh.prod(axis=1)
    area_union = (bbox[2:].prod() + candidates[:, 2:].prod(axis=1)
                  - area_intersection)

    # Divide only where the union is positive so that degenerate pairs do not
    # inject NaN into the cost matrix built from these scores.
    scores = np.zeros_like(area_intersection)
    np.divide(area_intersection, area_union, out=scores, where=area_union > 0)
    return scores


def _footprint_tlwh(boxes):
    """Reduce 3-D box vector(s) to the 4 columns handed to `iou`, on a copy.

    Works on a single box (1-D) or a stack of boxes (one per row). Columns
    0:2 are shifted by half of columns 3:5, then columns [0, 2, 3, 5] are
    kept. The input is never modified.
    """
    # Float copy: protects the caller's array and avoids the casting error an
    # in-place float subtraction raises on integer arrays.
    boxes = np.array(boxes, dtype=float)
    # NOTE(review): the shift touches columns 0 and 1 using extents 3 and 4,
    # but the selection keeps columns 0 and 2 with extents 3 and 5, so the
    # kept column 2 is never shifted. Preserved as-is; confirm against the
    # box_3d layout.
    boxes[..., :2] -= boxes[..., 3:5] / 2
    return boxes[..., [0, 2, 3, 5]]


def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None, use3d=False, kf=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.
    use3d : bool
        If True, boxes come from `detection.box_3d` and
        `track.to_tlwh3d()` and are reduced to 4 columns before the IoU is
        computed; otherwise `detection.tlwh` and `track.to_tlwh(kf)` are
        used directly.
    kf : Optional[object]
        Forwarded to `track.to_tlwh` in the 2-D case; unused when `use3d`
        is True.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))

    if cost_matrix.shape[0] == 0 or cost_matrix.shape[1] == 0:
        return cost_matrix
    if use3d:
        # @TODO: Should use a Detection3D class to do this
        candidates = _footprint_tlwh(
            [detections[i].box_3d for i in detection_indices])
    else:
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])

    for row, track_idx in enumerate(track_indices):
        if use3d:
            # Converted on a copy so the array returned by the track is
            # never modified in place.
            bbox = _footprint_tlwh(tracks[track_idx].to_tlwh3d())
        else:
            bbox = tracks[track_idx].to_tlwh(kf)
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix


================================================
FILE: src/kf_2d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import EKF
import pdb
# Module-level side effect: numpy arrays are printed with 4 decimals and
# without scientific notation for small values from the moment this module
# is imported.
np.set_printoptions(precision=4, suppress=True)

class KalmanFilter2D(EKF.EKF):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, w, h, vx, vy, vw, vh

    contains the bounding box position (x, y), width w, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, w, h) is taken as direct observation of the state space (linear
    observation model).

    Process and initial noise grow with `abs(x / img_center - 1)`, i.e. with
    the horizontal distance of the box from `img_center`.

    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, gate_limit, img_center=1242):
        """Build the motion/observation matrices and store the noise weights.

        Parameters
        ----------
        pos_weight : float
            Weight of the position/size standard deviations (relative to the
            box width/height).
        velocity_weight : float
            Weight of the velocity standard deviations (relative to the box
            width/height).
        std_process : float
            Global multiplier of the process noise standard deviation.
        std_measurement : float
            Global multiplier of the measurement noise standard deviation.
        initial_uncertainty : float
            Extra multiplier applied to the covariance of a new track.
        gate_limit : float
            Upper bound on the largest entry of the projected covariance
            used in `gating_distance`.
        img_center : float
            Horizontal reference used to scale the noise. Defaults to 1242,
            the value that used to be hard-coded here.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = img_center
        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        self.LIMIT = gate_limit

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box (x, y, w, h).
        flow : Optional[ndarray]
            Optical flow image indexed as [row, column, 2]. When given, the
            mean flow inside the box initializes (vx, vy). When None, or when
            the box does not overlap the flow image, velocities start at 0.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track.

        """
        # Float copy so that velocities are not truncated for integer boxes.
        mean_pos = np.asarray(measurement, dtype=float)
        mean_vel = np.zeros_like(mean_pos)
        if flow is not None:
            # NOTE(review): the crop treats (x, y) as the top-left corner of
            # the box, while the class docstring used to call it the center;
            # confirm which convention the caller uses.
            # Bounds are clamped at 0 so negative coordinates do not wrap
            # around to the far side of the image.
            top = max(int(mean_pos[1]), 0)
            bottom = max(int(mean_pos[1] + mean_pos[3]), 0)
            left = max(int(mean_pos[0]), 0)
            right = max(int(mean_pos[0] + mean_pos[2]), 0)
            patch = flow[top:bottom, left:right, :]
            # An empty crop would average to NaN; keep zero velocity instead.
            if patch.size > 0:
                mean_vel[:2] = np.mean(np.reshape(patch, (-1, 2)), axis=0)
        mean = np.r_[mean_pos, mean_vel]

        # Initialize covariance based on w, h and configured std
        offset = abs(mean_pos[0] / self.img_center - 1)
        extents = np.tile(mean_pos[2:4], 2)  # [w, h, w, h]
        std_pos = (1 + offset) * self._std_weight_pos * extents
        # Velocity uncertainty grows 1.5x faster with the offset than position.
        std_vel = (1 + 1.5 * offset) * self._std_weight_vel * extents
        std = np.r_[std_pos, std_vel]

        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2
        return mean, covariance

    def predict_mean(self, mean):
        """Return the state mean propagated one step by the motion matrix."""
        return np.dot(self._motion_mat, mean)

    def predict_covariance(self, mean, covariance):
        """Return the propagated covariance F P F^T plus the process noise."""
        process_noise = self.get_process_noise(mean)
        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T))
                     + process_noise)

    def get_process_noise(self, mean):
        """Return the 8x8 diagonal process noise covariance for `mean`.

        Standard deviations are proportional to the box width/height and to
        `1 + abs(x / img_center - 1)`.
        """
        scale = 1 + abs(mean[0] / self.img_center - 1)
        extents = np.tile(np.asarray(mean)[2:4], 2)  # [w, h, w, h]
        std_pos = scale * self._std_weight_pos * extents
        std_vel = scale * self._std_weight_vel * extents
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2

        return motion_cov

    def project_mean(self, mean):
        """Return the predicted measurement, i.e. the first 4 state entries."""
        return np.dot(self._observation_mat, mean)

    def get_measurement_noise(self, measurement):
        """Return the 4x4 diagonal measurement noise covariance.

        Standard deviations are proportional to entries 2 and 3 (w, h) of
        `measurement`; only those two entries are read.
        """
        extents = np.tile(np.asarray(measurement)[2:4], 2)  # [w, h, w, h]
        std = self._std_weight_pos * extents
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement**2
        return innovation_cov

    def project_cov(self, mean, covariance):
        """Return the innovation covariance H P H^T plus measurement noise."""
        measurement_noise = self.get_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                          self._observation_mat.T))
                     + measurement_noise)
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, use_3d=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, w, h).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box position (x, y) only.
        use_3d : Optional[bool]
            Unused by this filter; accepted for interface compatibility.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        projected_mean, projected_covariance = self.project(mean, covariance)
        if only_position:
            projected_mean, projected_covariance = projected_mean[:2], projected_covariance[:2, :2]
            measurements = measurements[:, :2]
        max_val = np.amax(projected_covariance)
        if max_val > self.LIMIT:
            # Shrink the covariance so its largest entry equals LIMIT. A new
            # array is built so whatever `project` returned is left untouched.
            projected_covariance = projected_covariance * (self.LIMIT / max_val)
        return EKF.squared_mahalanobis_distance(projected_mean, projected_covariance, measurements)

class RandomWalkKalmanFilter2D(KalmanFilter2D):
    """
    A random-walk variant of `KalmanFilter2D`.

    The state keeps the same 8-dimensional layout

        x, y, w, h, vx, vy, vw, vh

    but the motion matrix is the identity on (x, y, w, h) and zero on the
    velocity block: a prediction step leaves the box where it is and resets
    the velocity entries to 0. All movement is therefore explained by the
    process noise.

    The bounding box location (x, y, w, h) is taken as direct observation of
    the state space (linear observation model).

    """
    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, img_center=1242, gate_limit=None):
        """Build the random-walk model matrices and store the noise weights.

        Parameters
        ----------
        pos_weight, velocity_weight, std_process, std_measurement,
        initial_uncertainty, img_center :
            Same meaning as in `KalmanFilter2D`.
        gate_limit : Optional[float]
            Upper bound on the largest entry of the projected covariance used
            by the inherited `gating_distance`. When None and no `LIMIT`
            attribute is otherwise available, no bound is applied.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = img_center
        # Create Kalman filter model matrices.
        # Motion model is a random walk: positions are carried over unchanged
        # and the velocity block is zeroed.
        self._motion_mat = np.eye(2*ndim, 2*ndim)
        self._motion_mat[ndim:, ndim:] = 0
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2*ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty

        # The inherited gating_distance() reads self.LIMIT, which this
        # constructor previously never defined. An infinite limit makes the
        # `max_val > self.LIMIT` test always false, i.e. disables the cap.
        if gate_limit is not None:
            self.LIMIT = gate_limit
        elif not hasattr(self, 'LIMIT'):
            self.LIMIT = np.inf


================================================
FILE: src/linear_assignment.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment
import EKF
import pdb
from mbest_ilp import new_m_best_sol
from multiprocessing import Pool
from functools import partial
#from mbest_ilp import m_best_sol as new_m_best_sol

# Cost written into gated-out (track, detection) entries by JPDA(); entries
# above `INFTY_COST - 0.0001` are treated as infeasible there.
INFTY_COST = 1e+5
# Global counters incremented by get_JPDA_output(): number of clusters solved
# with the appearance cost (APP_COUNT) versus the IoU cost (IOU_COUNT).
APP_COUNT = 0
IOU_COUNT = 0
def min_marg_matching(marginalizations, track_indices=None, max_distance=1):
    """Turn a marginal association matrix into hard matches.

    Parameters
    ----------
    marginalizations : ndarray
        Matrix with one row per track. Column 0 is treated as the dummy
        ("no detection") column, the remaining columns as detections.
    track_indices : Optional[List[int]]
        Maps rows to track indices. Defaults to `0..num_tracks-1`.
    max_distance : float
        Assignments whose cost `1 - marginal` exceeds this value are rejected.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Matches as (track index, detection index), unmatched track indices
        and unmatched detection indices.

    """
    cost_matrix = 1 - marginalizations
    num_tracks, num_detections = cost_matrix.shape
    if track_indices is None:
        track_indices = np.arange(num_tracks)

    # Real detections only; the dummy column is not a detection.
    detection_indices = np.arange(num_detections-1)

    if num_tracks == 0 or num_detections == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    # Replicate the dummy column so that every track can pick "no detection":
    # columns 0..num_tracks-1 are dummies, detection j sits at num_tracks + j.
    dummy_block = np.tile(cost_matrix[:, :1], (1, num_tracks - 1))
    padded_costs = np.hstack((dummy_block, cost_matrix))
    pairs = linear_assignment(padded_costs)
    assigned_rows = set(pairs[:, 0].tolist())
    assigned_cols = set(pairs[:, 1].tolist())

    # Detections no track was assigned to (candidates for new tracks).
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col + num_tracks not in assigned_cols]

    # Tracks missing from the assignment altogether.
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    matches = []
    for row, col in pairs:
        track_idx = track_indices[row]
        detection_idx = col - num_tracks
        if detection_idx < 0:
            # Assigned to one of the dummy columns.
            unmatched_tracks.append(track_idx)
        elif padded_costs[row, col] > max_distance:
            # Assigned, but too costly to accept.
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))

    return matches, unmatched_tracks, unmatched_detections

def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None, compare_2d = False, detections_3d=None):
    """Associate tracks and detections by solving a linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d, detections_3d)`. It should return the
        NxM cost matrix whose element (i, j) is the association cost between
        the i-th given track index and the j-th given detection index. The
        returned matrix is clipped in place.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Maps rows of the cost matrix to entries of `tracks`. Defaults to all
        tracks.
    detection_indices : List[int]
        Maps columns of the cost matrix to entries of `detections`. Defaults
        to all detections.
    compare_2d : bool
        Forwarded unchanged to `distance_metric`.
    detections_3d : Optional[object]
        Forwarded unchanged to `distance_metric`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)
    # Flatten everything beyond the gate to one value just above it, so that
    # the solver does not rank infeasible pairs against each other.
    beyond_gate = cost_matrix > max_distance
    cost_matrix[beyond_gate] = max_distance + 1e-5

    pairs = linear_assignment(cost_matrix)
    assigned_rows = set(pairs[:, 0].tolist())
    assigned_cols = set(pairs[:, 1].tolist())

    # Detections left without a track (candidates for new tracks).
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col not in assigned_cols]

    # Tracks left without a detection.
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    matches = []
    for row, col in pairs:
        track_idx, detection_idx = track_indices[row], detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            # Assigned by the solver but outside the gate: reject the pair.
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))

    return matches, unmatched_tracks, unmatched_detections

# @profile
def JPDA(
        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,
        detection_indices=None, m=1, compare_2d = False, windowing = False):
    """Compute a track-vs-detection association weight matrix.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d)`. It must return `(cost_matrix,
        gate_mask)`; `cost_matrix` is indexed as [track, detection, cue] and
        entries selected by `gate_mask` are overwritten with `INFTY_COST`.
    dummy_node_cost_app : float
        Forwarded to `get_JPDA_output` (dummy cost for the appearance cue).
    dummy_node_cost_iou : float
        Forwarded to `get_JPDA_output` (dummy cost for the IoU cue).
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Track indices handed to `distance_metric`. Defaults to all tracks.
    detection_indices : List[int]
        Detection indices handed to `distance_metric`. Defaults to all
        detections.
    m : int
        Forwarded to `get_JPDA_output`.
    compare_2d : bool
        Forwarded to `distance_metric`.
    windowing : bool
        Not used by this function.

    Returns
    -------
    ndarray
        Matrix of shape (num_tracks, num_detections + 1). Column 0 is the
        dummy ("no detection") column. If there are no tracks or no
        detections a matrix of shape (0, len(detections) + 1) is returned; if
        no cluster is found every track gets weight 1 in column 0.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    nothing_to_match = len(detection_indices) == 0 or len(track_indices) == 0
    if nothing_to_match:
        return np.zeros((0, len(detections) + 1))

    cost_matrix, gate_mask = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d)
    num_tracks, num_detections = cost_matrix.shape[0], cost_matrix.shape[1]
    cost_matrix[gate_mask] = INFTY_COST

    # Anything above this value counts as gated out.
    cutoff = INFTY_COST - 0.0001
    # Clusters are found on cue 0 of the cost matrix.
    clusters = find_clusters(cost_matrix[:, :, 0], cutoff)

    per_cluster = [
        get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m)
        for cluster in clusters]
    if not per_cluster:
        no_detection = np.zeros((num_tracks, num_detections + 1))
        no_detection[:, 0] = 1
        return no_detection

    # Pool the hypotheses (flattened assignment rows) and their costs of all
    # clusters.
    hypothesis_rows, hypothesis_costs = [], []
    for cluster_assignments, cluster_costs in per_cluster:
        hypothesis_rows.extend(cluster_assignments)
        hypothesis_costs.extend(cluster_costs)
    assignments = np.vstack(hypothesis_rows)
    assignment_cost = np.array(hypothesis_costs)

    # Weight each hypothesis by exp(-cost) and sum them up.
    weights = np.exp(-np.expand_dims(assignment_cost, 1))
    marginalised_cost = np.sum(assignments * weights, axis = 0)
    return np.reshape(marginalised_cost, (num_tracks, num_detections+1))

def calculate_entropy(matrix, idx, idy):
    """Mean row entropy of `matrix`, ignoring the entries at (idx, idy).

    Every row is divided by the sum of its non-ignored entries and the terms
    `-p * log(p)` of the non-ignored entries are summed per row; the mean
    over rows is returned.

    Parameters
    ----------
    matrix : ndarray
        2-D array of non-negative values.
    idx, idy : array-like of int
        Row and column indices of the entries to ignore.

    Returns
    -------
    float
        Mean of the per-row entropies. Entries equal to 0 contribute 0 (the
        usual `0 * log(0) = 0` convention) instead of turning the result into
        NaN. A row whose non-ignored entries sum to 0 still yields NaN.

    """
    keep = np.ones(matrix.shape)
    keep[idx, idy] = 0
    row_totals = np.sum(keep*matrix, axis=1, keepdims=True)
    # log(0) and 0/0 are expected here and dealt with right below.
    with np.errstate(divide='ignore', invalid='ignore'):
        shares = matrix/row_totals
        terms = -shares*np.log(shares)
    # Ignored entries never contribute, and neither do exact zeros. Multiplying
    # by the 0/1 mask is not enough: NaN * 0 is still NaN.
    terms[(keep == 0) | (shares == 0)] = 0.0
    return np.mean(np.sum(terms, axis=1))

def get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):
    """Build the weighted assignment hypotheses for one cluster.

    Parameters
    ----------
    cluster : sequence
        `cluster[0]` holds the track (row) indices of the cluster and
        `cluster[1]` the detection indices. The detection indices are 1-based:
        `cluster[1] - 1` indexes the columns of `cost_matrix`, while
        `cluster[1]` itself indexes the output, whose column 0 is the dummy
        column. `cluster[1]` must support `- 1`, i.e. be an ndarray.
    cost_matrix : ndarray
        Costs indexed as [track, detection, cue]; cue 0 is read as the
        appearance cost and cue 1 as the IoU cost.
    dummy_node_cost_app : float
        Dummy cost used when the appearance cue is selected.
    dummy_node_cost_iou : float
        Dummy cost used (as `-log(1 - value)`) when the IoU cue is selected.
    cutoff : float
        Appearance costs above this value mark gated-out pairs; those pairs
        get cost `cutoff` in the matrix that is solved.
    m : int
        Forwarded to `new_m_best_sol`.
        NOTE(review): presumably the number of best solutions - confirm.

    Returns
    -------
    (List[ndarray], ndarray)
        A list of hypotheses, each a (1, num_tracks * (num_detections + 1))
        row holding a flattened (num_tracks, num_detections + 1) matrix, and
        the array of their costs.

    """
    if len(cluster[1]) == 0:
        # No detection in this cluster: all its tracks go to the dummy column
        # with cost 0.
        assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        assignment[cluster[0], 0] = 1
        assignment = assignment.reshape(1,-1)
        return [assignment], np.array([0])

    # Gather the cluster's (track, detection) sub-matrix of cue 0 (appearance).
    new_cost_matrix_appearance = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])),
                                        np.tile(cluster[1] - 1, len(cluster[0])),
                                        [0]*(len(cluster[1])*len(cluster[0]))],
                                        (len(cluster[0]), len(cluster[1])))
    # Same sub-matrix for cue 1 (IoU).
    new_cost_matrix_iou = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), np.tile(cluster[1] - 1, len(cluster[0])), 1],
                (len(cluster[0]), len(cluster[1])))
    # Gated-out pairs, detected on the appearance costs and applied to
    # whichever cue is selected below.
    idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)
    appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)
    iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)
    # The cue with the lower entropy is used. If either entropy is NaN the
    # comparison is False and the IoU branch runs.
    if appearance_entropy < iou_entropy:
        new_cost_matrix = new_cost_matrix_appearance
        # new_cost_matrix = 2*np.ones(new_cost_matrix.shape)/(new_cost_matrix+1) - 1
        global APP_COUNT
        APP_COUNT += 1
        dummy_node_cost = dummy_node_cost_app
    else:
        global IOU_COUNT
        IOU_COUNT += 1
        new_cost_matrix = new_cost_matrix_iou
        # Costs of exactly 1 are nudged down so that 1 - cost is not 0 and
        # the log below stays finite.
        new_cost_matrix[new_cost_matrix==1] -= 1e-3
        new_cost_matrix = 1 - new_cost_matrix
        # Both the dummy cost and the costs become -log(1 - cost).
        dummy_node_cost = -np.log(1-dummy_node_cost_iou)
        new_cost_matrix = -np.log(new_cost_matrix)
    # Gated-out pairs get the cutoff cost (this also overwrites whatever the
    # log transform produced for them).
    new_cost_matrix[idx_x, idx_y] = cutoff
    if len(cluster[0]) == 1:
        # Single track: no solver needed. Prepend the dummy column and write
        # the normalized exp(-cost) weights into the track's row.
        new_cost_matrix = np.concatenate([np.ones((new_cost_matrix.shape[0], 1))*dummy_node_cost, new_cost_matrix], axis = 1)
        total_cost = np.sum(np.exp(-new_cost_matrix))
        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
                        np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = np.exp(-new_cost_matrix)/total_cost
        new_assignment = new_assignment.reshape(1, -1)
        return  [new_assignment], np.array([0])
    # NOTE(review): both candidate matrices are reshaped to 2-D above, so this
    # branch looks unreachable - confirm before relying on it.
    if new_cost_matrix.ndim <= 1:
        new_cost_matrix = np.expand_dims(new_cost_matrix, 1)

    # print(new_cost_matrix)
    # NOTE(review): new_m_best_sol is defined in mbest_ilp; it is assumed to
    # return one flattened (tracks x (dummy + detections)) assignment per
    # solution together with their costs - TODO confirm.
    assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)
    # Costs are shifted so that the best solution has cost 0 (modifies the
    # returned cost array in place).
    offset = np.amin(assignment_cost)
    assignment_cost -= offset
    new_assignments = []
    # Normalizer: each assignment is divided by the sum of exp(-cost) over
    # all solutions.
    total_cost = np.sum(np.exp(-assignment_cost))
    for assignment in assignments:
        # Scatter the cluster-local assignment into a full-size matrix; column
        # 0 is the dummy column.
        new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
        new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
                    np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = \
                                                assignment/total_cost
        new_assignments.append(new_assignment.reshape(1, -1))
    return new_assignments, assignment_cost


def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):
    """Match tracks to detections one "age" level at a time.

    Tracks whose `time_since_update` equals 1 are matched first, then those
    with 2, and so on up to `cascade_depth`. Detections consumed at one level
    are not offered to later levels.

    Parameters
    ----------
    distance_metric : Callable
        Forwarded unchanged to `min_cost_matching`.
    max_distance : float
        Gating threshold, forwarded unchanged to `min_cost_matching`.
    cascade_depth : int
        Number of levels to run; level `i` (0-based) handles tracks with
        `time_since_update == i + 1`.
    tracks : List[track.Track]
        Predicted tracks at the current time step.
    detections : List[detection.Detection]
        Detections at the current time step.
    track_indices : Optional[List[int]]
        Indices into `tracks` to consider. Defaults to all tracks.
    detection_indices : Optional[List[int]]
        Indices into `detections` to consider. Defaults to all detections.
    compare_2d : bool
        Forwarded unchanged to `min_cost_matching`.
    detections_3d : Optional
        Forwarded unchanged to `min_cost_matching`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        * matched (track index, detection index) pairs,
        * track indices from `track_indices` that appear in no match,
        * detection indices left over after the last level that ran.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    remaining_detections = detection_indices
    matches = []
    for age in range(1, cascade_depth + 1):
        # Stop as soon as every detection has been consumed.
        if len(remaining_detections) == 0:
            break

        same_age_tracks = [
            idx for idx in track_indices
            if tracks[idx].time_since_update == age
        ]
        # No track of this age: move on to the next level.
        if len(same_age_tracks) == 0:
            continue

        level_matches, _, remaining_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections,
            same_age_tracks, remaining_detections, compare_2d,
            detections_3d=detections_3d)
        matches.extend(level_matches)

    matched_track_ids = set(trk for trk, _ in matches)
    unmatched_tracks = list(set(track_indices) - matched_track_ids)
    return matches, unmatched_tracks, remaining_detections

# @profile
def gate_cost_matrix(
        kf, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):
    """Build a boolean gate mask from Kalman-filter gating distances.

    For every selected track, the gating distance to every selected detection
    is computed with `kf.gating_distance` and compared against the
    `EKF.chi2inv975` threshold for the gating dimension.

    Parameters
    ----------
    kf : The Kalman filter; must provide `gating_distance`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Indices into `tracks`; row `i` of the result corresponds to
        `tracks[track_indices[i]]`.
    detection_indices : List[int]
        Indices into `detections`; column `j` of the result corresponds to
        `detections[detection_indices[j]]`.
    gated_cost : Optional[float]
        Accepted for interface compatibility; not used by this function.
    only_position : Optional[bool]
        If True the gating dimension is 2; otherwise it is the length of a
        measurement vector. The flag is also forwarded to
        `kf.gating_distance`.
    use3d : Optional[bool]
        If True, measurements are the detections' `box_3d`; otherwise the
        result of `to_xywh()`. The flag is also forwarded to
        `kf.gating_distance`.
    windowing : Optional[bool]
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    ndarray or None
        A `len(track_indices)` x `len(detection_indices)` boolean array that
        is True where the gating distance exceeds the threshold (i.e. the
        association is gated out). Returns None when either index list is
        empty.

    """
    if len(track_indices) == 0 or len(detection_indices) == 0:
        return None

    if use3d:
        # Index through detection_indices, exactly like the 2D branch, so that
        # column j always belongs to detections[detection_indices[j]]. The
        # previous `enumerate(...) if i in detection_indices` scan followed
        # the order of `detections` instead and collapsed repeated indices.
        measurements = np.array(
            [detections[i].box_3d for i in detection_indices])
    else:
        measurements = np.asarray(
            [detections[i].to_xywh() for i in detection_indices])

    gating_dim = 2 if only_position else measurements.shape[1]
    gating_threshold = EKF.chi2inv975[gating_dim]

    gate_mask = []
    for track_idx in track_indices:
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, use3d)
        # True marks an infeasible (gated) track/detection pair.
        gate_mask.append(gating_distance > gating_threshold)
    return np.vstack(gate_mask)

def find_clusters(cost_matrix, cutoff):
    """Split tracks and detections into connected groups.

    A track and a detection are linked when their cost is strictly below
    `cutoff`. Every track ends up in exactly one cluster; a track with no
    linked detection forms a cluster with an empty detection array.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D array with one row per track and one column per detection.
    cutoff : float
        Costs strictly below this value count as links.

    Returns
    -------
    List[(ndarray, ndarray)]
        One `(track_rows, detection_ids)` pair of int32 arrays per cluster.
        Detection ids are column indices shifted by +1.

    """
    n_tracks, _ = cost_matrix.shape
    every_track = set(range(n_tracks))
    assigned_tracks = set()
    clusters = []
    n_assigned = 0
    while n_assigned < n_tracks:
        cluster_dets = set()
        cluster_tracks = set()
        # Seed the search with any track that has not been clustered yet.
        frontier = set()
        frontier.add(next(iter(every_track - assigned_tracks)))
        while frontier:
            trk = frontier.pop()
            # Detection ids are stored 1-based (column index + 1).
            cluster_dets.update(np.where(cost_matrix[trk] < cutoff)[0] + 1)
            cluster_tracks.add(trk)
            for det in cluster_dets:
                for neighbour in np.where(cost_matrix[:, det - 1] < cutoff)[0]:
                    if neighbour not in cluster_tracks and neighbour not in frontier:
                        frontier.add(neighbour)
        n_assigned += len(cluster_tracks)
        assigned_tracks.update(cluster_tracks)
        track_arr = np.array(list(cluster_tracks), dtype = np.int32)
        det_arr = np.array(list(cluster_dets), dtype = np.int32)
        clusters.append((track_arr, det_arr))
    return clusters


================================================
FILE: src/mbest_ilp.py
================================================
from gurobipy import Model, quicksum, LinExpr, GRB
import numpy as np
import copy
import time
from sklearn.utils.linear_assignment_ import linear_assignment
import pickle
import itertools
import pdb
from copy import deepcopy
import math
"""
Fn: ilp_assignment
------------------
Solves ILP problem using gurobi
"""
def ilp_assignment(model):
    """Optimize `model` in place and report infeasibility.

    Returns -1 when `model.status` equals 3 after optimizing (3 is the
    Gurobi status code for INFEASIBLE per the Gurobi documentation);
    returns None otherwise. The solution itself is read from `model` by
    the caller.
    """
    model.optimize()
    infeasible = (model.status == 3)
    return -1 if infeasible else None

"""
Fn: initialize_model
--------------------
Initializes gurobi ILP model by setting the base objective
"""
# @profile
def initialize_model(cost_matrix, cutoff, model = None):
    """Create (or reset) the assignment ILP for `cost_matrix`.

    A dummy detection column filled with `cutoff` is inserted at column 0, so
    the model has M rows (tracks) and N = detections + 1 columns.

    Parameters
    ----------
    cost_matrix : ndarray
        M x (N-1) matrix of track/detection costs.
    cutoff : float
        Cost assigned to the dummy detection for every track.
    model : Optional[gurobipy.Model]
        Model to reuse; its variables and constraints are removed first.
        A new `Model()` is created when None.

    Returns
    -------
    (model, M, N, y)
        The configured model, the padded matrix dimensions and the binary
        decision variables as a flat list (the values of the `addVars`
        result).
    """
    # Prepend the dummy detection column.
    dummy_column = np.ones(cost_matrix.shape[0])*cutoff
    cost_matrix = np.insert(cost_matrix, 0, dummy_column, axis=1)
    M, N = cost_matrix.shape

    if model is not None:
        model.remove(model.getVars())
        model.remove(model.getConstrs())
    else:
        model = Model()
    model.setParam('OutputFlag', False)

    y = model.addVars(M, N, vtype=GRB.BINARY, name = 'y')
    objective = quicksum(
        quicksum([y[i,j]*cost_matrix[i][j] for j in range(N)])
        for i in range(M))
    model.setObjective(objective, GRB.MINIMIZE)

    # Every track picks exactly one column (a detection or the dummy).
    model.addConstrs(
        (quicksum(y[i,j] for j in range(N))==1 for i in range(M)),
        name='constraint for track')
    # Every real detection (columns 1..N-1) is used by at most one track.
    model.addConstrs(
        (quicksum(y[i,j] for i in range(M))<=1 for j in range(1, N)),
        name='constraint for detection')

    return model, M, N, list(y.values())

"""
Fn: m_best_sol
--------------
Finds m_best solutions for object/track association given the
input cost matrix. Solves constrained ILP problems using gurobi solver.
"""
def cache(func):
    """Decorator that memoises the solution list of `func` by matrix shape.

    The wrapped callable is invoked as `f(cost_matrix, cutoff, ...)`. The key
    is the shape of `cost_matrix` after a column filled with `cutoff` has been
    prepended. On a hit, the stored solution list is reused and the solution
    values are recomputed for the current costs as the row-wise sum of
    `solution_list * flattened padded matrix`; `func` is not called.
    """
    solutions_by_shape = {}

    def cached_function(*args):
        costs = args[0]
        dummy_column = np.ones((costs.shape[0], 1))*args[1]
        padded = np.hstack((dummy_column, costs))
        key = (padded.shape[0], padded.shape[1])
        if key not in solutions_by_shape:
            # Miss: run the real function and remember only the solution list.
            solution_list, solution_vals = func(*args)
            solutions_by_shape[key] = solution_list
            return solution_list, solution_vals
        # Hit: same enumeration, re-priced with the current costs.
        solution_list = solutions_by_shape[key]
        solution_vals = np.sum(solution_list*padded.reshape(1, -1), axis = 1)
        return solution_list, solution_vals

    return cached_function
# @profile
def num_solutions(cost_matrix):
    """Count the assignments of an M x D track/detection matrix.

    Each track is either unmatched or matched to a distinct detection. The
    count is accumulated by number of matched tracks `i`; the term for `i` is
    (M!/(M-i)!) * (D!/(D-i)!) / i!. Accumulation stops as soon as the running
    total exceeds 2000, in which case the partial total is returned.

    Returns
    -------
    int
        The total, or the first partial total above 2000.
    """
    M, N = cost_matrix.shape
    N += 1  # account for the dummy detection column
    total = 0
    term = 1  # contribution for i == 0 (no track matched)
    for i in range(min(M+1, N)):
        if i > 0:
            # Going from i-1 to i multiplies the term by (M-i+1)*(N-i)/i;
            # the division is exact.
            term = term*(M-i+1)*(N-i)//i
        total += term
        if total > 2000:
            break
    return int(total)

@cache
def enumerate_solutions(cost_matrix, cutoff, num_solutions):
    """Enumerate every assignment of the padded cost matrix.

    A dummy column filled with `cutoff` is prepended (column 0). Each solution
    is an M x N 0/1 matrix in which every track selects either the dummy
    column or a distinct real detection.

    Parameters
    ----------
    cost_matrix : ndarray
        M x (N-1) cost matrix.
    cutoff : float
        Cost of the dummy column.
    num_solutions : int
        Number of solutions to allocate room for.

    Returns
    -------
    (ndarray, ndarray)
        Solutions flattened to shape (num_solutions, M*N), dtype int32, and
        the total cost of each solution.
    """
    dummy_column = np.ones((cost_matrix.shape[0], 1))*cutoff
    cost_matrix = np.hstack((dummy_column, cost_matrix))
    M, N = cost_matrix.shape

    # Start with every track on the dummy column.
    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)
    solution_list[:, :, 0] = 1

    sol_idx = 0
    for n_matched in range(min(M+1, N)):
        for rows in itertools.combinations(range(M), n_matched):
            for cols in itertools.permutations(range(1, N), n_matched):
                # Move each chosen track from the dummy to its detection.
                for r, c in zip(rows, cols):
                    solution_list[sol_idx, r, c] = 1
                    solution_list[sol_idx, r, 0] = 0
                sol_idx += 1

    weighted = solution_list*np.expand_dims(cost_matrix, 0)
    solution_vals = np.sum(np.sum(weighted, axis = 1), axis = 1)
    solution_list = np.reshape(solution_list, (num_solutions, -1))
    return solution_list, solution_vals


def new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):
    """Return low-cost track/detection assignments for `cost_matrix`.

    When `num_solutions` reports at most 2000 assignments, every assignment is
    enumerated via `enumerate_solutions` and returned; `m_sol` is not applied
    on that path. Otherwise a Gurobi model is built with `initialize_model`
    and solved repeatedly with extra constraints to obtain `m_sol` solutions.

    Parameters
    ----------
    cost_matrix : ndarray
        Track x detection cost matrix (without the dummy column).
    m_sol : int
        Number of solutions requested from the solver path.
    cutoff : float
        Cost of the dummy detection, forwarded to the helpers.
    model : Optional
        Existing model forwarded to `initialize_model` for reuse.

    Returns
    -------
    (ndarray, ndarray)
        Solver path: `X` of shape (m_sol, M*N) holding flattened solutions and
        `xv` holding their objective values. Enumeration path: whatever
        `enumerate_solutions` returns.

    NOTE(review): if the very first solve is infeasible, `x` is never bound
    and the `m_sol > 1` branch raises NameError at `LinExpr(x,y)`.
    NOTE(review): the scheme below looks like a partition-based m-best search
    (presumably Murty-style) - confirm against the reference algorithm.
    """
    sols = num_solutions(cost_matrix)
    # Small problems are enumerated exhaustively instead of using the solver.
    if sols <= 2000:
        return enumerate_solutions(cost_matrix, cutoff, sols)
    model, M, N, y = initialize_model(cost_matrix, cutoff, model)
    X = np.zeros((m_sol, M*N))
    xv = []
    # Best solution: optimum of the base model.
    if (ilp_assignment(model) == -1):
        xv.append(0)
    else:
        x = model.getAttr("X", y)
        X[0] = x
        xv.append(model.objVal)
    if m_sol > 1:
        # Every feasible solution has exactly M ones (one per track row), so
        # requiring an overlap of at most M-1 with x excludes x itself.
        model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')
        if (ilp_assignment(model) == -1):
            xv.append(0)
        else:
            x = model.getAttr("X", y)
            X[1] = x
            xv.append(model.objVal)
    if m_sol > 2:
        model.remove(model.getConstrByName('constraint_0'))
        # Candidate next-best solution (and value) for each partition.
        second_best_solutions = []
        second_best_solution_vals = []
        # partitions[k]: variable indices fixed for the partition of X[k].
        partitions = []
        # First variable on which the two best solutions disagree.
        j = np.argmax(np.logical_xor(X[0], X[1]))
        partitions.append([j])
        partitions.append([j])
        # Best solution other than X[0] that agrees with X[0] on variable j.
        # NOTE(review): the return value of ilp_assignment is ignored for this
        # solve and the next one, so infeasibility is not handled here.
        model.addConstr(y[j]==X[0][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solutions.append(model.getAttr("X", y))
        second_best_solution_vals.append(model.objVal)
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        # Same for X[1]: agree with X[1] on variable j but differ from X[1].
        model.addConstr(y[j]==X[1][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solution_vals.append(model.objVal)
        second_best_solutions.append(model.getAttr("X", y))
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        
        for m in range(2, m_sol):
            # The cheapest candidate over all partitions is the next solution.
            l_k = np.argmin(second_best_solution_vals)
            X[m] = second_best_solutions[l_k]
            xv.append(second_best_solution_vals[l_k])
            if m==m_sol-1:
                break
            # Split partition l_k on the first variable where X[m] and X[l_k] differ.
            j = np.argmax(np.logical_xor(X[m], X[l_k]))
            parent_partition = partitions[l_k]
            constrs = []
            # Re-impose the variables already fixed for the parent partition.
            for idx in parent_partition:
                constrs.append(model.addConstr(y[idx]==X[l_k, idx]))
            # Child containing X[m]: fix variable j to X[m]'s value, exclude X[m].
            model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')
            model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')
            if(ilp_assignment(model) == -1):
                # Infeasible child: placeholder with infinite value.
                # NOTE(review): the placeholder is (M, N) while rows of X have
                # length M*N; assigning it to X[m] would fail unless M == 1.
                second_best_solutions.append(np.ones((M,N)))
                second_best_solution_vals.append(np.inf)
            else:
                second_best_solutions.append(model.getAttr("X", y))
                second_best_solution_vals.append(model.objVal)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Child containing X[l_k]: fix variable j to X[l_k]'s value, exclude X[l_k].
            model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')
            model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')
            if(ilp_assignment(model) == -1):
                second_best_solution_vals[l_k] = np.inf
                second_best_solutions[l_k] = np.ones((M,N))
            else:
                second_best_solution_vals[l_k] = model.objVal
                second_best_solutions[l_k] = model.getAttr("X", y)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Both children inherit the parent's fixed variables plus j.
            partitions[l_k].append(j)
            partitions.append(copy.deepcopy(partitions[l_k]))
            # Drop the temporary parent-partition constraints.
            for constr in constrs:
                model.remove(constr)


    # X = np.asarray(X)
    xv = np.asarray(xv)
    return X, xv
def linear_assignment_wrapper(a):
    """Pass `a` to `linear_assignment` and return its result unchanged."""
    result = linear_assignment(a)
    return result

if __name__=='__main__':
    # Manual smoke test: solve a fixed 4x4 cost matrix with a dummy cost of
    # 10 and print every returned solution (reshaped to 4 tracks x 5 columns,
    # column 0 being the dummy detection) next to its value.
    np.random.seed(14295)
    a = np.array([
        [0.1, 0.6, 0.2, 0.3],
        [0.4, 0.1, 0.9, 0.4],
        [0.3, 0.5, 0.1, 0.7],
        [0.8, 0.2, 0.2, 0.1],
    ])
    num_solutions(a)
    sols, vals = new_m_best_sol(a, 100, 10)
    for i in range(len(vals)):
        print(np.reshape(sols[i], (4,5)), vals[i])


================================================
FILE: src/nn_matching.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch

def _pdist(a, b):
    """Compute pair-wise squared distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that eleement (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    a, b = np.asarray(a), np.asarray(b)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)))
    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
    r2 = np.clip(r2, 0., float(np.inf))
    return r2


def _cosine_distance(a, b, data_is_normalized=False):
    """Compute pair-wise cosine distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.
    data_is_normalized : Optional[bool]
        If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to lenght 1.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that eleement (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    if not data_is_normalized:
        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)

def _cosine_distance_torch(a, b, data_is_normalized=False):
    '''
    _cosine_distance but torched
    '''
    if not data_is_normalized:
        a = a / torch.norm(a, dim=1, keepdim=True)
        b = b / torch.norm(b, dim=1, keepdim=True)
    return 1. - torch.matmul(a, torch.transpose(b,0,1))

def _nn_euclidean_distance(x, y):
    """ Nearest-neighbour helper for the Euclidean metric.

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        `_pdist` value (a squared distance) to any sample in `x`, floored
        at 0.

    """
    nearest = _pdist(x, y).min(axis=0)
    return np.maximum(0.0, nearest)

def _nn_euclidean_distance_torch(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest Euclidean distance to a sample in `x`.

    """
    # x = x/((x*x).sum(1, keepdim = True)).sqrt()
    # y = y/((y*y).sum(1, keepdim = True)).sqrt()
    sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2).sqrt()
    # sim = sim.exp()
    # sim = (sim - 1)/(sim + 1)
    sim = torch.min(sim, 0)[0]
    return sim
    
def _nn_cosine_distance(x, y):
    """ Nearest-neighbour helper for the cosine metric.

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        cosine distance to a sample in `x`.

    """
    pairwise = _cosine_distance(x, y)
    return pairwise.min(axis=0)

def _nn_cosine_distance_torch(x,y):
    '''
    Torch counterpart of `_nn_cosine_distance`: for every row of `y`, the
    smallest cosine distance to any row of `x`.
    '''
    pairwise = _cosine_distance_torch(x, y)
    # [0] selects the minimum values; [1] would be the argmin indices.
    return pairwise.min(0)[0]

class NearestNeighborDistanceMetric(object):
    """
    Nearest-neighbour distance over per-target sample galleries.

    Two galleries are kept, `samples` and `samples_2d`; the distance from a
    target to a feature is the smallest distance between that feature and any
    sample stored for the target.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    budget : Optional[int]
        If not None, keep at most this many samples per target, dropping the
        oldest ones first.

    Attributes
    ----------
    samples : Dict[int -> List]
        Samples observed so far for each target identity.
    samples_2d : Dict[int -> List]
        2D samples observed so far for each target identity.

    """

    def __init__(self, metric, budget=None):
        if metric not in ("euclidean", "cosine"):
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
            self._metric_torch = _nn_euclidean_distance_torch
        else:
            self._metric = _nn_cosine_distance
            self._metric_torch = _nn_cosine_distance_torch
        self.budget = budget
        self.samples = {}
        self.samples_2d = {}

    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):
        """Add new samples to both galleries and prune them.

        Parameters
        ----------
        features : sequence
            One feature per entry of `targets`; a None entry creates the
            target's gallery without adding a sample.
        features_2d : sequence
            One 2D feature per entry of `targets_2d`.
        targets : sequence
            Target identities associated with `features`.
        targets_2d : sequence
            Target identities associated with `features_2d`.
        active_targets : List[int]
            Targets to keep. In `samples`, an active target that does not
            appear in `targets` is reset to an empty list. In `samples_2d`,
            every active target must already have an entry (KeyError
            otherwise).

        """
        gallery = self.samples
        for feat, tid in zip(features, targets):
            bucket = gallery.setdefault(tid, [])
            if feat is not None:
                bucket.append(feat)
            if self.budget is not None:
                gallery[tid] = bucket[-self.budget:]

        # Keep only active targets updated in this call ...
        retained = {}
        for tid in active_targets:
            if tid in targets:
                retained[tid] = gallery[tid]
        # ... and give every other active target an empty gallery.
        for tid in active_targets:
            retained.setdefault(tid, [])
        self.samples = retained

        gallery_2d = self.samples_2d
        for feat_2d, tid in zip(features_2d, targets_2d):
            bucket = gallery_2d.setdefault(tid, [])
            bucket.append(feat_2d)
            if self.budget is not None:
                gallery_2d[tid] = bucket[-self.budget:]

        self.samples_2d = dict((tid, gallery_2d[tid]) for tid in active_targets)

    def distance(self, features, targets, compare_2d=False):
        """Compute the distance between stored samples and `features`.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            Targets to match the given `features` against.
        compare_2d : bool
            Use the `samples_2d` gallery instead of `samples`.

        Returns
        -------
        ndarray
            A len(targets) x len(features) cost matrix whose row i holds the
            metric's output for `targets[i]`.

        """
        gallery = self.samples_2d if compare_2d else self.samples
        cost_matrix = np.zeros((len(targets), len(features)))
        for row, target in enumerate(targets):
            cost_matrix[row, :] = self._metric(gallery[target], features)
        return cost_matrix

    def distance_torch(self, features, targets, compare_2d=False):
        '''
        Torch version of `distance`: each target's samples are stacked into
        one tensor, the cost matrix is filled on 'cuda:0' and returned as a
        numpy array.
        '''
        gallery = self.samples_2d if compare_2d else self.samples
        cost_matrix = torch.zeros(len(targets), len(features)).to('cuda:0')
        for row, target in enumerate(targets):
            stacked = torch.stack(gallery[target], dim=0)
            cost_matrix[row, :] = self._metric_torch(stacked, features)
        return cost_matrix.cpu().numpy()

    def check_samples(self, targets):
        """Return True if any of `targets` has an empty `samples` gallery."""
        return any(len(self.samples[target]) == 0 for target in targets)


================================================
FILE: src/pointnet_model.py
================================================
import os, pdb
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
from utils.pointnet_transform_nets import input_transform_net, feature_transform_net
import utils.pointnet_tf_util as pointnet_tf_util


class PointNet():
    """PointNet feature extractor restored from a TensorFlow checkpoint.

    The graph is built once in the constructor for a batch size of 1 and an
    unconstrained number of points; calling the instance runs one forward
    pass and returns the pooled feature.
    """

    def __init__(self, config_path):
        """Build the graph, open a session and restore the weights.

        Parameters
        ----------
        config_path : str
            Path to an INI file whose `[general]` section provides
            `num_point` (int) and `depth_model_path` (checkpoint path).
        """
        # ConfigParser replaces SafeConfigParser, which is a deprecated alias
        # that newer Python releases no longer provide.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        num_points = parser.getint('general', 'num_point')
        depth_model_path = parser.get('general', 'depth_model_path')

        with tf.device('/gpu:'+str(0)):
            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)
            self.is_training_pl = tf.placeholder(tf.bool, shape=())

            # simple model
            feature = self.get_model(self.pointclouds_pl, self.is_training_pl)
            self.feature = feature

        # Op to restore all the variables.
        self.saver = tf.train.Saver()
        # Create session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # Soft placement lets ops fall back to another device when the
        # requested GPU cannot host them.
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.sess = tf.Session(config=config)
        # Initialize variables
        self.sess.run(tf.global_variables_initializer())
        # Restore model weights
        self.saver.restore(self.sess, depth_model_path)

    def __call__(self, input_point_cloud):
        """Run inference (`is_training` fed as False) and return the feature.

        `input_point_cloud` is fed to the `(1, None, 3)` float32 placeholder.
        """
        feed_dict = {self.pointclouds_pl: input_point_cloud,
                     self.is_training_pl: False}
        features = self.sess.run(self.feature,feed_dict=feed_dict)
        return features

    def placeholder_inputs(self, batch_size, num_point):
        """Create the point-cloud and label placeholders.

        `num_point` is accepted but not used: the point dimension of the
        point-cloud placeholder is left as None.
        """
        pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, None, 3))
        labels_pl = tf.placeholder(tf.int32, shape=(batch_size))
        return pointclouds_pl, labels_pl


    def get_model(self, point_cloud, is_training, bn_decay=None):
        """Build the PointNet feature graph.

        Input is BxNx3. The returned tensor is the max-pooled output of the
        last convolution reshaped to [B, -1] (presumably Bx1024 given the
        1024 output channels of `conv5` - confirm against
        `pointnet_tf_util.conv2d`).
        """
        batch_size = point_cloud.get_shape()[0].value
        # Collected locally only; this method does not return it.
        end_points = {}

        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE) as sc:
            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
        point_cloud_transformed = tf.matmul(point_cloud, transform)
        input_image = tf.expand_dims(point_cloud_transformed, -1)

        net = pointnet_tf_util.conv2d(input_image, 64, [1,3],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope='conv1', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 64, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope='conv2', bn_decay=bn_decay)

        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE) as sc:
            transform = feature_transform_net(net, is_training, bn_decay, K=64)
        end_points['transform'] = transform
        net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
        net_transformed = tf.expand_dims(net_transformed, [2])

        net = pointnet_tf_util.conv2d(net_transformed, 64, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope='conv3', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 128, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope='conv4', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 1024, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope='conv5', bn_decay=bn_decay)

        # Symmetric function: max pooling over the point axis
        net = tf.reduce_max(net, axis = 1)

        net = tf.reshape(net, [batch_size, -1])
        feature = net

        return feature


    def get_loss(self, pred, label, end_points, reg_weight=0.001):
        """Classification loss plus an orthogonality penalty.

        pred: B*NUM_CLASSES logits,
        label: B integer class ids,
        end_points: dict whose 'transform' entry is a BxKxK tensor,
        reg_weight: weight of the orthogonality term.

        Returns mean sparse softmax cross-entropy plus
        `reg_weight * l2_loss(T T^T - I)`.
        """
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
        classify_loss = tf.reduce_mean(loss)
        tf.summary.scalar('classify loss', classify_loss)

        # Enforce the transformation as orthogonal matrix
        transform = end_points['transform'] # BxKxK
        K = transform.get_shape()[1].value
        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
        # tf.eye gives the KxK identity without numpy, which this module
        # never imports (the former np.eye call raised NameError).
        mat_diff = mat_diff - tf.eye(K, dtype=tf.float32)
        mat_diff_loss = tf.nn.l2_loss(mat_diff) 
        tf.summary.scalar('mat loss', mat_diff_loss)

        return classify_loss + mat_diff_loss * reg_weight
    

================================================
FILE: src/template 2.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification
"""
import time
import rospy
import sys
import torch
import numpy as np
import os
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from cv_bridge import CvBridge, CvBridgeError
from aligned_reid_utils import get_image_patches, generate_features, create_appearance_model
from jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature

class Appearance_Features:
    """ROS node that attaches an AlignedReID appearance feature to every YOLO
    'person' detection.

    Bounding boxes from ``/omni_yolo_bboxes`` are paired with images from
    ``/ros_indigosdk_node/stitched_image0`` by an approximate time
    synchronizer; for each pair a ``detection2d_with_feature_array`` message is
    published on ``detection2d_with_feature``.
    """

    def __init__(self):
        """Initialise the node, load the appearance model and wire up topics."""
        self.node_name = "aligned_reid_feature_generator"

        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)
        # The checkpoint path can be overridden with the private ROS parameter.
        appearance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')
        self.appearance_model = create_appearance_model(appearance_model_ckpt)

        self.image_sub = message_filters.Subscriber("/ros_indigosdk_node/stitched_image0", Image, queue_size=2)
        self.yolo_bbox_sub = message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)

        # Pair boxes and images whose stamps differ by at most 0.1 s (queue of 5).
        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)
        self.time_sync.registerCallback(self.get_2d_feature)

        self.cv_bridge = CvBridge()
        self.feature_2d_pub = rospy.Publisher("detection2d_with_feature", detection2d_with_feature_array, queue_size=1)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def get_2d_feature(self, y1_bboxes, ros_image):
        """Synchronised callback: compute and publish features for one frame.

        Parameters
        ----------
        y1_bboxes : darknet_ros_msgs.msg.BoundingBoxes
            YOLO detections; only boxes whose ``Class`` is 'person' are used.
        ros_image : sensor_msgs.msg.Image
            Image the detections refer to.

        An empty array message is still published when the frame contains no
        person. If the image cannot be converted the frame is dropped.
        """
        try:
            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            # Without a decoded image there is nothing to crop patches from;
            # continuing would only fail on the unbound `input_image`.
            rospy.logerr("AlignedReID: image conversion failed: %s", e)
            return
        input_img = torch.from_numpy(input_image).float()
        input_img = input_img.to('cuda:1')
        # HWC -> CHW and scale by 1/255.
        input_img = input_img.permute(2, 0, 1)/255

        # Generate 2D image features for each bounding box
        detections = [
            [int(bbox.xmin), int(bbox.ymin), int(bbox.xmax), int(bbox.ymax),
             bbox.probability, -1, -1]
            for bbox in y1_bboxes.bounding_boxes
            if bbox.Class == 'person'
        ]

        features_2d = detection2d_with_feature_array()
        features_2d.header.stamp = y1_bboxes.header.stamp
        features_2d.header.frame_id = 'occam'
        if not detections:
            self.feature_2d_pub.publish(features_2d)
            return
        image_patches = get_image_patches(input_img, detections)
        features = generate_features(self.appearance_model, image_patches)

        # frame_det_id is the position of the detection among the kept boxes.
        for frame_det_id, (det, feature) in enumerate(zip(detections, features)):
            det_msg = detection2d_with_feature()
            det_msg.header.stamp = features_2d.header.stamp
            det_msg.x1 = det[0]
            det_msg.y1 = det[1]
            det_msg.x2 = det[2]
            det_msg.y2 = det[3]
            det_msg.feature = feature
            det_msg.valid = True
            det_msg.frame_det_id = frame_det_id
            features_2d.detection2d_with_features.append(det_msg)
        self.feature_2d_pub.publish(features_2d)

    def cleanup(self):
        """Shutdown hook registered with rospy; only reports the shutdown."""
        print("Shutting down 2D-Appearance node.")
    
def main(args):
    """Create the appearance-feature node and block until ROS shuts down.

    ``args`` is accepted for the command-line entry point but is not used.
    """
    try:
        # The constructor registers the node, its subscribers and publishers.
        node = Appearance_Features()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 2D-Appearance node.")


if __name__ == '__main__':
    main(sys.argv)


================================================
FILE: src/template.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
    Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification
"""
import time
import rospy
import sys
import torch
import numpy as np
import os
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from cv_bridge import CvBridge, CvBridgeError
from aligned_reid_utils import get_image_patches, generate_features, create_appearance_model
from jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature

class Appearance_Features:
    """ROS node that attaches an AlignedReID appearance feature to every YOLO
    'person' detection.

    Bounding boxes from ``/omni_yolo_bboxes`` are paired with images from
    ``/ros_indigosdk_node/stitched_image0`` by an approximate time
    synchronizer; for each pair a ``detection2d_with_feature_array`` message is
    published on ``detection2d_with_feature``.
    """

    def __init__(self):
        """Initialise the node, load the appearance model and wire up topics."""
        self.node_name = "aligned_reid_feature_generator"

        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)
        # The checkpoint path can be overridden with the private ROS parameter.
        appearance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')
        self.appearance_model = create_appearance_model(appearance_model_ckpt)

        self.image_sub = message_filters.Subscriber("/ros_indigosdk_node/stitched_image0", Image, queue_size=2)
        self.yolo_bbox_sub = message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)

        # Pair boxes and images whose stamps differ by at most 0.1 s (queue of 5).
        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)
        self.time_sync.registerCallback(self.get_2d_feature)

        self.cv_bridge = CvBridge()
        self.feature_2d_pub = rospy.Publisher("detection2d_with_feature", detection2d_with_feature_array, queue_size=1)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def get_2d_feature(self, y1_bboxes, ros_image):
        """Synchronised callback: compute and publish features for one frame.

        Parameters
        ----------
        y1_bboxes : darknet_ros_msgs.msg.BoundingBoxes
            YOLO detections; only boxes whose ``Class`` is 'person' are used.
        ros_image : sensor_msgs.msg.Image
            Image the detections refer to.

        An empty array message is still published when the frame contains no
        person. If the image cannot be converted the frame is dropped.
        """
        try:
            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            # Without a decoded image there is nothing to crop patches from;
            # continuing would only fail on the unbound `input_image`.
            rospy.logerr("AlignedReID: image conversion failed: %s", e)
            return
        input_img = torch.from_numpy(input_image).float()
        input_img = input_img.to('cuda:1')
        # HWC -> CHW and scale by 1/255.
        input_img = input_img.permute(2, 0, 1)/255

        # Generate 2D image features for each bounding box
        detections = [
            [int(bbox.xmin), int(bbox.ymin), int(bbox.xmax), int(bbox.ymax),
             bbox.probability, -1, -1]
            for bbox in y1_bboxes.bounding_boxes
            if bbox.Class == 'person'
        ]

        features_2d = detection2d_with_feature_array()
        features_2d.header.stamp = y1_bboxes.header.stamp
        features_2d.header.frame_id = 'occam'
        if not detections:
            self.feature_2d_pub.publish(features_2d)
            return
        image_patches = get_image_patches(input_img, detections)
        features = generate_features(self.appearance_model, image_patches)

        # frame_det_id is the position of the detection among the kept boxes.
        for frame_det_id, (det, feature) in enumerate(zip(detections, features)):
            det_msg = detection2d_with_feature()
            det_msg.header.stamp = features_2d.header.stamp
            det_msg.x1 = det[0]
            det_msg.y1 = det[1]
            det_msg.x2 = det[2]
            det_msg.y2 = det[3]
            det_msg.feature = feature
            det_msg.valid = True
            det_msg.frame_det_id = frame_det_id
            features_2d.detection2d_with_features.append(det_msg)
        self.feature_2d_pub.publish(features_2d)

    def cleanup(self):
        """Shutdown hook registered with rospy; only reports the shutdown."""
        print("Shutting down 2D-Appearance node.")
    
def main(args):
    """Create the appearance-feature node and block until ROS shuts down.

    ``args`` is accepted for the command-line entry point but is not used.
    """
    try:
        # The constructor registers the node, its subscribers and publishers.
        node = Appearance_Features()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 2D-Appearance node.")


if __name__ == '__main__':
    main(sys.argv)


================================================
FILE: src/track_3d 2.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch

class TrackState:
    """
    Integer constants describing the lifecycle of a single target track.

    A new track starts out `Tentative` and becomes `Confirmed` once enough
    evidence has been collected. A track that is no longer alive is marked
    `Deleted` so it can be removed from the set of active tracks.

    """

    Tentative, Confirmed, Deleted = 1, 2, 3


class Track_3d:
    """
    A single target track filtered by a Kalman filter over a 3D box state.

    The first seven entries of `mean` are used as the 3D box (see
    `to_tlwh3d`); index 6 is the heading angle, which `update` keeps wrapped
    to `[0, 2*pi)`. The meaning of the remaining entries is defined by the
    Kalman filter and is not visible in this class (presumably velocities).

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of hits required before the track is confirmed. A tentative
        track is set to `Deleted` as soon as it is marked missed.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the detection this track originates from.
    cuda : bool
        If True, features fed to `lstm` are converted to CUDA float tensors.
    lstm : Optional[callable]
        If given, every feature is passed through
        `lstm(feature[, hidden]) -> (output, hidden)` before being cached.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the current state distribution.
    covariance : ndarray
        Covariance matrix of the current state distribution.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated feature
        vector is added to this list.
    features_2d : List[ndarray]
        A cache of 2D appearance features, filled alongside `features`.

    """
    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, appearance_feature = None, cuda = False, lstm = None):

        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        if lstm is None:
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            # `feature_update` expects a detection list and an index, so the
            # raw initial features go through the shared helper instead.
            self._store_features(feature, appearance_feature, lstm)
        self.first_detection = mean[:7]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False
        self.last_box = None


    def to_tlwh3d(self):
        """Get the current 3D box of the track.

        Returns
        -------
        ndarray
            `box_3d` of the last associated detection if there is one,
            otherwise a copy of the first seven entries of the state mean.

        """

        if self.last_box is not None:
            return self.last_box.box_3d
        else:
            return self.mean[[0,1,2,3,4,5,6]].copy()

    def to_tlwh(self, kf):
        """Get the axis-aligned 2D box enclosing the track's box corners.

        Parameters
        ----------
        kf :
            Kalman filter providing `calculate_corners(mean)`; its first
            return value is used as an array of corner points whose first
            two columns are x and y.

        Returns
        -------
        ndarray
            `[min_x, min_y, width, height]` of the corner points.

        """
        corner_points, _ = kf.calculate_corners(self.mean)
        min_x, min_y = np.amin(corner_points, axis = 0)[:2]
        max_x, max_y = np.amax(corner_points, axis = 0)[:2]
        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def _unwrap_heading(self, box_3d):
        """Shift `box_3d[6]` by a full turn so it lies within pi of the
        track's current heading. Modifies and returns `box_3d`."""
        heading_diff = box_3d[6] - self.mean[6]
        if heading_diff > np.pi:
            box_3d[6] -= 2 * np.pi
        elif heading_diff < -np.pi:
            box_3d[6] += 2 * np.pi
        return box_3d

    # @profile
    def update(self, kf, detection, compare_2d=False,
                marginalization=None, detection_idx=None, JPDA=False, lstm = None):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : List[Detection]
            All detections of the current frame.
        compare_2d : bool
            If True, no 3D measurement is passed to the filter.
        marginalization : Optional[ndarray]
            JPDA association probabilities of this track; entry 0 is treated
            as the "no detection" hypothesis. Only used when `JPDA` is True.
        detection_idx : Optional[int]
            Index into `detection` of the associated detection. In JPDA mode
            a negative value (or None) means no detection was associated.
        JPDA : bool
            Select the JPDA update (all detections weighted by
            `marginalization`) or the single-detection update.
        lstm : Optional[callable]
            Forwarded to the feature cache update.

        """
        if JPDA:

            detections_2d = [det.tlwh for det in detection]
            if compare_2d:
                detections_3d = None
            else:
                detections_3d = [self._unwrap_heading(np.copy(det.box_3d))
                                 for det in detection]
            self.mean, self.covariance, self.mean_post_3d = kf.update(
                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)
            self.mean[6] = self.mean[6] % (2 * np.pi)
            self.matched = bool(np.argmax(marginalization) != 0)
            if detection_idx is None or detection_idx < 0:
                # No detection was associated: leave the feature cache alone.
                # A negative index would otherwise silently pick a detection
                # from the end of the list.
                self.last_box = None
                return
            self.feature_update(detection, detection_idx, lstm)
            self.hits += 1
            self.time_since_update = 0
            detection = detection[detection_idx]
            self.last_box = detection
        else:
            detection = detection[detection_idx]
            if compare_2d:
                detection_3d = None
            else:
                detection_3d = self._unwrap_heading(np.copy(detection.box_3d))
            self.mean, self.covariance = kf.update(
                self.mean, self.covariance, detection.tlwh, detection_3d)
            self.mean[6] = self.mean[6] % (2 * np.pi)
            # Same bookkeeping as the JPDA branch, so that the track can be
            # confirmed and its miss counter is reset.
            self._store_features(detection.feature, detection.appearance_feature, lstm)
            self.hits += 1
            self.time_since_update = 0
            self.last_box = detection

        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted

    def _store_features(self, feature, appearance_feature, lstm):
        """Append one feature / appearance feature pair to the caches.

        `None` entries are skipped. When `lstm` is given, `feature` is run
        through it (carrying `self.hidden` across calls) and the output is
        cached instead of the raw feature.
        """
        if feature is not None:
            if lstm is not None:
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
        """Add the features of the current frame to the caches.

        Parameters
        ----------
        detections : List[Detection]
            Detections of the current frame.
        detection_idx : int
            Index of the associated detection (used when `JPDA` is False).
        lstm : Optional[callable]
            Optional recurrent model applied to the feature (non-JPDA only).
        JPDA : bool
            If True, cache the sum of all detection features weighted by
            `marginalization[1:]`; a cache is skipped when any detection
            lacks the corresponding feature.
        marginalization : Optional[ndarray]
            Association probabilities; required when `JPDA` is True.

        """
        if JPDA:
            features=[d.feature for d in detections]
            appearance_features=[d.appearance_feature for d in detections]
            if len([i for i in features if i is None])==0:
                combined_feature=np.sum(np.array(features).reshape(len(features), -1)
                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features.append(combined_feature)
            if len([i for i in appearance_features if i is None])==0:
                combined_feature=np.sum(
                                np.array(appearance_features).reshape(len(appearance_features), -1)
                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features_2d.append(combined_feature)
        else:
            chosen = detections[detection_idx]
            self._store_features(chosen.feature, chosen.appearance_feature, lstm)

    def get_cov(self):
        """Return a 6x6 covariance built from selected state covariance entries.

        The top-left 3x3 block is `covariance[:3, :3]`; row and column 5 hold
        `covariance[7, :3]` and `covariance[7, 7]`. Rows and columns 3 and 4
        stay zero.
        """
        # NOTE(review): `update` treats state index 6 as the heading, while
        # index 7 is read here -- confirm against the Kalman filter's layout.
        xyz_cov = self.covariance[:3, :3]
        theta_cov_1 = self.covariance[7, :3]
        theta_cov_2 = self.covariance[7, 7]
        out_cov = np.zeros((6, 6))
        out_cov[:3,:3] = xyz_cov
        out_cov[5, :3] = theta_cov_1
        out_cov[:3, 5] = theta_cov_1
        out_cov[5, 5] = theta_cov_2
        return out_cov

================================================
FILE: src/track_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch

class TrackState:
    """
    Integer constants describing the lifecycle of a single target track.

    A new track starts out `Tentative` and becomes `Confirmed` once enough
    evidence has been collected. A track that is no longer alive is marked
    `Deleted` so it can be removed from the set of active tracks.

    """

    Tentative, Confirmed, Deleted = 1, 2, 3


class Track_3d:
    """
    A single target track filtered by a Kalman filter over a 3D box state.

    The first seven entries of `mean` are used as the 3D box (see
    `to_tlwh3d`); index 6 is the heading angle, which `update` keeps wrapped
    to `[0, 2*pi)`. The meaning of the remaining entries is defined by the
    Kalman filter and is not visible in this class (presumably velocities).

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of hits required before the track is confirmed. A tentative
        track is set to `Deleted` as soon as it is marked missed.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the detection this track originates from.
    cuda : bool
        If True, features fed to `lstm` are converted to CUDA float tensors.
    lstm : Optional[callable]
        If given, every feature is passed through
        `lstm(feature[, hidden]) -> (output, hidden)` before being cached.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the current state distribution.
    covariance : ndarray
        Covariance matrix of the current state distribution.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated feature
        vector is added to this list.
    features_2d : List[ndarray]
        A cache of 2D appearance features, filled alongside `features`.

    """
    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, appearance_feature = None, cuda = False, lstm = None):

        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        if lstm is None:
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            # `feature_update` expects a detection list and an index, so the
            # raw initial features go through the shared helper instead.
            self._store_features(feature, appearance_feature, lstm)
        self.first_detection = mean[:7]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False
        self.last_box = None


    def to_tlwh3d(self):
        """Get the current 3D box of the track.

        Returns
        -------
        ndarray
            `box_3d` of the last associated detection if there is one,
            otherwise a copy of the first seven entries of the state mean.

        """

        if self.last_box is not None:
            return self.last_box.box_3d
        else:
            return self.mean[[0,1,2,3,4,5,6]].copy()

    def to_tlwh(self, kf):
        """Get the axis-aligned 2D box enclosing the track's box corners.

        Parameters
        ----------
        kf :
            Kalman filter providing `calculate_corners(mean)`; its first
            return value is used as an array of corner points whose first
            two columns are x and y.

        Returns
        -------
        ndarray
            `[min_x, min_y, width, height]` of the corner points.

        """
        corner_points, _ = kf.calculate_corners(self.mean)
        min_x, min_y = np.amin(corner_points, axis = 0)[:2]
        max_x, max_y = np.amax(corner_points, axis = 0)[:2]
        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def _unwrap_heading(self, box_3d):
        """Shift `box_3d[6]` by a full turn so it lies within pi of the
        track's current heading. Modifies and returns `box_3d`."""
        heading_diff = box_3d[6] - self.mean[6]
        if heading_diff > np.pi:
            box_3d[6] -= 2 * np.pi
        elif heading_diff < -np.pi:
            box_3d[6] += 2 * np.pi
        return box_3d

    # @profile
    def update(self, kf, detection, compare_2d=False,
                marginalization=None, detection_idx=None, JPDA=False, lstm = None):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : List[Detection]
            All detections of the current frame.
        compare_2d : bool
            If True, no 3D measurement is passed to the filter.
        marginalization : Optional[ndarray]
            JPDA association probabilities of this track; entry 0 is treated
            as the "no detection" hypothesis. Only used when `JPDA` is True.
        detection_idx : Optional[int]
            Index into `detection` of the associated detection. In JPDA mode
            a negative value (or None) means no detection was associated.
        JPDA : bool
            Select the JPDA update (all detections weighted by
            `marginalization`) or the single-detection update.
        lstm : Optional[callable]
            Forwarded to the feature cache update.

        """
        if JPDA:

            detections_2d = [det.tlwh for det in detection]
            if compare_2d:
                detections_3d = None
            else:
                detections_3d = [self._unwrap_heading(np.copy(det.box_3d))
                                 for det in detection]
            self.mean, self.covariance, self.mean_post_3d = kf.update(
                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)
            self.mean[6] = self.mean[6] % (2 * np.pi)
            self.matched = bool(np.argmax(marginalization) != 0)
            if detection_idx is None or detection_idx < 0:
                # No detection was associated: leave the feature cache alone.
                # A negative index would otherwise silently pick a detection
                # from the end of the list.
                self.last_box = None
                return
            self.feature_update(detection, detection_idx, lstm)
            self.hits += 1
            self.time_since_update = 0
            detection = detection[detection_idx]
            self.last_box = detection
        else:
            detection = detection[detection_idx]
            if compare_2d:
                detection_3d = None
            else:
                detection_3d = self._unwrap_heading(np.copy(detection.box_3d))
            self.mean, self.covariance = kf.update(
                self.mean, self.covariance, detection.tlwh, detection_3d)
            self.mean[6] = self.mean[6] % (2 * np.pi)
            # Same bookkeeping as the JPDA branch, so that the track can be
            # confirmed and its miss counter is reset.
            self._store_features(detection.feature, detection.appearance_feature, lstm)
            self.hits += 1
            self.time_since_update = 0
            self.last_box = detection

        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted

    def _store_features(self, feature, appearance_feature, lstm):
        """Append one feature / appearance feature pair to the caches.

        `None` entries are skipped. When `lstm` is given, `feature` is run
        through it (carrying `self.hidden` across calls) and the output is
        cached instead of the raw feature.
        """
        if feature is not None:
            if lstm is not None:
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
        """Add the features of the current frame to the caches.

        Parameters
        ----------
        detections : List[Detection]
            Detections of the current frame.
        detection_idx : int
            Index of the associated detection (used when `JPDA` is False).
        lstm : Optional[callable]
            Optional recurrent model applied to the feature (non-JPDA only).
        JPDA : bool
            If True, cache the sum of all detection features weighted by
            `marginalization[1:]`; a cache is skipped when any detection
            lacks the corresponding feature.
        marginalization : Optional[ndarray]
            Association probabilities; required when `JPDA` is True.

        """
        if JPDA:
            features=[d.feature for d in detections]
            appearance_features=[d.appearance_feature for d in detections]
            if len([i for i in features if i is None])==0:
                combined_feature=np.sum(np.array(features).reshape(len(features), -1)
                                        *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features.append(combined_feature)
            if len([i for i in appearance_features if i is None])==0:
                combined_feature=np.sum(
                                np.array(appearance_features).reshape(len(appearance_features), -1)
                                *marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features_2d.append(combined_feature)
        else:
            chosen = detections[detection_idx]
            self._store_features(chosen.feature, chosen.appearance_feature, lstm)

    def get_cov(self):
        """Return a 6x6 covariance built from selected state covariance entries.

        The top-left 3x3 block is `covariance[:3, :3]`; row and column 5 hold
        `covariance[7, :3]` and `covariance[7, 7]`. Rows and columns 3 and 4
        stay zero.
        """
        # NOTE(review): `update` treats state index 6 as the heading, while
        # index 7 is read here -- confirm against the Kalman filter's layout.
        xyz_cov = self.covariance[:3, :3]
        theta_cov_1 = self.covariance[7, :3]
        theta_cov_2 = self.covariance[7, 7]
        out_cov = np.zeros((6, 6))
        out_cov[:3,:3] = xyz_cov
        out_cov[5, :3] = theta_cov_1
        out_cov[:3, 5] = theta_cov_1
        out_cov[5, 5] = theta_cov_2
        return out_cov

================================================
FILE: src/tracker_3d 2.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
import double_measurement_kf
import linear_assignment
import iou_matching
from track_3d import Track_3d
import JPDA_matching
import tracking_utils
import math
import torch
from nn_matching import NearestNeighborDistanceMetric

class Tracker_3d:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : double_measurement_kf.KF_3D
        A Kalman filter (built from `calib` and `kf_vel_params`) used to
        filter target trajectories.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model = None,
                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):

        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.max_age = max_age
        self.n_init = n_init
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d
        self.appearance_model = appearance_model

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        targets = np.array([tracks[i].track_id for i in track_indices])
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)
        else:
            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)
        #cost_matrix = self.metric.distance(features, targets, compare_2d)
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        use_3d = not compare_2d
        # for i in detection_indices:
        #     if dets[i].box_3d is None:
        #         use_3d = False
        #         break
        if use_3d:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)
        dets_for_gating = dets

        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets_for_gating, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))

        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.

        """

        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # update filter for each assigned track
        # Only do this for non-JPDA because in JPDA the kf states are updated
        # during the matching process
        # update track state for unmatched tracks
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        
        self.prune_tracks()
        # create new tracks
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

         # Update distance metric.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []

        self.metric.partial_fit(
            features, features_2d, targets, targets_2d, active_targets)

    # @profile
    def _match(self, detections):

        # Associate confirmed tracks using appearance features.
        if self.JPDA:
            # Only run JPDA on confirmed tracks
            det_3d_idx = []
            det_2d_idx = []
            for idx, det in enumerate(detections):
                if det.box_3d is not None:
                    det_3d_idx.append(idx)
                else:
                    det_2d_idx.append(idx)
            marginalizations = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,
                                       self.dummy_node_cost_iou, self.tracks, \
                                       detections, compare_2d=False,
                                       detection_indices=det_3d_idx)
            #print(marginalizations) 
            dets_matching_3d = [d for i, d in enumerate(detections) if i in det_3d_idx]
            jpda_matcher = JPDA_matching.Matcher(
                detections, marginalizations, range(len(self.tracks)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

            # Map matched tracks to detections
            track_detection_map = {t:d for (t,d) in matches_a}

            # Map unmatched tracks to -1 for no detection
            for t in unmatched_tracks_a:
                track_detection_map[t] = -1
            if det_2d_idx:
                marginalizations_2d = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks, \
                    detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)
                dets_matching_2d = [d for i, d in enumerate(detections) if i in det_2d_idx]
                jpda_matcher = JPDA_matching.Matcher(
                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
                matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()

                track_detection_map_2d = {unmatched_tracks_a[t]:d for (t,d) in matches_a}

                # Map unmatched tracks to -1 for no detection
                for t in unmatched_tracks_2d:
                    track_detection_map_2d[unmatched_tracks_a[t]] = -1
            # udpate Kalman state
            if marginalizations.shape[0] > 0:
                for i in range(len(self.tracks)):
                    if det_2d_idx and i in unmatched_tracks_a:
                        self.tracks[i].update(self.kf, dets_matching_2d,
                            marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i], 
                            JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)
                    else:
                        self.tracks[i].update(self.kf, dets_matching_3d,
                            marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], 
                            JPDA=self.JPDA, lstm = self.lstm)

        else:
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, self.metric.matching_threshold, self.max_age,
                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)

        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm))
        self._next_id += 1
    
    def prune_tracks(self):

        # for track in self.tracks:
        #     # Check if track is leaving
        #     predicted_mean = self.kf.predict_mean(track.mean)
        #     predicted_cov = track.covariance
        #     predicted_pos = predicted_mean[:2]
        #     predicted_vel = predicted_mean[4:6]
        #     predicted_pos[0] -= w/2
        #     predicted_pos[1] -= h/2

        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*
        #                                             np.linalg.norm(predicted_vel) + 1e-6)
        #     predicted_pos[0] += w/2
        #     predicted_pos[1] += h/2
        #     # Thresholds for deciding whether track is outside image
        #     BORDER_VALUE = 0
        #     if (cos_theta > 0 and
        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or
        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):
        #         if track.is_exiting() and not track.matched:
        #             track.delete_track()
        #         else:
        #             track.mark_exiting()
            # Check if track is too uncertain
            # cov_axis,_ = np.linalg.eigh(predicted_cov)
            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):
            #    track.delete_track()
        self.tracks = [t for t in self.tracks if not t.is_deleted()]


================================================
FILE: src/tracker_3d.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
import double_measurement_kf
import linear_assignment
import iou_matching
from track_3d import Track_3d
import JPDA_matching
import tracking_utils
import math
import torch
from nn_matching import NearestNeighborDistanceMetric

class Tracker_3d:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : double_measurement_kf.KF_3D
        A Kalman filter (built from `calib` and `kf_vel_params`) used to
        filter target trajectories.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model = None,
                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):

        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.max_age = max_age
        self.n_init = n_init
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d
        self.appearance_model = appearance_model

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        targets = np.array([tracks[i].track_id for i in track_indices])
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)
        else:
            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)
        #cost_matrix = self.metric.distance(features, targets, compare_2d)
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        use_3d = not compare_2d
        # for i in detection_indices:
        #     if dets[i].box_3d is None:
        #         use_3d = False
        #         break
        if use_3d:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)
        dets_for_gating = dets

        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets_for_gating, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))

        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.

        """

        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # update filter for each assigned track
        # Only do this for non-JPDA because in JPDA the kf states are updated
        # during the matching process
        # update track state for unmatched tracks
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        
        self.prune_tracks()
        # create new tracks
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

         # Update distance metric.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []

        self.metric.partial_fit(
            features, features_2d, targets, targets_2d, active_targets)

    # @profile
    def _match(self, detections):

        # Associate confirmed tracks using appearance features.
        if self.JPDA:
            # Only run JPDA on confirmed tracks
            det_3d_idx = []
            det_2d_idx = []
            for idx, det in enumerate(detections):
                if det.box_3d is not None:
                    det_3d_idx.append(idx)
                else:
                    det_2d_idx.append(idx)
            marginalizations = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,
                                       self.dummy_node_cost_iou, self.tracks, \
                                       detections, compare_2d=False,
                                       detection_indices=det_3d_idx)
            #print(marginalizations) 
            dets_matching_3d = [d for i, d in enumerate(detections) if i in det_3d_idx]
            jpda_matcher = JPDA_matching.Matcher(
                detections, marginalizations, range(len(self.tracks)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

            # Map matched tracks to detections
            track_detection_map = {t:d for (t,d) in matches_a}

            # Map unmatched tracks to -1 for no detection
            for t in unmatched_tracks_a:
                track_detection_map[t] = -1
            if det_2d_idx:
                marginalizations_2d = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks, \
                    detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)
                dets_matching_2d = [d for i, d in enumerate(detections) if i in det_2d_idx]
                jpda_matcher = JPDA_matching.Matcher(
                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
                matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()

                track_detection_map_2d = {unmatched_tracks_a[t]:d for (t,d) in matches_a}

                # Map unmatched tracks to -1 for no detection
                for t in unmatched_tracks_2d:
                    track_detection_map_2d[unmatched_tracks_a[t]] = -1
            # udpate Kalman state
            if marginalizations.shape[0] > 0:
                for i in range(len(self.tracks)):
                    if det_2d_idx and i in unmatched_tracks_a:
                        self.tracks[i].update(self.kf, dets_matching_2d,
                            marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i], 
                            JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)
                    else:
                        self.tracks[i].update(self.kf, dets_matching_3d,
                            marginalization=marginalizations[i,:], detection_idx=track_detection_map[i], 
                            JPDA=self.JPDA, lstm = self.lstm)

        else:
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, self.metric.matching_threshold, self.max_age,
                    self.tracks, detections, confirmed_tracks, compare_2d = compare_2d, detections_3d=detections_3d)

        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm))
        self._next_id += 1
    
    def prune_tracks(self):

        # for track in self.tracks:
        #     # Check if track is leaving
        #     predicted_mean = self.kf.predict_mean(track.mean)
        #     predicted_cov = track.covariance
        #     predicted_pos = predicted_mean[:2]
        #     predicted_vel = predicted_mean[4:6]
        #     predicted_pos[0] -= w/2
        #     predicted_pos[1] -= h/2

        #     cos_theta = np.dot(predicted_pos, predicted_vel)/(np.linalg.norm(predicted_pos)*
        #                                             np.linalg.norm(predicted_vel) + 1e-6)
        #     predicted_pos[0] += w/2
        #     predicted_pos[1] += h/2
        #     # Thresholds for deciding whether track is outside image
        #     BORDER_VALUE = 0
        #     if (cos_theta > 0 and
        #         (predicted_pos[0] - track.mean[2]/2<= BORDER_VALUE or
        #         predicted_pos[0] + track.mean[2]/2 >= w - BORDER_VALUE)):
        #         if track.is_exiting() and not track.matched:
        #             track.delete_track()
        #         else:
        #             track.mark_exiting()
            # Check if track is too uncertain
            # cov_axis,_ = np.linalg.eigh(predicted_cov)
            # if np.abs(np.sqrt(cov_axis[-1]))*6 > self.uncertainty_limit*np.linalg.norm(predicted_mean[2:4]):
            #    track.delete_track()
        self.tracks = [t for t in self.tracks if not t.is_deleted()]


================================================
FILE: src/tracker_3d_node 2.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
    Subscribe to time-synchronized 2D and 3D detections with features, run the JPDA 3D tracker on them, and publish the tracked persons
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import os
import message_filters
from featurepointnet_model_util import generate_detections_3d, \
    convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from jpda_rospack.msg import detection3d_with_feature_array, \
    detection3d_with_feature, detection2d_with_feature_array
from tracking_utils import convert_detections, combine_features
from combination_model import CombiNet
from tracker_3d import Tracker_3d
from visualization_msgs.msg import MarkerArray, Marker
from std_msgs.msg import Int8
from geometry_msgs.msg import Pose, PoseWithCovariance
from spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons

import pdb


class Tracker_3D_node:
    def __init__(self):
        """Start the ROS node, build the tracker and wire up the topics.

        Reads the private parameters ``~combination_depth_weight`` (default
        1), ``~calib_3d`` (default ``'src/jpda_rospack/calib/'``) and
        ``~combination_model_path`` (default False, meaning no combination
        model is loaded).

        Subscribes to ``detection2d_with_feature`` and
        ``detection3d_with_feature`` through a ``TimeSynchronizer`` whose
        callback is `do_3d_tracking`, and creates publishers for
        ``/jpda_output`` (``TrackedPersons``) and ``/test`` (``Int8``).
        """
        self.node_name = "tracker_3d"

        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)

        self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))
        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
        calib = OmniCalibration(calibration_folder)
        self.tracker = Tracker_3d(max_age=25, n_init=3,
                                  JPDA=True, m_best_sol=10, assn_thresh=0.6,
                                  matching_strategy='hungarian',
                                  cuda=True, calib=calib, omni=True,
                                  kf_vel_params=(0.08, 0.03, 0.01, 0.03,
                                                 1.2, 3.9, 0.8, 1.6),
                                  dummy_node_cost_iou=0.9, dummy_node_cost_app=6,
                                  nn_budget=3, dummy_node_cost_iou_2d=0.5)

        combination_model_path = rospy.get_param('~combination_model_path', False)
        if combination_model_path:
            self.combination_model = CombiNet()
            checkpoint = torch.load(combination_model_path)
            self.combination_model.load_state_dict(checkpoint['state_dict'])
            # Best-effort move to the GPU.  The call must go through
            # ``self.``: the bare name ``combination_model`` is undefined, so
            # the old code raised a NameError that a bare ``except`` swallowed
            # and the model was never actually moved.
            try:
                self.combination_model.cuda()
            except Exception as err:
                rospy.logwarn(
                    "Could not move combination model to CUDA: {}".format(err))
            self.combination_model.eval()
        else:
            self.combination_model = None

        self.detection_2d_sub = \
            message_filters.Subscriber("detection2d_with_feature",
                                       detection2d_with_feature_array,
                                       queue_size=5)
        self.detection_3d_sub = \
            message_filters.Subscriber("detection3d_with_feature",
                                       detection3d_with_feature_array,
                                       queue_size=5)

        # Debug aid (disabled): find_time_diff_2d / find_time_diff_3d can be
        # registered on the two subscribers to print stamp offsets; they
        # need self.last_seen_2d and self.last_seen_3d initialised first.
        # self.detection_2d_sub.registerCallback(self.find_time_diff_2d)
        # self.detection_3d_sub.registerCallback(self.find_time_diff_3d)
        # self.last_seen_2d = 0
        # self.last_seen_3d = 0

        # Deliver 2D and 3D detection arrays to the callback together.
        self.time_sync = \
            message_filters.TimeSynchronizer([self.detection_2d_sub,
                                              self.detection_3d_sub],
                                             5)
        self.time_sync.registerCallback(self.do_3d_tracking)

        self.tracker_output_pub = rospy.Publisher("/jpda_output", TrackedPersons,
                                                  queue_size=30)

        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")
        
    def do_3d_tracking(self, detections_2d, detections_3d):
        """Track one synchronized pair of 2D/3D detection arrays and publish.

        The 2D and 3D detections are sorted by ``frame_det_id`` and paired
        when their ids are equal; detections without a partner are dropped.
        For every pair the 2D box and feature are kept, and the 3D box and
        feature are kept only when the 3D detection is flagged ``valid``
        (otherwise ``None`` placeholders are stored).  The features are
        combined, converted to tracker detections, and the tracker is
        advanced (``predict`` then ``update``).  Finally every confirmed
        track is published on ``/jpda_output`` as a ``TrackedPerson``.
        """
        start = time.time()  # only needed by the optional timing log below

        by_det_id = lambda det: det.frame_det_id
        dets_2d = sorted(detections_2d.detection2d_with_features, key=by_det_id)
        dets_3d = sorted(detections_3d.detection3d_with_features, key=by_det_id)

        boxes_2d, boxes_3d, valid_3d = [], [], []
        features_2d, features_3d = [], []

        # Merge-join of the two id-sorted lists.
        pos_2d = pos_3d = 0
        while pos_2d < len(dets_2d) and pos_3d < len(dets_3d):
            det_2d = dets_2d[pos_2d]
            det_3d = dets_3d[pos_3d]
            if det_2d.frame_det_id != det_3d.frame_det_id:
                # No partner at this position: advance the list that is behind.
                if det_2d.frame_det_id < det_3d.frame_det_id:
                    pos_2d += 1
                else:
                    pos_3d += 1
                continue

            pos_2d += 1
            pos_3d += 1
            valid_3d.append(det_3d.valid)
            boxes_2d.append(np.array([det_2d.x1, det_2d.y1, det_2d.x2, det_2d.y2, 1, -1, -1]))
            features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))
            if not det_3d.valid:
                boxes_3d.append(None)
                features_3d.append(None)
                continue
            boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z, det_3d.l, det_3d.h, det_3d.w, det_3d.theta]))
            features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))

        # An empty list of 3D boxes is passed on as None.
        if not boxes_3d:
            boxes_3d = None

        features_3d, features_2d = combine_features(features_2d, features_3d,
                                                    valid_3d, self.combination_model,
                                                    depth_weight=self.depth_weight)
        detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)

        self.tracker.predict()
        self.tracker.update(None, detections)

        stamp_source = detections_3d.header
        tracked_array = TrackedPersons()
        tracked_array.header.stamp = stamp_source.stamp
        tracked_array.header.frame_id = 'occam'

        for track in self.tracker.tracks:
            if not track.is_confirmed():
                continue

            person = TrackedPerson()
            person.header.stamp = stamp_source.stamp
            person.header.frame_id = 'occam'
            person.track_id = track.track_id
            # A track counts as matched while it was updated within the
            # last frame or two.
            person.is_matched = True if track.time_since_update < 2 else False

            bbox = track.to_tlwh3d()
            covariance = track.get_cov().reshape(-1).tolist()

            pose = Pose()
            pose.position.x = bbox[0]
            pose.position.y = bbox[1] - bbox[4]/2
            pose.position.z = bbox[2]
            person.pose = PoseWithCovariance(pose=pose, covariance=covariance)
            tracked_array.tracks.append(person)

        self.tracker_output_pub.publish(tracked_array)

        #rospy.loginfo("tracker time: {}".format(time.time() - start))

    def find_time_diff_2d(self, a):
        print(a.header.stamp - self.last_seen_3d)
        self.last_seen_2d = a.header.stamp

    def find_time_diff_3d(self, a):
        print(a.header.stamp - self.last_seen_2d)
        self.last_seen_3d = a.header.stamp

    def cleanup(self):
        print("Shutting down 3D tracking node.")
        del self.combination_model
        del self.tracker
        del self.detection_2d_sub
        del self.detection_3d_sub
        del self.time_sync
        del self.tracker_output_pub
    
def main(args):
    """Create the 3D tracking node and block until ROS shuts down.

    ``args`` is accepted for the command-line entry point but is not used.
    A ``KeyboardInterrupt`` raised while constructing the node or spinning
    is reported with a shutdown message instead of propagating.
    """
    try:
        node = Tracker_3D_node()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D tracking node.")

# Script entry point: forward the raw command-line arguments to main().
if __name__ == '__main__':
    main(sys.argv)


================================================
FILE: src/tracker_3d_node.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
    Subscribe to time-synchronized 2D and 3D detections with features, run the JPDA 3D tracker on them, and publish the tracked persons
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import os
import message_filters
from featurepointnet_model_util import generate_detections_3d, \
    convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from jpda_rospack.msg import detection3d_with_feature_array, \
    detection3d_with_feature, detection2d_with_feature_array
from tracking_utils import convert_detections, combine_features
from combination_model import CombiNet
from tracker_3d import Tracker_3d
from visualization_msgs.msg import MarkerArray, Marker
from std_msgs.msg import Int8
from geometry_msgs.msg import Pose, PoseWithCovariance
from spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons

import pdb


class Tracker_3D_node:
    """ROS node that tracks persons in 3D from paired 2D and 3D detections.

    The node subscribes to ``detection2d_with_feature`` and
    ``detection3d_with_feature``, pairs the two messages with a
    ``message_filters.TimeSynchronizer``, feeds the matched detections to a
    ``Tracker_3d`` instance and publishes every confirmed track in a
    ``TrackedPersons`` message on ``/jpda_output``.
    """

    # Attributes dropped by cleanup(), in the order they are released.
    _RESOURCE_ATTRS = ("combination_model", "tracker", "detection_2d_sub",
                       "detection_3d_sub", "time_sync", "tracker_output_pub")

    def __init__(self):
        """Initialise the ROS node, the tracker, the optional fusion model and all topics.

        Private ROS parameters read here:
            ~combination_depth_weight: scale applied to depth features (default 1).
            ~calib_3d: calibration folder handed to ``OmniCalibration``.
            ~combination_model_path: checkpoint for ``CombiNet``; when unset or
                falsy no combination model is used.
        """
        self.node_name = "tracker_3d"

        rospy.init_node(self.node_name)
        # Registered before the attributes below exist, which is why cleanup()
        # has to tolerate a partially constructed node.
        rospy.on_shutdown(self.cleanup)

        self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))
        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
        calib = OmniCalibration(calibration_folder)
        self.tracker = Tracker_3d(max_age=25, n_init=3,
                                  JPDA=True, m_best_sol=10, assn_thresh=0.6,
                                  matching_strategy='hungarian',
                                  cuda=True, calib=calib, omni=True,
                                  kf_vel_params=(0.08, 0.03, 0.01, 0.03,
                                                 1.2, 3.9, 0.8, 1.6),
                                  dummy_node_cost_iou=0.9, dummy_node_cost_app=6,
                                  nn_budget=3, dummy_node_cost_iou_2d=0.5)

        combination_model_path = rospy.get_param('~combination_model_path', False)
        if combination_model_path:
            self.combination_model = CombiNet()
            checkpoint = torch.load(combination_model_path)
            self.combination_model.load_state_dict(checkpoint['state_dict'])
            try:
                # The model has to follow the features, which do_3d_tracking()
                # places on 'cuda:0'.
                self.combination_model.cuda()
            except Exception as err:
                # Best effort, as before, but no longer silent.
                rospy.logwarn(
                    "Could not move the combination model to CUDA: {}".format(err))
            self.combination_model.eval()
        else:
            self.combination_model = None

        self.detection_2d_sub = \
            message_filters.Subscriber("detection2d_with_feature",
                                       detection2d_with_feature_array,
                                       queue_size=5)
        self.detection_3d_sub = \
            message_filters.Subscriber("detection3d_with_feature",
                                       detection3d_with_feature_array,
                                       queue_size=5)

        # Uncomment to print the stamp offset between the two input streams
        # (see find_time_diff_2d / find_time_diff_3d):
        # self.detection_2d_sub.registerCallback(self.find_time_diff_2d)
        # self.detection_3d_sub.registerCallback(self.find_time_diff_3d)
        # self.last_seen_2d = 0
        # self.last_seen_3d = 0
        self.time_sync = \
            message_filters.TimeSynchronizer([self.detection_2d_sub,
                                              self.detection_3d_sub],
                                             5)
        self.time_sync.registerCallback(self.do_3d_tracking)

        self.tracker_output_pub = rospy.Publisher("/jpda_output", TrackedPersons,
                                                  queue_size=30)

        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def do_3d_tracking(self, detections_2d, detections_3d):
        """Run one tracking step on a synchronized pair of detection messages.

        Detections are paired by ``frame_det_id``; a detection present in only
        one of the two messages is dropped. The paired detections are fused,
        passed to the tracker, and all confirmed tracks are published.

        Args:
            detections_2d: message whose ``detection2d_with_features`` entries
                expose ``frame_det_id``, ``x1``, ``y1``, ``x2``, ``y2`` and
                ``feature``.
            detections_3d: message whose ``detection3d_with_features`` entries
                expose ``frame_det_id``, ``valid``, ``x``, ``y``, ``z``, ``l``,
                ``h``, ``w``, ``theta`` and ``feature``; its header stamp is
                reused for the published output.
        """
        boxes_2d = []
        boxes_3d = []
        valid_3d = []
        features_2d = []
        features_3d = []
        dets_2d = sorted(detections_2d.detection2d_with_features,
                         key=lambda x: x.frame_det_id)
        dets_3d = sorted(detections_3d.detection3d_with_features,
                         key=lambda x: x.frame_det_id)
        # Two-pointer merge over both id-sorted lists.
        i, j = 0, 0
        while i < len(dets_2d) and j < len(dets_3d):
            det_2d = dets_2d[i]
            det_3d = dets_3d[j]
            if det_2d.frame_det_id == det_3d.frame_det_id:
                i += 1
                j += 1
                valid_3d.append(det_3d.valid)
                # Fifth entry (1) fills the confidence slot; the two trailing
                # -1 values are placeholders.
                boxes_2d.append(np.array([det_2d.x1, det_2d.y1, det_2d.x2, det_2d.y2, 1, -1, -1]))
                features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))
                if det_3d.valid:
                    boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z, det_3d.l, det_3d.h, det_3d.w, det_3d.theta]))
                    features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))
                else:
                    boxes_3d.append(None)
                    features_3d.append(None)
            elif det_2d.frame_det_id < det_3d.frame_det_id:
                i += 1
            else:
                j += 1

        if not boxes_3d:
            boxes_3d = None
        features_3d, features_2d = combine_features(features_2d, features_3d,
                                                    valid_3d, self.combination_model,
                                                    depth_weight=self.depth_weight)
        detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)
        self.tracker.predict()
        self.tracker.update(None, detections)
        tracked_array = TrackedPersons()
        tracked_array.header.stamp = detections_3d.header.stamp
        tracked_array.header.frame_id = 'occam'

        for track in self.tracker.tracks:
            if not track.is_confirmed():
                continue
            tracked_person_msg = TrackedPerson()
            tracked_person_msg.header.stamp = detections_3d.header.stamp
            tracked_person_msg.header.frame_id = 'occam'
            tracked_person_msg.track_id = track.track_id
            # A track counts as matched while it was updated within the last
            # two steps.
            tracked_person_msg.is_matched = bool(track.time_since_update < 2)
            bbox = track.to_tlwh3d()
            covariance = track.get_cov().reshape(-1).tolist()
            pose = Pose()
            pose.position.x = bbox[0]
            # NOTE(review): presumably shifts y by half of bbox[4] to move from
            # the box bottom to its centre -- confirm against to_tlwh3d().
            pose.position.y = bbox[1] - bbox[4]/2
            pose.position.z = bbox[2]
            tracked_person_msg.pose = PoseWithCovariance(pose=pose,
                                                         covariance=covariance)
            tracked_array.tracks.append(tracked_person_msg)

        self.tracker_output_pub.publish(tracked_array)

    def find_time_diff_2d(self, a):
        """Debug callback: print ``a.header.stamp - self.last_seen_3d`` and store the 2D stamp.

        Only usable after the commented-out block in ``__init__`` has been
        enabled, because that block initialises ``last_seen_2d``/``last_seen_3d``.
        """
        print(a.header.stamp - self.last_seen_3d)
        self.last_seen_2d = a.header.stamp

    def find_time_diff_3d(self, a):
        """Debug callback: print ``a.header.stamp - self.last_seen_2d`` and store the 3D stamp.

        Only usable after the commented-out block in ``__init__`` has been
        enabled, because that block initialises ``last_seen_2d``/``last_seen_3d``.
        """
        print(a.header.stamp - self.last_seen_2d)
        self.last_seen_3d = a.header.stamp

    def cleanup(self):
        """Shutdown hook: announce shutdown and drop the node's resources.

        Attributes that were never created (``__init__`` failed part-way) are
        skipped, and calling the hook twice is harmless.
        """
        print("Shutting down 3D tracking node.")
        for attr_name in self._RESOURCE_ATTRS:
            self.__dict__.pop(attr_name, None)
    
def main(args):
    """Create the 3D tracker node and block in ``rospy.spin()``.

    A ``KeyboardInterrupt`` raised while constructing the node or spinning is
    caught and reported with a shutdown message.

    Args:
        args: command-line arguments; accepted but not used.
    """
    try:
        # Constructing the node wires up all subscribers and publishers.
        node = Tracker_3D_node()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D tracking node.")

# Script entry point: start the node only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)


================================================
FILE: src/tracking_utils 2.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from aligned_reid_utils import load_state_dict
from featurepointnet_model_util import rotate_pc_along_y
from deep_sort_utils import non_max_suppression as deepsort_nms
import math
from detection import Detection


def create_detector(config_path, weight_path, cuda):
    """Build a Darknet detector, load its weights and put it in eval mode.

    Args:
        config_path: passed to the ``Darknet`` constructor.
        weight_path: passed to the detector's ``load_weights``.
        cuda: when truthy, ``.cuda()`` is called on the detector.

    Returns:
        The detector, after ``eval()`` has been called on it.
    """
    # NOTE(review): ``Darknet`` is not among this module's visible imports --
    # confirm where it is expected to come from before calling this.
    net = Darknet(config_path)
    net.load_weights(weight_path)
    if cuda:
        net.cuda()
    net.eval()
    return net

def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Cut one fixed-size point patch out of ``point_cloud`` for every 3D box.

    For each box whose id is not -1 the cloud is rotated by
    ``pi/2 + box[6]`` about the y axis, the points inside an axis-aligned
    region around the (equally rotated) box centre are selected, and the
    rotation is undone. The selected points are resampled to exactly
    ``num_point`` rows and passed through ``get_center_view_point_set``.

    Args:
        point_cloud: 2-D array indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``
            for x, y, z. It is rotated and un-rotated through
            ``rotate_pc_along_y`` on every iteration.
        box_3d: sequence of boxes read as ``box[0:3]`` centre, ``box[3]``,
            ``box[4]``, ``box[5]`` extents and ``box[6]`` rotation angle.
        ids_3d: per-box ids; -1 marks a box to skip. Modified in place: boxes
            that contain no points are set to -1.
        rot_angles: per-box angle forwarded to ``get_center_view_point_set``.
        num_point: number of points every returned patch is resampled to.

    Returns:
        ``(depth_patches, ids_3d)`` where ``depth_patches[i]`` is ``None`` for
        skipped or empty boxes.
    """
    # NOTE(review): ``get_center_view_point_set`` is not among this module's
    # visible imports -- confirm where it is expected to come from.
    # NOTE(review): the return value of ``rotate_pc_along_y`` is ignored, so the
    # code presumably relies on it rotating its argument in place -- confirm.
    #print(ids_3d)
    depth_patches = []
    for i, box in enumerate(box_3d):
        if ids_3d[i] == -1:
            depth_patches.append(None)
            continue
        # Rotate the box centre into the box-aligned frame.
        box_center = np.asarray([ [box[0], box[1], box[2]] ])
        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))
        box_center = box_center[0]
        # Rotate the whole cloud the same way so the box becomes axis-aligned.
        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))
        x = point_cloud[:, 0]
        y = point_cloud[:, 1]
        z = point_cloud[:, 2]
        # x and z: within half an extent on either side of the centre.
        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))
        # y: from a full box[4] below the centre value up to 0.1 above it.
        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))
        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))
        idx = np.logical_and(idx_1, idx_2)
        idx = np.logical_and(idx, idx_3)
        # Boolean-mask indexing copies, so the patch is un-rotated separately.
        depth_patch = point_cloud[idx, :]
        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration
        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))

        if depth_patch.size == 0:
            # No points fell inside the box: invalidate it.
            ids_3d[i] = -1
            depth_patches.append(None)
        else:
            if depth_patch.shape[0] > num_point:
                # Too many points: subsample without replacement.
                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)
            else:

                # Too few points: pad with randomly repeated points.
                pc_in_box_fov = np.expand_dims(
                            np.vstack([depth_patch,
                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])
                            , 0)
            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])

    return depth_patches, ids_3d


def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Suppress overlapping 3D boxes with the deep-sort NMS routine.

    Every index that ``deepsort_nms`` does not keep is invalidated in all four
    input sequences, which are modified in place.

    Args:
        detections: per-detection entries; suppressed ones become ``None``.
        boxes_3d: boxes handed to ``deepsort_nms``; suppressed ones become ``None``.
        ids_3d: per-detection 3D ids; suppressed ones become -1.
        ids_2d: per-detection 2D ids; suppressed ones become -1.
        nms_thresh: overlap threshold forwarded to ``deepsort_nms``.
        confidence: optional per-detection scores. Squeezed before being
            forwarded; ``None`` is forwarded unchanged.

    Returns:
        ``(detections, boxes_3d, ids_2d, ids_3d)`` -- note that the id lists
        come back in the opposite order to the parameters.
    """
    # np.squeeze(None) yields a 0-d object array, which would hide the
    # "no scores" case from the callee, so None is passed through untouched.
    scores = None if confidence is None else np.squeeze(confidence)
    kept = set(int(k) for k in deepsort_nms(boxes_3d, nms_thresh, scores))
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d

def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Greedy non-maximum suppression on point patches using ``iou_3d``.

    Candidates are visited from the highest confidence down (or from the last
    index down when no confidence is given). A visited candidate is kept
    unless its overlap with a remaining candidate is ``-inf``; every remaining
    candidate whose overlap with a kept one exceeds ``nms_thresh`` is dropped.
    Ids of all candidates that were not kept are set to -1 in place.

    Args:
        depth_patches: list of point patches (entries may be ``None``).
        ids_3d: per-patch 3D ids, modified in place.
        ids_2d: per-patch 2D ids, modified in place. Only reset when the
            matching 3D id was still valid (not -1).
        nms_thresh: overlap above which a candidate is suppressed.
        confidence: optional per-patch scores; flattened before sorting.

    Returns:
        ``(depth_patches, ids_3d, ids_2d)``, also for empty input.
    """
    if len(depth_patches) == 0:
        # Same 3-tuple as the regular path so callers can always unpack it.
        return depth_patches, ids_3d, ids_2d

    if confidence is not None:
        # Flatten so column vectors such as (N, 1) sort across detections.
        idxs = np.argsort(np.ravel(confidence))
    else:
        idxs = np.arange(len(depth_patches))

    pick = set()
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        if np.any(overlap == -np.inf):
            # iou_3d reports -inf when the current patch itself is missing.
            idxs = np.delete(idxs, [last])
            continue
        pick.add(int(i))
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))
    for i in range(len(depth_patches)):
        if i not in pick:
            if ids_3d[i] != -1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d

def iou_3d(patch_1, patch_2, dist_thresh=0.3):
    """Point-based overlap ratio between two point patches.

    A unique point of ``patch_1`` counts as intersecting when some unique
    point of ``patch_2`` lies closer than ``dist_thresh`` (Euclidean distance
    over all columns). The ratio is
    ``intersection / (n_unique_1 + n_unique_2 - intersection)``; it is not
    symmetric in its arguments.

    Args:
        patch_1: array of shape (N, 3) or (N, 4), or ``None``.
        patch_2: array of shape (M, 3) or (M, 4), or ``None``.
        dist_thresh: distance below which two points are considered the same.

    Returns:
        ``np.inf`` when ``patch_2`` is ``None`` (checked first), ``-np.inf``
        when only ``patch_1`` is ``None``, ``0.0`` when either patch has no
        points, otherwise the overlap ratio as a float.
    """
    #Expecting patches of shape (N, 4) or (N,3) (numpy arrays)
    if patch_2 is None:
        return np.inf
    elif patch_1 is None:
        return -np.inf
    patch_1 = np.asarray(patch_1)
    patch_2 = np.asarray(patch_2)
    if patch_1.shape[0] == 0 or patch_2.shape[0] == 0:
        # Nothing can intersect; also avoids 0/0 when both are empty.
        return 0.0
    # Unique points
    patch_unique_1 = np.unique(patch_1, axis = 0)
    patch_unique_2 = np.unique(patch_2, axis = 0)
    intersection_points = 0
    for point in patch_unique_1:
        point_distance = np.sqrt(np.sum((point - patch_unique_2)**2, axis = 1))
        if np.any(point_distance < dist_thresh):
            intersection_points += 1

    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points

    # Explicit float so the ratio is not truncated by integer division under
    # Python 2.
    return float(intersection_points) / union_points

def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw per-detection data into ``Detection`` objects.

    Args:
        detections: 7-element entries ``(x1, y1, x2, y2, conf, _, _)``.
        features: per-detection feature (may be ``None``), passed as the last
            ``Detection`` argument.
        appearance_features: per-detection appearance feature.
        detections_3d: 7-element 3D boxes (entries may be ``None``), or
            ``None`` when no detection has a 3D box.

    Returns:
        A list with one ``Detection`` per zipped input entry. The 2D box is
        converted from corner form to ``[x1, y1, width, height]``; the 3D box
        is withheld whenever the feature is ``None``.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    wrapped = []
    for det_2d, feat, app_feat, det_3d in zip(detections, features,
                                              appearance_features, detections_3d):
        left, top, right, bottom, score, _, _ = det_2d
        tlwh = [left, top, right - left, bottom - top]

        volume = None
        if det_3d is not None:
            # Unpacking enforces the 7-element layout.
            cx, cy, cz, dim_a, dim_b, dim_c, yaw = det_3d
            volume = [cx, cy, cz, dim_a, dim_b, dim_c, yaw]
        if feat is None:
            # A detection without a feature is forwarded without its 3D box.
            volume = None

        wrapped.append(Detection(tlwh, volume, score, app_feat, feat))

    return wrapped

def combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):
    """Concatenate appearance and depth features and optionally fuse them.

    Args:
        features: per-detection appearance feature tensors.
        depth_features: per-detection depth feature tensors; entries may be
            ``None`` when no depth feature exists.
        ids_3d: per-detection flag; a falsy entry means the depth feature is
            replaced by a 512-element zero vector.
        combination_model: callable model applied to the stacked concatenated
            features, or ``None`` to return the plain concatenations.
        depth_weight: scale applied to the depth feature before concatenation.

    Returns:
        ``(combined_features, appearance_features)``, both lists with one
        tensor per detection.
    """
    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if not ids_3d[i] or depth_feature is None:
            # The zero placeholder follows the appearance feature's device so
            # torch.cat works wherever the features live (no fixed 'cuda:0').
            depth_feature = torch.zeros(512, device=appearance_feature.device)
        combined_features.append(torch.cat([appearance_feature, depth_feature * depth_weight]))
        appearance_features.append(appearance_feature)

    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            fused = combination_model(torch.stack(combined_features))
        combined_features = list(torch.unbind(fused.detach()))
    return combined_features, appearance_features

def filter(detections, img_width=1242, img_height=375, margin=10):
    """Drop detections that touch both a side edge and the top/bottom edge.

    A detection is replaced by ``None`` when it lies within ``margin`` pixels
    of the left or right image border and also within ``margin`` pixels of
    the top or bottom border. The list is modified in place.

    Args:
        detections: list of boxes read as ``(left, top, right, bottom, ...)``;
            entries that are already ``None`` are left untouched.
        img_width: image width in pixels (default 1242).
        img_height: image height in pixels (default 375).
        margin: border width in pixels (default 10).

    Returns:
        The same ``detections`` list.
    """
    max_right = img_width - margin
    max_bottom = img_height - margin
    for i, det in enumerate(detections):
        if det is None:
            continue
        left, top, right, bottom = det[0], det[1], det[2], det[3]
        near_side = left < margin or right > max_right
        near_top_or_bottom = top < margin or bottom > max_bottom
        if near_side and near_top_or_bottom:
            detections[i] = None
    return detections


================================================
FILE: src/tracking_utils.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from aligned_reid_utils import load_state_dict
from featurepointnet_model_util import rotate_pc_along_y
from deep_sort_utils import non_max_suppression as deepsort_nms
import math
from detection import Detection


def create_detector(config_path, weight_path, cuda):
    """Build a Darknet detector, load its weights and put it in eval mode.

    Args:
        config_path: passed to the ``Darknet`` constructor.
        weight_path: passed to the detector's ``load_weights``.
        cuda: when truthy, ``.cuda()`` is called on the detector.

    Returns:
        The detector, after ``eval()`` has been called on it.
    """
    # NOTE(review): ``Darknet`` is not among this module's visible imports --
    # confirm where it is expected to come from before calling this.
    net = Darknet(config_path)
    net.load_weights(weight_path)
    if cuda:
        net.cuda()
    net.eval()
    return net

def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Cut one fixed-size point patch out of ``point_cloud`` for every 3D box.

    For each box whose id is not -1 the cloud is rotated by
    ``pi/2 + box[6]`` about the y axis, the points inside an axis-aligned
    region around the (equally rotated) box centre are selected, and the
    rotation is undone. The selected points are resampled to exactly
    ``num_point`` rows and passed through ``get_center_view_point_set``.

    Args:
        point_cloud: 2-D array indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``
            for x, y, z. It is rotated and un-rotated through
            ``rotate_pc_along_y`` on every iteration.
        box_3d: sequence of boxes read as ``box[0:3]`` centre, ``box[3]``,
            ``box[4]``, ``box[5]`` extents and ``box[6]`` rotation angle.
        ids_3d: per-box ids; -1 marks a box to skip. Modified in place: boxes
            that contain no points are set to -1.
        rot_angles: per-box angle forwarded to ``get_center_view_point_set``.
        num_point: number of points every returned patch is resampled to.

    Returns:
        ``(depth_patches, ids_3d)`` where ``depth_patches[i]`` is ``None`` for
        skipped or empty boxes.
    """
    # NOTE(review): ``get_center_view_point_set`` is not among this module's
    # visible imports -- confirm where it is expected to come from.
    # NOTE(review): the return value of ``rotate_pc_along_y`` is ignored, so the
    # code presumably relies on it rotating its argument in place -- confirm.
    #print(ids_3d)
    depth_patches = []
    for i, box in enumerate(box_3d):
        if ids_3d[i] == -1:
            depth_patches.append(None)
            continue
        # Rotate the box centre into the box-aligned frame.
        box_center = np.asarray([ [box[0], box[1], box[2]] ])
        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))
        box_center = box_center[0]
        # Rotate the whole cloud the same way so the box becomes axis-aligned.
        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))
        x = point_cloud[:, 0]
        y = point_cloud[:, 1]
        z = point_cloud[:, 2]
        # x and z: within half an extent on either side of the centre.
        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))
        # y: from a full box[4] below the centre value up to 0.1 above it.
        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))
        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))
        idx = np.logical_and(idx_1, idx_2)
        idx = np.logical_and(idx, idx_3)
        # Boolean-mask indexing copies, so the patch is un-rotated separately.
        depth_patch = point_cloud[idx, :]
        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration
        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))

        if depth_patch.size == 0:
            # No points fell inside the box: invalidate it.
            ids_3d[i] = -1
            depth_patches.append(None)
        else:
            if depth_patch.shape[0] > num_point:
                # Too many points: subsample without replacement.
                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)
            else:

                # Too few points: pad with randomly repeated points.
                pc_in_box_fov = np.expand_dims(
                            np.vstack([depth_patch,
                            depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])
                            , 0)
            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])

    return depth_patches, ids_3d


def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Suppress overlapping 3D boxes with the deep-sort NMS routine.

    Every index that ``deepsort_nms`` does not keep is invalidated in all four
    input sequences, which are modified in place.

    Args:
        detections: per-detection entries; suppressed ones become ``None``.
        boxes_3d: boxes handed to ``deepsort_nms``; suppressed ones become ``None``.
        ids_3d: per-detection 3D ids; suppressed ones become -1.
        ids_2d: per-detection 2D ids; suppressed ones become -1.
        nms_thresh: overlap threshold forwarded to ``deepsort_nms``.
        confidence: optional per-detection scores. Squeezed before being
            forwarded; ``None`` is forwarded unchanged.

    Returns:
        ``(detections, boxes_3d, ids_2d, ids_3d)`` -- note that the id lists
        come back in the opposite order to the parameters.
    """
    # np.squeeze(None) yields a 0-d object array, which would hide the
    # "no scores" case from the callee, so None is passed through untouched.
    scores = None if confidence is None else np.squeeze(confidence)
    kept = set(int(k) for k in deepsort_nms(boxes_3d, nms_thresh, scores))
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d

def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Greedy non-maximum suppression on point patches using ``iou_3d``.

    Candidates are visited from the highest confidence down (or from the last
    index down when no confidence is given). A visited candidate is kept
    unless its overlap with a remaining candidate is ``-inf``; every remaining
    candidate whose overlap with a kept one exceeds ``nms_thresh`` is dropped.
    Ids of all candidates that were not kept are set to -1 in place.

    Args:
        depth_patches: list of point patches (entries may be ``None``).
        ids_3d: per-patch 3D ids, modified in place.
        ids_2d: per-patch 2D ids, modified in place. Only reset when the
            matching 3D id was still valid (not -1).
        nms_thresh: overlap above which a candidate is suppressed.
        confidence: optional per-patch scores; flattened before sorting.

    Returns:
        ``(depth_patches, ids_3d, ids_2d)``, also for empty input.
    """
    if len(depth_patches) == 0:
        # Same 3-tuple as the regular path so callers can always unpack it.
        return depth_patches, ids_3d, ids_2d

    if confidence is not None:
        # Flatten so column vectors such as (N, 1) sort across detections.
        idxs = np.argsort(np.ravel(confidence))
    else:
        idxs = np.arange(len(depth_patches))

    pick = set()
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        if np.any(overlap == -np.inf):
            # iou_3d reports -inf when the current patch itself is missing.
            idxs = np.delete(idxs, [last])
            continue
        pick.add(int(i))
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))
    for i in range(len(depth_patches)):
        if i not in pick:
            if ids_3d[i] != -1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d

def iou_3d(patch_1, patch_2, dist_thresh=0.3):
    """Point-based overlap ratio between two point patches.

    A unique point of ``patch_1`` counts as intersecting when some unique
    point of ``patch_2`` lies closer than ``dist_thresh`` (Euclidean distance
    over all columns). The ratio is
    ``intersection / (n_unique_1 + n_unique_2 - intersection)``; it is not
    symmetric in its arguments.

    Args:
        patch_1: array of shape (N, 3) or (N, 4), or ``None``.
        patch_2: array of shape (M, 3) or (M, 4), or ``None``.
        dist_thresh: distance below which two points are considered the same.

    Returns:
        ``np.inf`` when ``patch_2`` is ``None`` (checked first), ``-np.inf``
        when only ``patch_1`` is ``None``, ``0.0`` when either patch has no
        points, otherwise the overlap ratio as a float.
    """
    #Expecting patches of shape (N, 4) or (N,3) (numpy arrays)
    if patch_2 is None:
        return np.inf
    elif patch_1 is None:
        return -np.inf
    patch_1 = np.asarray(patch_1)
    patch_2 = np.asarray(patch_2)
    if patch_1.shape[0] == 0 or patch_2.shape[0] == 0:
        # Nothing can intersect; also avoids 0/0 when both are empty.
        return 0.0
    # Unique points
    patch_unique_1 = np.unique(patch_1, axis = 0)
    patch_unique_2 = np.unique(patch_2, axis = 0)
    intersection_points = 0
    for point in patch_unique_1:
        point_distance = np.sqrt(np.sum((point - patch_unique_2)**2, axis = 1))
        if np.any(point_distance < dist_thresh):
            intersection_points += 1

    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points

    # Explicit float so the ratio is not truncated by integer division under
    # Python 2.
    return float(intersection_points) / union_points

def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw per-detection data into ``Detection`` objects.

    Args:
        detections: 7-element entries ``(x1, y1, x2, y2, conf, _, _)``.
        features: per-detection feature (may be ``None``), passed as the last
            ``Detection`` argument.
        appearance_features: per-detection appearance feature.
        detections_3d: 7-element 3D boxes (entries may be ``None``), or
            ``None`` when no detection has a 3D box.

    Returns:
        A list with one ``Detection`` per zipped input entry. The 2D box is
        converted from corner form to ``[x1, y1, width, height]``; the 3D box
        is withheld whenever the feature is ``None``.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    wrapped = []
    for det_2d, feat, app_feat, det_3d in zip(detections, features,
                                              appearance_features, detections_3d):
        left, top, right, bottom, score, _, _ = det_2d
        tlwh = [left, top, right - left, bottom - top]

        volume = None
        if det_3d is not None:
            # Unpacking enforces the 7-element layout.
            cx, cy, cz, dim_a, dim_b, dim_c, yaw = det_3d
            volume = [cx, cy, cz, dim_a, dim_b, dim_c, yaw]
        if feat is None:
            # A detection without a feature is forwarded without its 3D box.
            volume = None

        wrapped.append(Detection(tlwh, volume, score, app_feat, feat))

    return wrapped

def combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):
    """Concatenate appearance and depth features and optionally fuse them.

    Args:
        features: per-detection appearance feature tensors.
        depth_features: per-detection depth feature tensors; entries may be
            ``None`` when no depth feature exists.
        ids_3d: per-detection flag; a falsy entry means the depth feature is
            replaced by a 512-element zero vector.
        combination_model: callable model applied to the stacked concatenated
            features, or ``None`` to return the plain concatenations.
        depth_weight: scale applied to the depth feature before concatenation.

    Returns:
        ``(combined_features, appearance_features)``, both lists with one
        tensor per detection.
    """
    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if not ids_3d[i] or depth_feature is None:
            # The zero placeholder follows the appearance feature's device so
            # torch.cat works wherever the features live (no fixed 'cuda:0').
            depth_feature = torch.zeros(512, device=appearance_feature.device)
        combined_features.append(torch.cat([appearance_feature, depth_feature * depth_weight]))
        appearance_features.append(appearance_feature)

    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            fused = combination_model(torch.stack(combined_features))
        combined_features = list(torch.unbind(fused.detach()))
    return combined_features, appearance_features

def filter(detections, img_width=1242, img_height=375, margin=10):
    """Drop detections that touch both a side edge and the top/bottom edge.

    A detection is replaced by ``None`` when it lies within ``margin`` pixels
    of the left or right image border and also within ``margin`` pixels of
    the top or bottom border. The list is modified in place.

    Args:
        detections: list of boxes read as ``(left, top, right, bottom, ...)``;
            entries that are already ``None`` are left untouched.
        img_width: image width in pixels (default 1242).
        img_height: image height in pixels (default 375).
        margin: border width in pixels (default 10).

    Returns:
        The same ``detections`` list.
    """
    max_right = img_width - margin
    max_bottom = img_height - margin
    for i, det in enumerate(detections):
        if det is None:
            continue
        left, top, right, bottom = det[0], det[1], det[2], det[3]
        near_side = left < margin or right > max_right
        near_top_or_bottom = top < margin or bottom > max_bottom
        if near_side and near_top_or_bottom:
            detections[i] = None
    return detections