Repository: StanfordVL/JRMOT_ROS
Branch: master
Commit: ca1e87e51ecf
Files: 100
Total size: 715.3 KB
Directory structure:
gitextract_mbelvxqp/
├── CMakeLists.txt
├── LICENSE
├── README.md
├── calib/
│ ├── cameras.yaml
│ └── defaults.yaml
├── config/
│ └── featurepointnet.cfg
├── launch/
│ └── jpda_tracker.launch
├── msg/
│ ├── __init__.py
│ ├── detection2d_with_feature.msg
│ ├── detection2d_with_feature_array.msg
│ ├── detection3d_with_feature.msg
│ └── detection3d_with_feature_array.msg
├── package.xml
├── paper_experiments/
│ ├── models/
│ │ ├── __init__.py
│ │ ├── aligned_reid_model.py
│ │ ├── combination_model.py
│ │ ├── deep_sort_model.py
│ │ ├── featurepointnet_model.py
│ │ ├── pointnet_model.py
│ │ ├── resnet_reid_models.py
│ │ └── yolo_models.py
│ ├── requirements.txt
│ ├── track.py
│ └── utils/
│ ├── EKF.py
│ ├── JPDA_matching.py
│ ├── aligned_reid_utils.py
│ ├── assign_ids_detections.py
│ ├── calibration.py
│ ├── combine_and_process_detections.py
│ ├── dataset.py
│ ├── deep_sort_utils.py
│ ├── detection.py
│ ├── double_measurement_kf.py
│ ├── evaluate_detections.py
│ ├── featurepointnet_model_util.py
│ ├── featurepointnet_tf_util.py
│ ├── imm.py
│ ├── iou_matching.py
│ ├── kf_2d.py
│ ├── kf_3d.py
│ ├── linear_assignment.py
│ ├── logger.py
│ ├── mbest_ilp.py
│ ├── nn_matching.py
│ ├── pointnet_tf_util.py
│ ├── pointnet_transform_nets.py
│ ├── read_detections.py
│ ├── resnet_reid_utils.py
│ ├── test_jpda.py
│ ├── test_kf/
│ │ ├── .gitignore
│ │ ├── run_kf_test.py
│ │ ├── single_track_4state_test.p.val
│ │ ├── single_track_6state_test.p.val
│ │ ├── two_track_4state_test.p.val
│ │ └── write_kf_test.py
│ ├── track.py
│ ├── track_3d.py
│ ├── tracker.py
│ ├── tracker_3d.py
│ ├── tracking_utils.py
│ ├── visualise.py
│ └── yolo_utils/
│ ├── __init__.py
│ ├── datasets.py
│ ├── parse_config.py
│ └── utils.py
├── requirements.txt
└── src/
├── 3d_detector.py
├── EKF.py
├── JPDA_matching.py
├── __init__.py
├── aligned_reid_model.py
├── aligned_reid_utils.py
├── calibration.py
├── combination_model.py
├── deep_sort_utils.py
├── detection.py
├── distances.py
├── double_measurement_kf.py
├── evaluation/
│ ├── __init__.py
│ ├── distances 2.py
│ └── distances.py
├── featurepointnet_model.py
├── featurepointnet_model_util.py
├── featurepointnet_tf_util.py
├── iou_matching.py
├── kf_2d.py
├── linear_assignment.py
├── mbest_ilp.py
├── nn_matching.py
├── pointnet_model.py
├── template 2.py
├── template.py
├── track_3d 2.py
├── track_3d.py
├── tracker_3d 2.py
├── tracker_3d.py
├── tracker_3d_node 2.py
├── tracker_3d_node.py
├── tracking_utils 2.py
└── tracking_utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: CMakeLists.txt
================================================
# 3.0.2 is the minimum recommended for catkin packages; declaring anything
# older leaves policy CMP0048 unset and produces a warning on current CMake.
cmake_minimum_required(VERSION 3.0.2)
project(jpda_rospack)
## Compile as C++11, supported in ROS Kinetic and newer
# add_compile_options(-std=c++11)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
# catkin components needed at build time; message_generation provides the
# add_message_files()/generate_messages() macros used further down.
find_package(catkin REQUIRED COMPONENTS roscpp rospy std_msgs vision_msgs message_generation)
## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()
################################################
## Declare ROS messages, services and actions ##
################################################
## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
## * add a build_depend tag for "message_generation"
## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
## but can be declared for certainty nonetheless:
## * add a exec_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
## * add "message_generation" and every package in MSG_DEP_SET to
## find_package(catkin REQUIRED COMPONENTS ...)
## * add "message_runtime" and every package in MSG_DEP_SET to
## catkin_package(CATKIN_DEPENDS ...)
## * uncomment the add_*_files sections below as needed
## and list every .msg/.srv/.action file to be processed
## * uncomment the generate_messages entry below
## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
## Generate messages in the 'msg' folder
# Message definitions (relative to the msg/ directory) to generate code for.
add_message_files(FILES
  detection2d_with_feature.msg
  detection2d_with_feature_array.msg
  detection3d_with_feature.msg
  detection3d_with_feature_array.msg
)
## Generate services in the 'srv' folder
# add_service_files(
# FILES
# Service1.srv
# Service2.srv
# )
## Generate actions in the 'action' folder
# add_action_files(
# FILES
# Action1.action
# Action2.action
# )
## Generate added messages and services with any dependencies listed here
# DEPENDENCIES lists the *other* packages whose message types are referenced.
# The current package is always included implicitly, so it is not listed here.
generate_messages(
  DEPENDENCIES
    std_msgs
    vision_msgs
)
################################################
## Declare ROS dynamic reconfigure parameters ##
################################################
## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
## * add "dynamic_reconfigure" to
## find_package(catkin REQUIRED COMPONENTS ...)
## * uncomment the "generate_dynamic_reconfigure_options" section below
## and list every .cfg file to be processed
## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
# cfg/DynReconf1.cfg
# cfg/DynReconf2.cfg
# )
###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
# Export the catkin packages that dependants of this package also need.
# No include directories, libraries or system dependencies are exported.
catkin_package(
  CATKIN_DEPENDS
    roscpp
    rospy
    std_msgs
    vision_msgs
    message_runtime
)
###########
## Build ##
###########
## Specify additional locations of header files
## Your package locations should be listed before other locations
# Header search path: only the directories exported by the catkin components.
include_directories(${catkin_INCLUDE_DIRS})
## Declare a C++ library
# add_library(${PROJECT_NAME}
# src/${PROJECT_NAME}/jpda_rospack.cpp
# )
## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/jpda_rospack_node.cpp)
## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
# ${catkin_LIBRARIES}
# )
#############
## Install ##
#############
# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
# scripts/my_python_script
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark executables and/or libraries for installation
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
#############
## Testing ##
#############
## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_jpda_rospack.cpp)
# if(TARGET ${PROJECT_NAME}-test)
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()
## Add folders to be run by python nosetests
# catkin_add_nosetests(test)
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2020 Stanford Vision and Learning Group
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# JRMOT ROS package
The repository contains the code for the work "JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset".
Note that due to the global pandemic, this repository is still a work in progress. Updates will be made as soon as possible.
## Introduction
JRMOT is a 3D multi object tracking system that:
- Is real-time
- Is online
- Fuses 2D and 3D information
- Achieves State of the Art performance on KITTI
We also release JRDB:
- A dataset with over 2 million annotated boxes and 3500 time consistent trajectories in 2D and 3D
- Captured in social, human-centric settings
- Captured by our social mobile-manipulator JackRabbot
- Contains 360 degree cylindrical images, stereo camera images, 3D pointclouds and more sensing modalities
All information, including download links for JRDB can be found [here](https://jrdb.stanford.edu).
## JRMOT

- Our system is built on top of state of the art 2D and 3D detectors (mask-RCNN and F-PointNet respectively). These detections are associated with predicted track locations at every time step.
- Association is done via a novel feature fusion, as well as a cost selection procedure, followed by Kalman state gating and JPDA.
- Given the JPDA output, we use both 2D and 3D detections in a novel multi-modal Kalman filter to update the track locations.
## Using the code
There are 3 nodes forming parts of the ROS package:
+ 3d_detector.py: Runs F-PointNet, which performs 3D detection and 3D feature extraction
+ template.py: Runs Aligned-Re-ID, which performs 2D feature extraction
+ tracker_3d_node.py: Performs tracking while taking both 2D detections + features and 3D detections + features as input
The launch file in the folder "launch" launches all 3 nodes.
## Dependencies
The following are dependencies of the code:
+ 2D detector: The 2D detector is not included in this package. To interface with your own 2D detector, please modify the file template.py to subscribe to the correct topic, and also to handle the conversion from ROS message to numpy array.
+ Spencer People Tracking messages: The final tracker output is in a Spencer People Tracking message. Please install this package and include these message types.
+ Various python packages: These can be found in [requirements.txt](./requirements.txt). Please install all dependencies prior to running the code (including CUDA and cuDNN). Additionally, this code requires a solver called Gurobi. Instructions to install gurobipy can be found [here](https://www.gurobi.com/documentation/9.0/quickstart_mac/the_grb_python_interface_f.html).
+ Weight files: The trained weights (trained on JRDB) for FPointNet and Aligned-ReID can be found [here](https://drive.google.com/open?id=1YQinMPVWEI44KezS9inXe0mvVnm4aL3s).
## Citation
If you find this work useful, please cite:
```
@INPROCEEDINGS{shenoi2020jrmot,
author={A. {Shenoi} and M. {Patel} and J. {Gwak} and P. {Goebel} and A. {Sadeghian} and H. {Rezatofighi} and R. {Mart\'in-Mart\'in} and S. {Savarese}},
booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
title={JRMOT: A Real-Time 3D Multi-Object Tracker and a New Large-Scale Dataset},
year={2020},
volume={},
number={},
pages={10335-10342},
doi={10.1109/IROS45743.2020.9341635}}
```
If you utilise our dataset, please also cite:
```
@article{martin2019jrdb,
title={JRDB: A dataset and benchmark of egocentric visual perception for navigation in human environments},
author={Mart{\'i}n-Mart{\'i}n, Roberto and Patel, Mihir and Rezatofighi, Hamid and Shenoi, Abhijeet and Gwak, JunYoung and Frankel, Eric and Sadeghian, Amir and Savarese, Silvio},
journal={arXiv preprint arXiv:1910.11792},
year={2019}
}
```
##
================================================
FILE: calib/cameras.yaml
================================================
stitching:
radius: 3360000
rotation: 0
scalewidth: 1831
crop: 1
cameras:
# camera order matters!
sensor_0:
width: 752
height: 480
D: -0.336591 0.159742 0.00012697 -7.22557e-05 -0.0461953
# K = fx 0 cx
# 0 fy cy
# 0 0 1
K: >
476.71 0 350.738
0 479.505 209.532
0 0 1
R: >
0.999994 0.000654539 0.00340293
-0.000654519 1 -6.81963e-06
-0.00340293 4.59231e-06 0.999994
T: -0.0104242 -3.70974 -56.9177
sensor_1:
width: 752
height: 480
D: -0.335073 0.151959 -0.000232061 0.00032014 -0.0396825
K: >
483.254 0 365.33
0 485.78 210.953
0 0 1
R: >
0.305706 -0.00895443 -0.952084
0.0110396 0.999922 -0.00585963
0.952062 -0.0087193 0.305781
T: 0.93957 -4.05131 -52.03
sensor_2:
width: 752
height: 480
D: -0.338469 0.156256 -0.000385467 0.000295485 -0.0401965
K: >
483.911 0 355.144
0 486.466 223.026
0 0 1
R: >
-0.806828 0.0136361 -0.590629
0.00870468 0.999899 0.011194
0.590723 0.00389039 -0.806865
T: -0.25753 -6.54978 -47.7311
sensor_3:
width: 752
height: 480
D: -0.330848 0.14747 8.59247e-05 0.000262599 -0.0385311
K: >
475.807 0 339.53
0 478.371 188.481
0 0 1
R: >
-0.811334 0.0033829 0.584574
0.00046071 0.999987 -0.00514746
-0.584583 -0.00390699 -0.811324
T: 2.72207 -6.82928 -45.9778
sensor_4:
width: 752
height: 480
D: -0.34064 0.168338 0.000147292 0.000229372 -0.0516133
K: >
485.046 0 368.864
0 488.185 208.215
0 0 1
R: >
0.310275 0.00160497 0.950645
-0.00648686 0.999979 0.000428942
-0.950625 -0.00629979 0.310279
T: -0.333857 -5.12974 -56.0573
sensor_5:
width: 752
height: 480
D: -0.338422 0.163703 -0.000376267 7.73351e-06 -0.0479871
K: >
478.406 0 353.499
0 481.322 190.225
0 0 1
R: >
0.999995 0.00282205 0.00163291
-0.00282345 0.999996 0.000852931
-0.00163049 -0.000857537 0.999998
T: -0.903588 -126.851 -56.6256
sensor_6:
width: 752
height: 480
D: -0.340676 0.165511 -0.00035978 0.000181532 -0.0493721
K: >
480.459 0 362.503
0 482.924 197.949
0 0 1
R: >
0.308288 -0.0110391 -0.951229
-0.000933102 0.999929 -0.0119067
0.951293 0.00455829 0.308256
T: 1.74525 -127.214 -51.7722
sensor_7:
width: 752
height: 480
D: -0.344379 0.170343 -0.000137847 0.000141047 -0.0510536
K: >
486.491 0 361.559
0 489.22 210.547
0 0 1
R: >
-0.808201 0.0313998 -0.588068
0.026057 0.999506 0.0175574
0.588329 -0.00113337 -0.808621
T: -2.56535 -129.191 -47.5803
sensor_8:
width: 752
height: 480
D: -0.331228 0.144696 0.000117553 0.000566449 -0.0343506
K: >
476.708 0 354.16
0 479.424 209.383
0 0 1
R: >
-0.807384 -0.00296577 0.590019
-0.0122001 0.999857 -0.0116688
-0.589901 -0.0166195 -0.807305
T: 3.39727 -129.381 -45.2409
sensor_9:
width: 752
height: 480
D: -0.345189 0.180808 0.000276465 0.000131868 -0.062103
K: >
484.219 0 345.303
0 487.312 192.371
0 0 1
R: >
0.308505 0.00370159 0.951215
-0.00403535 0.999988 -0.00258261
-0.951214 -0.00304174 0.308517
T: 0.354966 -128.218 -54.0617
================================================
FILE: calib/defaults.yaml
================================================
calibrated:
# the lidar_to_rgb parameters allow tweaking of the transformation between lidar and rgb frames
# the default transformation is taken from the TF Tree
# NOTE: applied to the original (sensor/velodyne) frame [x forward, y left, z up]:
lidar_upper_to_rgb:
# in meters: [x,y,z]
translation: [0, 0, -0.33529]
# in radians: [x,y,z]
rotation: [0, 0, 0.085]
lidar_lower_to_rgb:
translation: [0, 0, 0.13511]
rotation: [0, 0, 0]
image:
# all in pixels
width: 3760
height: 480
# y-axis forward pixel offset (e.g. 3760/2 => 1880, b/c center of the cylindrical image is forward)
# TODO: move into calibrated params, when auto-calibration is possible
stitched_image_offset: 1880
frames:
# lookup for people transforms
global: base_link
# name of the rgb360 camera frame to which we wish to transform
rgb360: occam
================================================
FILE: config/featurepointnet.cfg
================================================
[general]
num_point = 1024
model_path = /home/sibot/jr2_catkin_ws/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt
================================================
FILE: launch/jpda_tracker.launch
================================================
================================================
FILE: msg/__init__.py
================================================
================================================
FILE: msg/detection2d_with_feature.msg
================================================
# This message contains a 2D bounding box corresponding to the detection of a person
# Also contains the feature of this person used for re-ID
Header header #header timestamp is time of frame acquisition
uint64 x1 # x coordinate of the top left of the bounding box
uint64 y1 # y coordinate of the top left of the bounding box
uint64 x2 # x coordinate of the bottom right of the bounding box
uint64 y2 # y coordinate of the bottom right of the bounding box
float64[] feature # re-ID feature
uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)
bool valid # whether detection is valid (within the boundaries of the image and has minimum required size)
================================================
FILE: msg/detection2d_with_feature_array.msg
================================================
Header header
detection2d_with_feature[] detection2d_with_features
================================================
FILE: msg/detection3d_with_feature.msg
================================================
# This message contains a 3D bounding box corresponding to the detection of a person
# Also contains the feature of this person used for re-ID
Header header #header timestamp is time of frame acquisition
float32 x # x coordinate of the center of the bottom face of the bounding box
float32 y # y coordinate of the center of the bottom face of the bounding box
float32 z # z coordinate of the center of the bottom face of the bounding box
float32 l # size of bounding box along x dimension
float32 h # size of bounding box along y dimension
float32 w # size of bounding box along z dimension
float32 theta # rotation of bounding box with respect to the positive x axis
float64[] feature # re-ID feature
uint8 frame_det_id #unique id of this detection within this frame (used for associating 2D and 3D detections)
bool valid # whether detection is valid (enough lidar points)
================================================
FILE: msg/detection3d_with_feature_array.msg
================================================
Header header
detection3d_with_feature[] detection3d_with_features
================================================
FILE: package.xml
================================================
jpda_rospack
0.0.1
The jpda_rospack package
ashenoi
TODO
message_generation
message_runtime
catkin
roscpp
rospy
std_msgs
vision_msgs
roscpp
rospy
std_msgs
vision_msgs
roscpp
rospy
std_msgs
vision_msgs
================================================
FILE: paper_experiments/models/__init__.py
================================================
================================================
FILE: paper_experiments/models/aligned_reid_model.py
================================================
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import os
import math
class Model(nn.Module):
    """ResNet-50 trunk with a global-pooling head and a 1x1-conv local head.

    Args:
        local_conv_out_channels: number of channels ``c`` of the local feature
            produced by the 1x1 convolution.
        num_classes: when not ``None``, a linear classifier over the global
            feature is added and ``forward`` additionally returns its logits.
    """

    def __init__(self, local_conv_out_channels=128, num_classes=None):
        super(Model, self).__init__()
        # Pretrained trunk built by resnet50(); its last stage has 2048 channels
        # (512 planes times the Bottleneck expansion of 4).
        self.base = resnet50(pretrained=True)
        planes = 2048
        self.local_conv = nn.Conv2d(planes, local_conv_out_channels, 1)
        self.local_bn = nn.BatchNorm2d(local_conv_out_channels)
        self.local_relu = nn.ReLU(inplace=True)

        if num_classes is not None:
            self.fc = nn.Linear(planes, num_classes)
            # Use the in-place initialisers; init.normal / init.constant are
            # deprecated aliases of these.
            init.normal_(self.fc.weight, std=0.001)
            init.constant_(self.fc.bias, 0)

    def forward(self, x):
        """
        Returns:
          global_feat: shape [N, C]
          local_feat: shape [N, H, c]
          logits: only returned when the classifier head ``fc`` exists
        """
        # shape [N, C, H, W]
        feat = self.base(x)
        # Average over the full spatial extent, then flatten to shape [N, C].
        global_feat = F.avg_pool2d(feat, feat.size()[2:])
        global_feat = global_feat.view(global_feat.size(0), -1)
        # Average over the width axis only: shape [N, C, H, 1]
        local_feat = torch.mean(feat, -1, keepdim=True)
        local_feat = self.local_relu(self.local_bn(self.local_conv(local_feat)))
        # Drop the singleton width axis and move channels last: shape [N, H, c]
        local_feat = local_feat.squeeze(-1).permute(0, 2, 1)

        if hasattr(self, 'fc'):
            logits = self.fc(global_feat)
            return global_feat, local_feat, logits

        return global_feat, local_feat
# Names exported by a star-import of this module.
__all__ = [
    'ResNet',
    'resnet18',
    'resnet34',
    'resnet50',
    'resnet101',
    'resnet152',
]
# Download location of the pretrained checkpoint for each ResNet variant.
model_urls = dict(
    resnet18='https://download.pytorch.org/models/resnet18-5c106cde.pth',
    resnet34='https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth',
    resnet101='https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    resnet152='https://download.pytorch.org/models/resnet152-b121ed2d.pth',
)

# Import-time side effect: TORCH_HOME is overwritten for the whole process.
os.environ["TORCH_HOME"] = "./ResNet_Model"
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with a padding of 1."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    ``expansion`` is 1, i.e. the block emits ``planes`` channels. ``downsample``
    is an optional module applied to the input before it is added back.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Sub-modules are created in the same order as before.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut, then a final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Identity shortcut unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block of 1x1, 3x3 and 1x1 convolutions.

    The last convolution widens the output to ``planes * 4`` channels, which is
    what ``expansion`` advertises. ``downsample`` is an optional module applied
    to the input before it is added back.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        widened = planes * 4
        # Sub-modules are created in the same order as before.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv-bn stages, add the shortcut, then a final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Identity shortcut unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet feature extractor: a stem plus four residual stages, no classifier.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(inplanes, planes, stride, downsample)``.
        layers: number of blocks in each of the four stages.
    """

    def __init__(self, block, layers):
        super(ResNet, self).__init__()
        # Running channel count, advanced by _make_layer after each stage.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                # Normal init with variance 2 / (kernel area * output channels).
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block`` into one sequential stage."""
        out_planes = planes * block.expansion

        # A 1x1 projection is needed whenever the first block changes the
        # resolution or the channel count.
        shortcut = None
        if not (stride == 1 and self.inplanes == out_planes):
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the feature map of the last residual stage."""
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for step in pipeline:
            x = step(x)
        return x
def remove_fc(state_dict):
    """Return a copy of ``state_dict`` without the entries whose key starts with ``'fc.'``.

    The input mapping itself is left untouched.
    """
    pruned = state_dict.copy()
    fc_keys = [name for name, _ in state_dict.items() if name.startswith('fc.')]
    for name in fc_keys:
        del pruned[name]
    return pruned
def resnet18(pretrained=False):
    """Build a ResNet-18 (``BasicBlock`` stages of 2, 2, 2 and 2 blocks).

    Args:
        pretrained (bool): when True, load the checkpoint referenced by
            ``model_urls['resnet18']`` after stripping its fc parameters.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2])
    if not pretrained:
        return net
    checkpoint = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(remove_fc(checkpoint))
    return net
def resnet34(pretrained=False):
    """Build a ResNet-34 (``BasicBlock`` stages of 3, 4, 6 and 3 blocks).

    Args:
        pretrained (bool): when True, load the checkpoint referenced by
            ``model_urls['resnet34']`` after stripping its fc parameters.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3])
    if not pretrained:
        return net
    checkpoint = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(remove_fc(checkpoint))
    return net
def resnet50(pretrained=False):
    """Build a ResNet-50 (``Bottleneck`` stages of 3, 4, 6 and 3 blocks).

    Args:
        pretrained (bool): when True, load the checkpoint referenced by
            ``model_urls['resnet50']`` after stripping its fc parameters.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])
    if not pretrained:
        return net
    # Unlike the sibling constructors, the download directory is passed explicitly.
    checkpoint = model_zoo.load_url(model_urls['resnet50'], model_dir="./ResNet_Model")
    net.load_state_dict(remove_fc(checkpoint))
    return net
def resnet101(pretrained=False):
    """Build a ResNet-101 (``Bottleneck`` stages of 3, 4, 23 and 3 blocks).

    Args:
        pretrained (bool): when True, load the checkpoint referenced by
            ``model_urls['resnet101']`` after stripping its fc parameters.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3])
    if not pretrained:
        return net
    checkpoint = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(remove_fc(checkpoint))
    return net
def resnet152(pretrained=False):
    """Build a ResNet-152 (``Bottleneck`` stages of 3, 8, 36 and 3 blocks).

    Args:
        pretrained (bool): when True, load the checkpoint referenced by
            ``model_urls['resnet152']`` after stripping its fc parameters.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3])
    if not pretrained:
        return net
    checkpoint = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(remove_fc(checkpoint))
    return net
================================================
FILE: paper_experiments/models/combination_model.py
================================================
import pdb
import numpy as np
import torch.nn as nn
class CombiNet(nn.Module):
    """Three fully connected layers with an additive skip from input to output.

    The hidden layers are ``2 * hidden_units`` wide. Because the input is added
    to the output of the last layer, ``in_dim`` and ``out_dim`` have to be
    compatible for that addition.
    """

    def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
        super().__init__()
        width = 2 * hidden_units
        self.fc1 = nn.Linear(in_dim, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, out_dim)
        self.relu = nn.ReLU()
        # Re-initialise every sub-module through the module-level weight_init.
        self.apply(weight_init)

    def forward(self, x):
        """Return ``fc3(relu(fc2(relu(fc1(x))))) + x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        out = self.fc3(hidden)
        out += x
        return out
class CombiLSTM(nn.Module):
    """LSTM wrapped by two-layer input and output MLPs, with a skip connection.

    The skip carries the normalised input and is added to the normalised
    output, so ``in_dim`` and ``out_dim`` have to be compatible for that
    addition.
    """

    def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
        super().__init__()
        self.in_linear1 = nn.Linear(in_dim, hidden_units)
        self.in_linear2 = nn.Linear(hidden_units, hidden_units)
        self.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)
        self.out_linear1 = nn.Linear(hidden_units, hidden_units)
        self.out_linear2 = nn.Linear(hidden_units, out_dim)
        self.relu = nn.ReLU()
        # Re-initialise every sub-module through the module-level weight_init.
        self.apply(weight_init)

    def forward(self, x, hidden = None):
        """Return ``(features, hidden)``; ``hidden`` is the LSTM state to carry over.

        Pass the returned ``hidden`` back in on the next call to continue from
        the previous state; ``None`` lets the LSTM start from its default state.
        """
        skip = nn.functional.normalize(x)

        out = self.relu(self.in_linear1(skip))
        out = self.in_linear2(out)

        # Insert a singleton axis at dim 1 for the LSTM (the batch dimension),
        # and strip it again afterwards.
        out = out.unsqueeze(1)
        if hidden is not None:
            out, hidden = self.rnn(out, hidden)
        else:
            out, hidden = self.rnn(out)
        out = out.squeeze(1)

        out = self.relu(self.out_linear1(out))
        out = nn.functional.normalize(self.out_linear2(out))
        out += skip
        return out, hidden
def weight_init(m):
    """Xavier-normal initialiser meant to be passed to ``nn.Module.apply``.

    Only modules whose type is exactly ``nn.Linear`` or ``nn.LSTM`` are touched
    (subclasses are skipped). Linear weights use a gain of sqrt(2); for an LSTM
    the first layer's input-hidden and hidden-hidden weights use the default
    gain. Biases are never modified.
    """
    module_type = type(m)
    if module_type is nn.LSTM:
        for weight in (m.weight_ih_l0, m.weight_hh_l0):
            nn.init.xavier_normal_(weight)
        return
    if module_type is nn.Linear:
        nn.init.xavier_normal_(m.weight, gain=np.sqrt(2))
================================================
FILE: paper_experiments/models/deep_sort_model.py
================================================
import tensorflow as tf
from skimage.transform import resize
import numpy as np
class ImageEncoder(object):
    """Runs a frozen TensorFlow graph that maps an image to a feature vector.

    Args:
        checkpoint_filename: path of the serialized ``GraphDef`` file.
        input_name: name of the input tensor inside the graph.
        output_name: name of the feature tensor inside the graph.
    """

    def __init__(self, checkpoint_filename="weights/deep_sort_weights.pb", input_name="images",
                 output_name="features"):
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        self.session = tf.Session(config=session_config)

        # Parse the frozen graph and import it under the "net" name scope.
        with tf.gfile.GFile(checkpoint_filename, "rb") as pb_file:
            frozen_graph = tf.GraphDef()
            frozen_graph.ParseFromString(pb_file.read())
        tf.import_graph_def(frozen_graph, name="net")

        self.input_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % input_name)
        self.output_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % output_name)

        # The output must be rank 2 and the input rank 4.
        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]

    def __call__(self, data_x):
        """Encode ``data_x[0]``; any further elements of ``data_x`` are ignored."""
        # Resize the first image to the input size the graph expects, then
        # restore a leading axis of length one before feeding it.
        first_image = resize(data_x[0], self.image_shape, anti_aliasing=True, mode='reflect')
        batch = np.expand_dims(first_image, 0)
        return self.session.run(self.output_var, feed_dict={self.input_var: batch})
if '__main__' == __name__:
    # Running the file directly only builds an encoder with its default arguments.
    encoder = ImageEncoder()
================================================
FILE: paper_experiments/models/featurepointnet_model.py
================================================
import os, pdb
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
import utils.featurepointnet_tf_util as tf_util
import utils.featurepointnet_model_util as model_util
from utils.calibration import Calibration, OmniCalibration
batch_size = 45  # TODO: Update if needed?


class FPointNet():
    """Frustum PointNet (v1) inference wrapper built on the TensorFlow 1.x API.

    Construction builds the segmentation + box-estimation graph with a fixed
    batch dimension of ``batch_size``, opens a session and restores the
    checkpoint named in the config file. Calling the instance runs batched
    inference and returns boxes plus a per-input feature vector.
    """

    def __init__(self, config_path):
        """Build the graph, create the session and restore trained weights.

        Args:
            config_path: path to an INI file whose ``[general]`` section
                provides ``num_point`` (int) and ``model_path``.
        """
        # ConfigParser replaces SafeConfigParser, a deprecated alias that was
        # removed in Python 3.12; parsing behaviour is the same.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        self.num_point = parser.getint('general', 'num_point')
        self.model_path = parser.get('general', 'model_path')

        with tf.device('/gpu:'+str('0')):
            (pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl,
             heading_class_label_pl, heading_residual_label_pl,
             size_class_label_pl, size_residual_label_pl) = \
                model_util.placeholder_inputs(batch_size, self.num_point)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)
            # Placeholder used only by get_depth_feature / get_depth_feature_op.
            self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))
            loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl,
                                       heading_residual_label_pl, size_class_label_pl,
                                       size_residual_label_pl, end_points)
            self.saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        self.sess = tf.Session(config=config)
        # Initialize variables, then overwrite them with the checkpoint.
        self.sess.run(tf.global_variables_initializer())
        self.saver.restore(self.sess, self.model_path)

        self.ops = {'pointclouds_pl': pointclouds_pl,
                    'one_hot_vec_pl': one_hot_vec_pl,
                    'labels_pl': labels_pl,
                    'centers_pl': centers_pl,
                    'heading_class_label_pl': heading_class_label_pl,
                    'heading_residual_label_pl': heading_residual_label_pl,
                    'size_class_label_pl': size_class_label_pl,
                    'size_residual_label_pl': size_residual_label_pl,
                    'is_training_pl': is_training_pl,
                    'logits': end_points['mask_logits'],
                    'center': end_points['center'],
                    'end_points': end_points,
                    'depth_feature': depth_feature,
                    'loss': loss}

    def __call__(self, input_point_cloud, rot_angle, peds=False):
        """Run Frustum PointNet inference in batches of ``batch_size``.

        Args:
            input_point_cloud: array of shape (num_inputs, num_point, 4); it is
                zero-padded along axis 0 to a multiple of ``batch_size``.
            rot_angle: sequence indexable by input index, passed per input to
                ``model_util.from_prediction_to_label_format``.
            peds: when true, index 1 of the one-hot class vector is set;
                otherwise the vector is all zeros.

        Returns:
            Tuple ``(boxes, mask_mean_prob, features)``. ``boxes`` stacks one
            row per input with entries 4 and 5 swapped relative to what
            ``from_prediction_to_label_format`` returns; ``mask_mean_prob`` is
            an all-zero vector (it is never filled in); ``features`` has shape
            (num_inputs, 512).
        """
        num_inputs = input_point_cloud.shape[0]
        if num_inputs == 0:
            # np.vstack([]) raises, so an empty detection set used to crash.
            # 7 box columns assumed: index 6 is the highest one touched below.
            return np.zeros((0, 7)), np.zeros((0,)), np.zeros((0, 512))

        one_hot_vec = np.zeros((batch_size, 3))
        if peds:
            one_hot_vec[:, 1] = 1

        # Pad with all-zero clouds so every session call sees a full batch.
        remainder = num_inputs % batch_size
        if remainder != 0:
            padding = np.zeros((batch_size - remainder, self.num_point, 4))
            input_point_cloud = np.vstack([input_point_cloud, padding])
        padded_count = input_point_cloud.shape[0]
        num_batches = padded_count // batch_size

        centers = np.zeros((padded_count, 3))
        heading_logits = np.zeros((padded_count, model_util.NUM_HEADING_BIN))
        heading_residuals = np.zeros((padded_count, model_util.NUM_HEADING_BIN))
        size_logits = np.zeros((padded_count, model_util.NUM_SIZE_CLUSTER))
        size_residuals = np.zeros((padded_count, model_util.NUM_SIZE_CLUSTER, 3))
        mask_mean_prob = np.zeros((padded_count,))  # Step scores (never populated)
        features = np.zeros((padded_count, 512))

        ep = self.ops['end_points']
        fetches = [self.ops['center'],
                   ep['heading_scores'], ep['heading_residuals'],
                   ep['size_scores'], ep['size_residuals'],
                   self.ops['depth_feature']]
        for i in range(num_batches):
            rows = slice(i*batch_size, (i+1)*batch_size)
            feed_dict = {
                self.ops['pointclouds_pl']: input_point_cloud[rows],
                self.ops['one_hot_vec_pl']: one_hot_vec,
                self.ops['is_training_pl']: False}
            (batch_centers,
             batch_heading_scores, batch_heading_residuals,
             batch_size_scores, batch_size_residuals,
             batch_features) = self.sess.run(fetches, feed_dict=feed_dict)

            centers[rows] = batch_centers
            heading_logits[rows] = batch_heading_scores
            heading_residuals[rows] = batch_heading_residuals
            size_logits[rows] = batch_size_scores
            size_residuals[rows] = batch_size_residuals
            # The feature op keeps a singleton axis 1; drop it.
            features[rows] = batch_features[:, 0, :]

        heading_cls = np.argmax(heading_logits, 1)  # B
        size_cls = np.argmax(size_logits, 1)  # B
        row_idx = np.arange(padded_count)
        # Residual of the winning class per row: shapes (B, 1) and (B, 3).
        heading_res = heading_residuals[row_idx, heading_cls].reshape(-1, 1)
        size_res = size_residuals[row_idx, size_cls, :]

        # TODO: Make this accept batches if wanted
        boxes = []
        for i in range(num_inputs):
            box = np.array(model_util.from_prediction_to_label_format(
                centers[i], heading_cls[i], heading_res[i],
                size_cls[i], size_res[i], rot_angle[i]))
            box[6] = np.squeeze(box[6])
            box[4], box[5] = box[5], box[4]
            boxes.append(box)
        boxes = np.vstack(boxes)
        return boxes, mask_mean_prob[:num_inputs], features[:num_inputs]

    def _pointwise_conv(self, net, channels, scope, is_training, bn_decay):
        """Apply one 1x1, stride-1, VALID, batch-normalised ``tf_util.conv2d``."""
        return tf_util.conv2d(net, channels, [1,1],
                              padding='VALID', stride=[1,1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):
        ''' 3D instance segmentation PointNet v1 network.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
            end_points: dict
        Output:
            logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object
            end_points: dict (returned unchanged)
        '''
        num_point = point_cloud.get_shape()[1].value
        net = tf.expand_dims(point_cloud, 2)
        net = self._pointwise_conv(net, 64, 'conv1', is_training, bn_decay)
        net = self._pointwise_conv(net, 64, 'conv2', is_training, bn_decay)
        point_feat = self._pointwise_conv(net, 64, 'conv3', is_training, bn_decay)
        net = self._pointwise_conv(point_feat, 128, 'conv4', is_training, bn_decay)
        net = self._pointwise_conv(net, 1024, 'conv5', is_training, bn_decay)

        # Pool over all points, append the class vector, and broadcast the
        # result back to every point before concatenating with point features.
        global_feat = tf_util.max_pool2d(net, [num_point,1],
                                         padding='VALID', scope='maxpool')
        global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3)
        global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
        concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])

        net = self._pointwise_conv(concat_feat, 512, 'conv6', is_training, bn_decay)
        net = self._pointwise_conv(net, 256, 'conv7', is_training, bn_decay)
        net = self._pointwise_conv(net, 128, 'conv8', is_training, bn_decay)
        net = self._pointwise_conv(net, 128, 'conv9', is_training, bn_decay)
        net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)

        logits = tf_util.conv2d(net, 2, [1,1],
                                padding='VALID', stride=[1,1], activation_fn=None,
                                scope='conv10')
        logits = tf.squeeze(logits, [2])  # BxNxC
        return logits, end_points

    def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):
        ''' 3D Box Estimation PointNet v1 network.
        Input:
            object_point_cloud: TF tensor in shape (B,M,C)
                point clouds in object coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
        Output:
            output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
                including box centers, heading bin class scores and residuals,
                and size cluster scores and residuals
            end_points: dict (returned unchanged)
            features: max over axis 1 of the 'conv-reg4' activations
        '''
        num_point = object_point_cloud.get_shape()[1].value
        net = tf.expand_dims(object_point_cloud, 2)
        net = self._pointwise_conv(net, 128, 'conv-reg1', is_training, bn_decay)
        net = self._pointwise_conv(net, 128, 'conv-reg2', is_training, bn_decay)
        net = self._pointwise_conv(net, 256, 'conv-reg3', is_training, bn_decay)
        net = self._pointwise_conv(net, 512, 'conv-reg4', is_training, bn_decay)
        # Exposed to callers as the per-object feature.
        features = tf.reduce_max(net, axis = 1)
        net = tf_util.max_pool2d(net, [num_point,1],
                                 padding='VALID', scope='maxpool2')
        net = tf.squeeze(net, axis=[1,2])
        net = tf.concat([net, one_hot_vec], axis=1)
        net = tf_util.fully_connected(net, 512, scope='fc1', bn=True,
                                      is_training=is_training, bn_decay=bn_decay)
        net = tf_util.fully_connected(net, 256, scope='fc2', bn=True,
                                      is_training=is_training, bn_decay=bn_decay)

        # The first 3 numbers: box center coordinates (cx,cy,cz),
        # the next NUM_HEADING_BIN*2: heading bin class scores and bin residuals
        # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals
        output = tf_util.fully_connected(
            net,
            3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4,
            activation_fn=None, scope='fc3')
        return output, end_points, features

    def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):
        ''' Frustum PointNets model. The model predicts 3D object masks and
            amodal bounding boxes for objects in frustum point clouds.
        Input:
            point_cloud: TF tensor in shape (B,N,4)
                frustum point clouds with XYZ and intensity in point channels
                XYZs are in frustum coordinate
            one_hot_vec: TF tensor in shape (B,3)
                length-3 vectors indicating predicted object type
            is_training: TF boolean scalar
            bn_decay: TF float scalar
        Output:
            end_points: dict (map from name strings to TF tensors)
            features: feature tensor from the box-estimation network
        '''
        end_points = {}

        # 3D Instance Segmentation PointNet
        logits, end_points = self.get_instance_seg_v1_net(
            point_cloud, one_hot_vec,
            is_training, bn_decay, end_points)
        end_points['mask_logits'] = logits

        # Masking: select masked points and translate to their centroid
        object_point_cloud_xyz, mask_xyz_mean, end_points = \
            model_util.point_cloud_masking(point_cloud, logits, end_points)

        # T-Net and coordinate translation
        center_delta, end_points = model_util.get_center_regression_net(
            object_point_cloud_xyz, one_hot_vec,
            is_training, bn_decay, end_points)
        stage1_center = center_delta + mask_xyz_mean  # Bx3
        end_points['stage1_center'] = stage1_center
        # Get object point cloud in object coordinate
        object_point_cloud_xyz_new = \
            object_point_cloud_xyz - tf.expand_dims(center_delta, 1)

        # Amodal Box Estimation PointNet
        output, end_points, features = self.get_3d_box_estimation_v1_net(
            object_point_cloud_xyz_new, one_hot_vec,
            is_training, bn_decay, end_points)

        # Parse output to 3D box parameters
        end_points = model_util.parse_output_to_tensors(output, end_points)
        end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3
        return end_points, features

    def get_depth_feature_op(self, is_training):
        """Build a 'conv-reg1'..'conv-reg4' stack on ``self.object_pointcloud``.

        Returns the max over axis 1 of the final activations. Nothing in this
        class calls this method.
        """
        net = tf.expand_dims(self.object_pointcloud, 2)
        net = self._pointwise_conv(net, 128, 'conv-reg1', is_training, None)
        net = self._pointwise_conv(net, 128, 'conv-reg2', is_training, None)
        net = self._pointwise_conv(net, 256, 'conv-reg3', is_training, None)
        net = self._pointwise_conv(net, 512, 'conv-reg4', is_training, None)
        net = tf.reduce_max(net, axis = 1)
        return net

    def get_depth_feature(self, object_pointcloud):
        """Evaluate the stored depth-feature op for ``object_pointcloud``.

        Returns a one-element list holding the evaluated tensor.
        """
        # NOTE(review): self.ops['depth_feature'] is built from pointclouds_pl
        # in __init__, not from self.object_pointcloud, so this feed probably
        # does not reach the op - confirm before relying on this method.
        feed_dict = {self.object_pointcloud:object_pointcloud, self.ops['is_training_pl']:False}
        depth_feature = self.sess.run([self.ops['depth_feature']], feed_dict = feed_dict)
        return depth_feature

    def softmax(self, x):
        ''' Numpy softmax over the last axis, shifted by the max for stability. '''
        last_axis = len(x.shape) - 1
        probs = np.exp(x - np.max(x, axis=last_axis, keepdims=True))
        probs /= np.sum(probs, axis=last_axis, keepdims=True)
        return probs
def create_depth_model(model, config_path):
    """Instantiate the depth/point-cloud model selected by name.

    Args:
        model: ``'FPointNet'`` or ``'PointNet'``.
        config_path: configuration path forwarded to the model constructor.
            (Original note: the folder path must be the folder containing the
            config.yaml file if omni_camera is True.)

    Returns:
        The constructed model, or ``None`` when ``model`` is not recognised.
    """
    if model == 'FPointNet':
        return FPointNet(config_path)
    if model == 'PointNet':
        # PointNet lives in a sibling module that this file never imported, so
        # this branch used to fail with NameError. Import lazily, following the
        # file's absolute-import convention (e.g. ``utils.calibration``).
        from models.pointnet_model import PointNet
        return PointNet(config_path)
    return None
================================================
FILE: paper_experiments/models/pointnet_model.py
================================================
import os, pdb
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
from utils.pointnet_transform_nets import input_transform_net, feature_transform_net
import utils.pointnet_tf_util as pointnet_tf_util
class PointNet():
    """PointNet feature extractor (TensorFlow 1.x API) restored from a checkpoint.

    The graph is built for a batch of exactly one point cloud with a variable
    number of 3-channel points.
    """

    def __init__(self, config_path):
        """Build the graph, create the session and restore trained weights.

        Args:
            config_path: path to an INI file whose ``[general]`` section
                provides ``num_point`` (int) and ``depth_model_path``.
        """
        # ConfigParser replaces SafeConfigParser, a deprecated alias that was
        # removed in Python 3.12; parsing behaviour is the same.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        num_points = parser.getint('general', 'num_point')
        depth_model_path = parser.get('general', 'depth_model_path')

        with tf.device('/gpu:'+str(0)):
            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)
            self.is_training_pl = tf.placeholder(tf.bool, shape=())

            # simple model
            self.feature = self.get_model(self.pointclouds_pl, self.is_training_pl)

            # Add ops to save and restore all the variables.
            self.saver = tf.train.Saver()

        # Create session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.sess = tf.Session(config=config)

        # Initialize variables, then overwrite them with the checkpoint.
        self.sess.run(tf.global_variables_initializer())
        self.saver.restore(self.sess, depth_model_path)

    def __call__(self, input_point_cloud):
        """Return the feature tensor evaluated for ``input_point_cloud``.

        Args:
            input_point_cloud: value fed to the (1, None, 3) placeholder.
        """
        feed_dict = {self.pointclouds_pl: input_point_cloud,
                     self.is_training_pl: False}
        return self.sess.run(self.feature, feed_dict=feed_dict)

    def placeholder_inputs(self, batch_size, num_point):
        """Create the point-cloud and label placeholders.

        ``num_point`` is accepted but unused: the point axis is left as
        ``None`` so clouds of any size can be fed.

        Returns:
            ``(pointclouds_pl, labels_pl)`` with shapes (batch_size, None, 3)
            float32 and (batch_size) int32.
        """
        pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, None, 3))
        labels_pl = tf.placeholder(tf.int32, shape=(batch_size))
        return pointclouds_pl, labels_pl

    def get_model(self, point_cloud, is_training, bn_decay=None):
        """Build the PointNet trunk and return its pooled feature.

        Input is BxNx3. The 'conv5' activations are max-reduced over axis 1
        and reshaped to (B, -1). No classification head is built here.
        """
        num_clouds = point_cloud.get_shape()[0].value
        end_points = {}

        def conv(inputs, channels, kernel, scope):
            # Every trunk convolution shares these settings.
            return pointnet_tf_util.conv2d(inputs, channels, kernel,
                                           padding='VALID', stride=[1,1],
                                           bn=True, is_training=is_training,
                                           scope=scope, bn_decay=bn_decay)

        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE) as sc:
            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
        point_cloud_transformed = tf.matmul(point_cloud, transform)
        input_image = tf.expand_dims(point_cloud_transformed, -1)

        net = conv(input_image, 64, [1,3], 'conv1')
        net = conv(net, 64, [1,1], 'conv2')

        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE) as sc:
            transform = feature_transform_net(net, is_training, bn_decay, K=64)
        end_points['transform'] = transform
        net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
        net_transformed = tf.expand_dims(net_transformed, [2])

        net = conv(net_transformed, 64, [1,1], 'conv3')
        net = conv(net, 128, [1,1], 'conv4')
        net = conv(net, 1024, [1,1], 'conv5')

        # Symmetric function: max pooling
        net = tf.reduce_max(net, axis = 1)
        feature = tf.reshape(net, [num_clouds, -1])
        return feature

    def get_loss(self, pred, label, end_points, reg_weight=0.001):
        """Classification loss plus an orthogonality penalty on the transform.

        Args:
            pred: B*NUM_CLASSES logits.
            label: B integer labels.
            end_points: dict holding the BxKxK ``'transform'`` tensor.
            reg_weight: multiplier on the orthogonality penalty.

        Returns:
            Scalar tensor ``classify_loss + mat_diff_loss * reg_weight``.
        """
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
        classify_loss = tf.reduce_mean(loss)
        tf.summary.scalar('classify loss', classify_loss)

        # Enforce the transformation as orthogonal matrix
        transform = end_points['transform']  # BxKxK
        K = transform.get_shape()[1].value
        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
        # tf.eye avoids numpy, which this module never imports (the previous
        # np.eye call raised NameError).
        mat_diff = mat_diff - tf.eye(K, dtype=tf.float32)
        mat_diff_loss = tf.nn.l2_loss(mat_diff)
        tf.summary.scalar('mat loss', mat_diff_loss)

        return classify_loss + mat_diff_loss * reg_weight
================================================
FILE: paper_experiments/models/resnet_reid_models.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms
import torch.nn.functional as F
class FeatureResNet(nn.Module):
    """torchvision ResNet backbone with every 'fc' child removed."""

    def __init__(self,n_layers=50,pretrained=True):
        """Copy all non-'fc' children of a ResNet-18/34/50 onto this module.

        Args:
            n_layers: 50, 34 or 18.
            pretrained: forwarded to the torchvision constructor.

        Raises:
            NotImplementedError: for any other ``n_layers``.
        """
        super(FeatureResNet,self).__init__()
        if n_layers not in (50, 34, 18):
            raise NotImplementedError('resnet%s is not found'%(n_layers))
        backbone = getattr(models, 'resnet%d' % n_layers)(pretrained=pretrained)

        for child_name, child in backbone._modules.items():
            if 'fc' in child_name:
                continue
            self.add_module(child_name, child)

        # Width of the feature that the dropped classifier used to consume.
        self.output_dim = backbone.fc.in_features
        self.pretrained = pretrained

    def forward(self,x):
        """Run every child in order via ``data_parallel`` and flatten per sample."""
        for stage in self._modules.values():
            x = nn.parallel.data_parallel(stage, x)
        batch = x.size(0)
        return x.view(batch, -1)
class ResNet(nn.Module):
    """torchvision ResNet whose final 'fc' layer is resized to ``n_id`` outputs."""

    def __init__(self,n_id,n_layers=50,pretrained=True):
        """Adopt all children of a ResNet-18/34/50 and replace its classifier.

        Args:
            n_id: number of outputs of the new ``fc`` layer.
            n_layers: 50, 34 or 18.
            pretrained: forwarded to the torchvision constructor.

        Raises:
            NotImplementedError: for any other ``n_layers``.
        """
        super(ResNet,self).__init__()
        if n_layers not in (50, 34, 18):
            raise NotImplementedError('resnet%s is not found'%(n_layers))
        backbone = getattr(models, 'resnet%d' % n_layers)(pretrained=pretrained)

        for child_name, child in backbone._modules.items():
            self.add_module(child_name, child)
        # Fresh classifier with the same input width as the adopted one.
        in_width = self.fc.in_features
        self.fc = nn.Linear(in_width, n_id)
        self.pretrained = pretrained

    def forward(self,x):
        """Return ``(logits, features)``.

        ``features`` is the flattened output of every child except 'fc';
        ``logits`` is ``fc`` applied to those features.
        """
        for child_name, stage in self._modules.items():
            if child_name == 'fc':
                continue
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat), flat
class NLayersFC(nn.Module):
    """Fully connected stack: optional ReLU hidden layers, then a linear output."""

    def __init__(self, in_dim, out_dim, hidden_dim=1, n_layers=0):
        """Assemble the stack into ``self.model``.

        With ``n_layers == 0`` the model is a single ``Linear(in_dim, out_dim)``.
        Otherwise it is ``Linear(in_dim, hidden_dim)`` + ReLU, followed by
        ``n_layers - 1`` further hidden Linear + ReLU pairs, and a final
        ``Linear(hidden_dim, out_dim)``.
        """
        super(NLayersFC, self).__init__()
        if n_layers == 0:
            layers = [nn.Linear(in_dim, out_dim)]
        else:
            # At least one hidden layer is always built on this branch.
            widths = [in_dim] + [hidden_dim] * max(n_layers, 1)
            layers = []
            for fan_in, fan_out in zip(widths[:-1], widths[1:]):
                layers.append(nn.Linear(fan_in, fan_out))
                layers.append(nn.ReLU(True))
            layers.append(nn.Linear(hidden_dim, out_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stack to ``x``."""
        return self.model(x)
class ICT_ResNet(nn.Module):
    """torchvision ResNet with three linear heads: ``fc``, ``fc_c`` and ``fc_t``."""

    def __init__(self,n_id,n_color,n_type,n_layers=50,pretrained=True):
        """Adopt all children of a ResNet-18/34/50 and attach the three heads.

        Args:
            n_id: output size of ``fc``.
            n_color: output size of ``fc_c``.
            n_type: output size of ``fc_t``.
            n_layers: 50, 34 or 18.
            pretrained: forwarded to the torchvision constructor.

        Raises:
            NotImplementedError: for any other ``n_layers``.
        """
        super(ICT_ResNet,self).__init__()
        if n_layers not in (50, 34, 18):
            raise NotImplementedError('resnet%s is not found'%(n_layers))
        backbone = getattr(models, 'resnet%d' % n_layers)(pretrained=pretrained)

        for child_name, child in backbone._modules.items():
            self.add_module(child_name, child)

        # All heads share the input width of the adopted classifier.
        feat_dim = self.fc.in_features
        self.fc = nn.Linear(feat_dim, n_id)
        self.fc_c = nn.Linear(feat_dim, n_color)
        self.fc_t = nn.Linear(feat_dim, n_type)
        self.pretrained = pretrained

    def forward(self,x):
        """Return the ``(fc, fc_c, fc_t)`` outputs for ``x``.

        Children whose name contains 'fc' are skipped while computing the
        shared feature, which is then flattened per sample.
        """
        for child_name, stage in self._modules.items():
            if 'fc' in child_name:
                continue
            x = stage(x)
        x = x.view(x.size(0),-1)
        return self.fc(x), self.fc_c(x), self.fc_t(x)
class TripletNet(nn.Module):
    """Applies one shared network to three inputs and compares their features."""

    def __init__(self, net):
        """Store ``net``, a callable returning ``(prediction, feature)``."""
        super(TripletNet, self).__init__()
        self.net = net

    def forward(self, x, y, z):
        """Embed the three inputs and measure the two feature distances.

        Returns:
            ``(dist_pos, dist_neg, pred_x, pred_y, pred_z)`` where
            ``dist_pos`` is the L2 pairwise distance between the features of
            ``x`` and ``y`` and ``dist_neg`` the one between ``x`` and ``z``.
        """
        (pred_x, feat_x), (pred_y, feat_y), (pred_z, feat_z) = (
            self.net(sample) for sample in (x, y, z))
        dist_pos = F.pairwise_distance(feat_x, feat_y, 2)
        dist_neg = F.pairwise_distance(feat_x, feat_z, 2)
        return dist_pos, dist_neg, pred_x, pred_y, pred_z
if __name__ == '__main__':
    # Smoke test: build an ICT ResNet-18 on the GPU, print it, push one
    # constant 224x224 image through, and print the size of each head output.
    netM = ICT_ResNet(n_id=1000,n_color=7,n_type=7,n_layers=18,pretrained=True).cuda()
    print(netM)
    sample = Variable(torch.ones(1,3,224,224).cuda()/2.)
    output = netM(sample)
    for head_idx in range(3):
        print(output[head_idx].size())
================================================
FILE: paper_experiments/models/yolo_models.py
================================================
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from PIL import Image
from utils.yolo_utils.parse_config import *
from utils.yolo_utils.utils import build_targets
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` holds the network hyperparameters and is
    popped off the list (the caller's list is mutated). Every remaining entry
    becomes one ``nn.Sequential`` in the returned ``nn.ModuleList``.

    Returns ``(hyperparams, module_list)``.
    """
    hyperparams = module_defs.pop(0)
    # Channel count produced by each layer so far, seeded with the input channels.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for idx, spec in enumerate(module_defs):
        block = nn.Sequential()
        layer_type = spec["type"]

        if layer_type == "convolutional":
            use_bn = int(spec["batch_normalize"])
            filters = int(spec["filters"])
            ksize = int(spec["size"])
            padding = (ksize - 1) // 2 if int(spec["pad"]) else 0
            conv = nn.Conv2d(
                in_channels=output_filters[-1],
                out_channels=filters,
                kernel_size=ksize,
                stride=int(spec["stride"]),
                padding=padding,
                bias=not use_bn,
            )
            block.add_module("conv_%d" % idx, conv)
            if use_bn:
                block.add_module("batch_norm_%d" % idx, nn.BatchNorm2d(filters))
            if spec["activation"] == "leaky":
                block.add_module("leaky_%d" % idx, nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            ksize = int(spec["size"])
            pool_stride = int(spec["stride"])
            if ksize == 2 and pool_stride == 1:
                # Pad right/bottom by one before a 2x2, stride-1 pool.
                block.add_module("_debug_padding_%d" % idx, nn.ZeroPad2d((0, 1, 0, 1)))
            pool = nn.MaxPool2d(
                kernel_size=ksize,
                stride=pool_stride,
                padding=int((ksize - 1) // 2),
            )
            block.add_module("maxpool_%d" % idx, pool)

        elif layer_type == "upsample":
            block.add_module(
                "upsample_%d" % idx,
                Interpolate(scale_factor=int(spec["stride"]), mode="nearest"),
            )

        elif layer_type == "route":
            # A route outputs the channel-wise concatenation of earlier layers.
            sources = [int(token) for token in spec["layers"].split(",")]
            filters = sum([output_filters[src] for src in sources])
            block.add_module("route_%d" % idx, EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[int(spec["from"])]
            block.add_module("shortcut_%d" % idx, EmptyLayer())

        elif layer_type == "yolo":
            mask = [int(token) for token in spec["mask"].split(",")]
            # Extract anchors: flat list -> (w, h) pairs -> the masked subset
            flat = [int(token) for token in spec["anchors"].split(",")]
            pairs = [(flat[k], flat[k + 1]) for k in range(0, len(flat), 2)]
            anchors = [pairs[k] for k in mask]
            num_classes = int(spec["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            block.add_module("yolo_%d" % idx, YOLOLayer(anchors, num_classes, img_height))

        # Register the block; layer types that do not set `filters` re-use the
        # value left over from the previous iteration.
        module_list.append(block)
        output_filters.append(filters)

    return hyperparams, module_list
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers"""

    def __init__(self):
        # No parameters or sub-modules: the real work is done by the caller.
        nn.Module.__init__(self)
class Interpolate(nn.Module):
    """Module wrapper around ``nn.functional.interpolate`` with fixed settings."""

    def __init__(self, scale_factor, mode):
        """Remember the scale factor and interpolation mode to apply."""
        super(Interpolate, self).__init__()
        self.interp = nn.functional.interpolate
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        """Return ``x`` resampled by the stored scale factor and mode."""
        return self.interp(x, scale_factor=self.scale_factor, mode=self.mode)
class YOLOLayer(nn.Module):
    """Detection layer"""

    def __init__(self, anchors, num_classes, img_dim):
        """Store the anchors and create the loss functions.

        Args:
            anchors: sequence of (width, height) anchor pairs.
            num_classes: number of class scores predicted per box.
            img_dim: input image dimension used to derive the grid stride.
        """
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        # 4 box values + 1 confidence + one score per class
        self.bbox_attrs = 5 + num_classes
        self.image_dim = img_dim
        self.ignore_thres = 0.5
        self.lambda_coord = 1

        self.mse_loss = nn.MSELoss(reduction = 'elementwise_mean')  # Coordinate loss
        self.bce_loss = nn.BCELoss(reduction = 'elementwise_mean')  # Confidence loss
        self.ce_loss = nn.CrossEntropyLoss()  # Class loss

    def forward(self, x, targets=None):
        """Decode the raw feature map into boxes, or compute the training loss.

        Args:
            x: tensor viewed as (nB, nA, bbox_attrs, nG, nG), where nB and nG
                come from ``x.size(0)`` and ``x.size(2)``.
            targets: ground truth passed to ``build_targets``; ``None`` selects
                inference.

        Returns:
            Inference: tensor of shape (nB, nA*nG*nG, 5 + num_classes) whose
            box values are multiplied by the stride.
            Training: tuple ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf,
            loss_cls, recall, precision)``; all but the first are Python numbers.
        """
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        if targets is None:
            # Not in training phase: return predictions
            return torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )

        # Training
        if x.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            self.ce_loss = self.ce_loss.cuda()

        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes=pred_boxes.cpu().data,
            pred_conf=pred_conf.cpu().data,
            pred_cls=pred_cls.cpu().data,
            target=targets.cpu().data,
            anchors=scaled_anchors.cpu().data,
            num_anchors=nA,
            num_classes=self.num_classes,
            grid_size=nG,
            ignore_thres=self.ignore_thres,
            img_dim=self.image_dim,
        )

        nProposals = int((pred_conf > 0.5).sum().item())
        recall = float(nCorrect / nGT) if nGT else 1
        # Guard the division: with no confident proposal the unguarded
        # expression raised ZeroDivisionError and aborted the training step.
        precision = float(nCorrect / nProposals) if nProposals else 0.0

        # Handle masks
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))

        # Handle target variables
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(LongTensor), requires_grad=False)

        # Get conf mask where gt and where there is no gt
        conf_mask_true = mask
        conf_mask_false = conf_mask - mask

        # Mask outputs to ignore non-existing objects
        loss_x = self.mse_loss(x[mask], tx[mask])
        loss_y = self.mse_loss(y[mask], ty[mask])
        loss_w = self.mse_loss(w[mask], tw[mask])
        loss_h = self.mse_loss(h[mask], th[mask])
        loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
            pred_conf[conf_mask_true], tconf[conf_mask_true]
        )
        loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
            precision,
        )
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, config_path):
super(Darknet, self).__init__()
self.module_defs = parse_model_config(config_path)
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.seen = 0
self.header_info = np.array([0, 0, 0, self.seen, 0])
self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
self.load_weights(self.module_defs[-1]['path'])
def forward(self, x, targets=None):
is_training = targets is not None
output = []
self.losses = defaultdict(float)
layer_outputs = []
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
x = module(x)
elif module_def["type"] == "route":
layer_i = [int(x) for x in module_def["layers"].split(",")]
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
elif module_def["type"] == "shortcut":
layer_i = int(module_def["from"])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif module_def["type"] == "yolo":
# Train phase: get loss
if is_training:
x, *losses = module[0](x, targets)
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
# Test phase: Get detections
else:
x = module(x)
output.append(x)
layer_outputs.append(x)
self.losses["recall"] /= 3
self.losses["precision"] /= 3
return sum(output) if is_training else torch.cat(output, 1)
def load_weights(self, weights_path):
"""Parses and loads the weights stored in 'weights_path'"""
# Open the weights file
fp = open(weights_path, "rb")
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
# Needed to write header when saving weights
self.header_info = header
self.seen = header[3]
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
fp.close()
ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def["type"] == "convolutional":
conv_layer = module[0]
if module_def["batch_normalize"]:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w
"""
@:param path - path of the new weights file
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""
def save_weights(self, path, cutoff=-1):
    """Write the model weights to 'path' in the layout read by ``load_weights``.

    @:param path    - path of the new weights file
    @:param cutoff  - save layers between 0 and cutoff (cutoff = -1 -> all are saved)

    The header (``self.header_info`` with its fourth entry refreshed from
    ``self.seen``) is written first.  For each convolutional module the
    batch-norm tensors (bias, weight, running mean, running var) or the
    convolution bias are written, followed by the convolution weights.
    """
    # -1 is the "save everything" sentinel; slicing with [:-1] would silently
    # drop the final layer, so translate it to an open-ended slice.
    stop = None if cutoff == -1 else cutoff

    # The context manager closes the file even if a tensor dump raises.
    with open(path, "wb") as fp:
        self.header_info[3] = self.seen
        self.header_info.tofile(fp)

        # Iterate through layers
        for module_def, module in zip(self.module_defs[:stop], self.module_list[:stop]):
            if module_def["type"] != "convolutional":
                continue
            conv_layer = module[0]
            if module_def["batch_normalize"]:
                # With batch norm, the BN tensors replace the conv bias.
                bn_layer = module[1]
                bn_layer.bias.data.cpu().numpy().tofile(fp)
                bn_layer.weight.data.cpu().numpy().tofile(fp)
                bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                bn_layer.running_var.data.cpu().numpy().tofile(fp)
            else:
                conv_layer.bias.data.cpu().numpy().tofile(fp)
            conv_layer.weight.data.cpu().numpy().tofile(fp)
================================================
FILE: paper_experiments/requirements.txt
================================================
absl-py==0.7.0
astor==0.7.1
backcall==0.1.0
bleach==3.3.0
catkin-pkg==0.4.12
certifi==2018.11.29
chardet==3.0.4
cloudpickle==0.7.0
cupy==6.0.0
cycler==0.10.0
Cython==0.29.7
dask==2021.10.0
decorator==4.3.2
defusedxml==0.5.0
docutils==0.14
entrypoints==0.3
fastrlock==0.4
ffmpeg==1.4
gast==0.2.2
grpcio==1.18.0
gurobipy==8.1.0
html5lib==0.9999999
idna==2.8
imageio==2.5.0
imageio-ffmpeg==0.3.0
ipydatawidgets==4.0.0
ipykernel==5.1.0
ipympl==0.2.1
ipython==7.16.3
ipython-genutils==0.2.0
ipyvolume==0.5.1
ipywebrtc==0.4.3
ipywidgets==7.4.2
jedi==0.13.2
Jinja2==2.11.3
jsonschema==2.6.0
jupyter-client==5.2.4
jupyter-core==4.4.0
jupyterlab==1.2.21
jupyterlab-server==0.2.0
kiwisolver==1.0.1
lap==0.4.0
lapjv==1.3.1
line-profiler==2.1.1
llvmlite==0.28.0
Markdown==3.0.1
MarkupSafe==1.1.0
matplotlib==3.0.2
mistune==0.8.4
nbconvert==5.4.1
nbformat==4.4.0
networkx==2.2
notebook==6.4.1
numba==0.43.1
numpy==1.21.0
open3d-python==0.7.0.0
opencv-python==4.2.0.32
pandas==0.24.1
pandocfilters==1.4.2
parso==0.3.3
pexpect==4.6.0
pickleshare==0.7.5
Pillow==9.0.0
pptk==0.1.0
prometheus-client==0.5.0
prompt-toolkit==2.0.8
protobuf==3.6.1
ptyprocess==0.6.0
pycocotools==2.0.0
Pygments==2.7.4
pyparsing==2.3.1
pypcd==0.1.1
python-dateutil==2.8.0
python-lzf==0.2.4
pythreejs==2.0.2
pytz==2018.9
PyWavelets==1.0.1
PyYAML==5.4
pyzmq==17.1.2
requests==2.21.0
rospkg==1.1.9
scikit-image==0.14.2
scikit-learn==0.20.2
scipy==1.2.0
seaborn==0.9.0
Send2Trash==1.5.0
six==1.12.0
sklearn==0.0
tensorboard==1.8.0
tensorboardX==1.6
tensorflow-gpu==2.5.2
termcolor==1.1.0
terminado==0.8.1
testpath==0.4.2
toolz==0.9.0
torch==1.0.1
torchvision==0.2.1
tornado==5.1.1
tqdm==4.30.0
traitlets==4.3.2
traittypes==0.2.1
urllib3==1.26.5
wcwidth==0.1.7
Werkzeug==0.15.3
widgetsnbextension==3.4.2
================================================
FILE: paper_experiments/track.py
================================================
import open3d as o3d
import torch
import argparse
import os, pdb, sys, copy, pickle
import time
import random
import numpy as np
import tensorflow as tf
from torch.utils.data import DataLoader
from tqdm import tqdm
from models.aligned_reid_model import Model as aligned_reid_model
from utils.yolo_utils.utils import non_max_suppression, load_classes
from models.combination_model import CombiNet, CombiLSTM
from utils.dataset import SequenceDataset, STIPDataset, collate_fn
from models.deep_sort_model import ImageEncoder as deep_sort_model
from utils.tracker import Tracker
from utils.tracker_3d import Tracker_3d
from utils.deep_sort_utils import non_max_suppression as deepsort_nms
from utils.visualise import draw_track
from utils.read_detections import read_ground_truth_2d_detections, read_ground_truth_3d_detections
from utils.tracking_utils import create_detector, convert_detections, combine_features
from utils.tracking_utils import non_max_suppression_3D, non_max_suppression_3D_prime
from utils.aligned_reid_utils import generate_features, generate_features_batched, get_image_patches, create_appearance_model
from utils.featurepointnet_model_util import generate_detections_3d, convert_depth_features
from models.featurepointnet_model import create_depth_model
def _str2bool(value):
    """Convert a command-line token to a bool (used as an argparse ``type``).

    ``type=bool`` treats every non-empty string, including "False", as True;
    this helper parses the usual spellings instead.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def parse_arguments():
    """Parse the tracking command line and prepare the output folder.

    Returns the argparse namespace with two adjustments: any trailing path
    separator is stripped from ``sequence_folder`` and ``using_cuda`` is set
    to ``torch.cuda.is_available() and use_cuda``.  The output folder is
    created when missing.

    Note that every ``store_false`` flag (-p, -o, -r, -c, -f, -j) defaults to
    True and is switched OFF by passing the flag.

    Raises:
        ValueError: if 3D tracking is requested while the point cloud is disabled.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--sequence_folder', type=str, default='data/KITTI/sequences/0001', help='path to image sequence')
    parser.add_argument('--output_folder', type=str, default='results', help='output folder')
    parser.add_argument('--aligned_reid_ckpt', type=str, default='weights/aligned_reid_market_weights.ckpt', help='path to model config file')
    parser.add_argument('--resnet_reid_ckpt', type=str, default='weights/resnet_reid.ckpt', help='path to model config file')
    parser.add_argument('--depth_model', type=str, default='FPointNet', help='type of depth model to use')
    parser.add_argument('--depth_config_path', type=str, default='config/featurepointnet.cfg', help='path to model config file')
    parser.add_argument('--appearance_model', type=str, default='resnet_reid', help='type of appearance model to use aligned_reid or deepsort or resnet_reid')
    parser.add_argument('--conf_thres', type=float, default=0.8, help='object confidence threshold')
    parser.add_argument('--depth_weight', type=float, default=1, help='weight of depth feature while concatenating')
    parser.add_argument('--nms_thresh', type=float, default=0.56, help='iou thresshold for non-maximum suppression')
    parser.add_argument('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation')
    # _str2bool so that "--use_cuda False" really disables CUDA.
    parser.add_argument('--use_cuda', type=_str2bool, default=True, help='whether to use cuda if available')
    parser.add_argument('-p', '--point_cloud', action='store_false', help='Use to disable pointcloud')
    parser.add_argument('-o', '--optical_flow_initiation', action='store_false', help='Use to enable optical flow based velocity initiation')
    parser.add_argument('-q', '--perfect', action='store_true', help='whether to use perfect assignments')
    parser.add_argument('-g', '--ground_truth', action='store_true', help='whether to use ground truth detections')
    parser.add_argument('-r', '--reference', action='store_false', help='whether to use reference detections')
    parser.add_argument('-t', '--track_3d', action='store_true', help='whether to do 3d tracking')
    parser.add_argument('--ref_det', type = str, default = 'new_rrc_subcnn_car', help='lsvm, subcnn, regionlets, maskrcnn')
    parser.add_argument("--nn_budget", type=int, default=100, help="Maximum size of the appearance descriptors gallery. If None, no budget is enforced.")
    parser.add_argument("--dummy_node_cost_app", type=float, default=0.99, help="Dummy node appearance cost for JPDA (or maximum distnce when using deepsort)")
    parser.add_argument("--dummy_node_cost_iou", type=float, default=0.97, help="Dummy node iou cost for JPDA (or maximum distnce when using deepsort)")
    parser.add_argument("-c", "--combine_features", action = 'store_false', help="Whether to use trained MLP to combine features")
    parser.add_argument("-f", "--fpointnet", action = 'store_false', help="Whether to use F-PointNet for 3d detection")
    # The default previously ended with a stray double quote, which made the
    # default checkpoint path unloadable.
    parser.add_argument("--combo_model", default = 'weights/resnet_reid_fpointnet_combo_car/mlp__1570759353.0113978/best_checkpoint.tar', help="Trained MLP checkpoint to combine features")
    parser.add_argument("-j", "--JPDA", action = 'store_false', help="Whether to use JPDA for soft assignments")
    parser.add_argument("-l", "--LSTM", action = 'store_true', help="Whether to use LSTM for feature combination and update")
    parser.add_argument("--lstm_model", default = 'weights/aligned_reid_fpointnet_combo/lstm/best_checkpoint.tar', help="Trained LSTM checkpoint to combine features")
    parser.add_argument("-m","--m_best_sol", type=int, default=10, help="Number of solutions for JPDA")
    parser.add_argument("--log_data", action='store_true', help="Turn on full data logging")
    parser.add_argument("--max_age", type=int, default=2, help="Number of misses before termination")
    parser.add_argument("--n_init", type=int, default=2, help="Consecutive frames for tentative->confirmed")
    parser.add_argument("--assn_thresh", type=float, default=0.65, help="min prob for match")
    parser.add_argument("--matching_strategy", type=str, default="hungarian", help="matching strategy for JPDA (max_and_threshold, strict_max_pair, or hungarian)")
    parser.add_argument("--kf_appearance_feature", type=_str2bool, default=False, help="Whether to use kf state for apperance features")
    parser.add_argument('-i', "--use_imm", action = 'store_true', help='Whether to use IMM')
    parser.add_argument('-v', "--verbose", action = 'store_true', help='Verbose')
    parser.add_argument('--kf_process', type=float, default=5.2, help='kf 2d process noise factor')
    parser.add_argument('--kf_2d_meas', type=float, default=3.2, help='kf 2d measurement noise factor')
    parser.add_argument('--kf_3d_meas', type=float, default=0.25, help='kf 3d measurement noise factor')
    parser.add_argument('--pos_weight_3d', type=float, default=1, help='Weight on position covariance process noise in KF')
    parser.add_argument('--pos_weight', type=float, default=0.006, help='Weight on position covariance process noise in KF')
    parser.add_argument('--vel_weight', type=float, default=0.008, help='Weight on velocity covariance process noise in KF')
    parser.add_argument('--theta_weight', type=float, default=0.02, help='Weight on velocity covariance process noise in KF')
    parser.add_argument('--gate_limit', type=float, default=600, help='Maximum covariance value of the gate')
    parser.add_argument('--initial_uncertainty', type=float, default=1, help='Uncertainty scaling for initial covariance of track')
    parser.add_argument('--uncertainty_limit', type=float, default=1.5, help='Uncertainty limit at which to terminate tracks')
    parser.add_argument("--gate_full_state", action='store_true', help="Whether to gate on full kalman state, default is only position")
    parser.add_argument("--near_online", action = 'store_true', help="Whether to do near online tracking")
    parser.add_argument("--omni", action = 'store_true', help="Omni directional camera (JRDB)")
    opt = parser.parse_args()

    opt.sequence_folder = opt.sequence_folder.rstrip(os.sep)
    opt.using_cuda = torch.cuda.is_available() and opt.use_cuda
    if not opt.point_cloud and opt.track_3d:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError("Must provide point cloud if doing 3D tracking!")
    if opt.verbose:
        print(opt)
    # exist_ok avoids the check-then-create race.
    os.makedirs(opt.output_folder, exist_ok=True)
    return opt
# @profile
def main(opt):
    """Track one image sequence and write the results to ``opt.output_folder``.

    Steps, as performed below:
      1. Load the optional feature-combination models (LSTM / MLP), the
         dataset, the appearance model, the depth model (only when the point
         cloud is enabled) and a 2D or 3D tracker.
      2. For every frame: read detections from file, extract appearance (and,
         with a point cloud, depth) features, then run tracker predict/update.
         Only the update call is timed.
      3. Collect one result row per reported track and frame, and write them as
         comma-separated text (``<sequence>.txt`` or ``<sequence>_3d.txt``).
         With ``opt.log_data`` a pickle of per-frame tracker state is written too.

    Raises:
        ValueError: if neither reference nor ground-truth detections are selected.
    """
    if opt.verbose:
        print("------------------------")
        print("RUNNING SET UP")
        print("------------------------")
    tf.logging.set_verbosity(40)
    random.seed(0)
    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
    os.makedirs(opt.output_folder, exist_ok=True)

    if opt.LSTM:
        opt.max_cosine_distance = 1
        lstm = CombiLSTM()
        checkpoint = torch.load(opt.lstm_model)
        lstm.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            lstm.cuda()
        lstm.eval()
    else:
        lstm = None

    if opt.combine_features:
        combination_model = CombiNet()
        checkpoint = torch.load(opt.combo_model)
        combination_model.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            combination_model.cuda()
        combination_model.eval()
    else:
        combination_model = None

    dataset = SequenceDataset(opt.sequence_folder, point_cloud=opt.point_cloud, omni=opt.omni)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn = collate_fn)
    appearance_model = create_appearance_model(opt.appearance_model, opt.aligned_reid_ckpt, opt.resnet_reid_ckpt, opt.using_cuda)
    if opt.point_cloud:
        depth_model = create_depth_model(opt.depth_model, opt.depth_config_path)

    if opt.track_3d:
        tracker = Tracker_3d(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                             max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                             matching_strategy=opt.matching_strategy,
                             gate_full_state=opt.gate_full_state,
                             kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight, opt.theta_weight,
                                            opt.kf_process, opt.kf_2d_meas, opt.kf_3d_meas, opt.initial_uncertainty),
                             calib=dataset.calib,
                             dummy_node_cost_iou=opt.dummy_node_cost_iou,
                             dummy_node_cost_app=opt.dummy_node_cost_app,
                             nn_budget=opt.nn_budget,
                             use_imm=opt.use_imm,
                             uncertainty_limit=opt.uncertainty_limit,
                             gate_limit=opt.gate_limit,
                             omni=opt.omni)
    else:
        tracker = Tracker(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                          max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                          matching_strategy=opt.matching_strategy,
                          kf_appearance_feature=opt.kf_appearance_feature,
                          gate_full_state=opt.gate_full_state,
                          kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                          kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                          calib=dataset.calib,
                          dummy_node_cost_iou=opt.dummy_node_cost_iou,
                          dummy_node_cost_app=opt.dummy_node_cost_app,
                          nn_budget=opt.nn_budget,
                          use_imm=opt.use_imm,
                          uncertainty_limit=opt.uncertainty_limit,
                          optical_flow=opt.optical_flow_initiation,
                          gate_limit=opt.gate_limit)

    results = []
    results_3d = []
    n_frames = len(dataloader)
    if opt.log_data:
        full_log = [{'tracks':[], 'detections':[], 'detections_3d':[]} for _ in range(n_frames)]
    det_matrix = None
    seq_name = os.path.split(opt.sequence_folder)[-1]
    frame_times = []
    if opt.verbose:
        print("------------------------")
        print("BEGINNING TRACKING OF SEQUENCE %s"%seq_name)
        print("------------------------")

    for frame_idx, img_path, input_img, point_cloud in tqdm(dataloader, ncols = 100, disable=not opt.verbose):
        if opt.log_data:
            full_log[frame_idx]['img_path'] = copy.copy(img_path)
        input_img = input_img.type(Tensor)

        if opt.reference:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'det',opt.ref_det+'.txt'), frame_idx, det_matrix, threshold = 0, nms_threshold = opt.nms_thresh)
        elif opt.ground_truth:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'gt','gt.txt'), frame_idx, det_matrix, nms_threshold = opt.nms_thresh)
        else:
            # Raising a bare string is itself a TypeError; raise a real exception.
            raise ValueError("Must specify ground truth or detections")

        # --- START OF TRACKING ---
        if detections is None or len(detections)==0:
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, [])
        else:
            total_dets = len(detections)
            patches = get_image_patches(input_img, detections)
            appearance_features = generate_features_batched(appearance_model, patches, opt, object_ids)
            if opt.point_cloud:
                if not opt.omni:
                    # Keep only points whose third coordinate is non-negative.
                    point_cloud = point_cloud[point_cloud[:,2]>=0]
                if opt.fpointnet:
                    boxes_3d, valid_3d, _, scores_3d, depth_features = generate_detections_3d(depth_model,
                                                                            detections, np.asarray(point_cloud),
                                                                            dataset.calib, input_img.shape,
                                                                            peds='ped' in opt.ref_det or opt.omni)
                    depth_features = convert_depth_features(depth_features, valid_3d)
                else:
                    # NOTE(review): depth_features is not assigned on this branch, so the
                    # combine_features call below fails when F-PointNet is disabled - confirm
                    # what features ground-truth 3D boxes are supposed to use.
                    boxes_3d, valid_3d = read_ground_truth_3d_detections(os.path.join(opt.sequence_folder,'gt','3d_detections.txt'), frame_idx)
                features, appearance_features = combine_features(appearance_features, depth_features, valid_3d, combination_model, depth_weight = opt.depth_weight)
                # Detections without a valid 3D box are kept (as None below) rather than
                # filtered out; fall back to 2D comparison when any box is missing.
                compare_2d = bool(np.any(valid_3d == -1))
                if len(boxes_3d) > 0:
                    detections_3d = []
                    for idx, box in enumerate(boxes_3d):
                        if valid_3d[idx] == -1:
                            detections_3d.append(None)
                        else:
                            detections_3d.append(np.array(box).astype(np.float32))
                else:
                    detections_3d = None
            else:
                appearance_features = [appearance_features[i] for i in range(total_dets)]
                features = [None]*len(appearance_features)
                compare_2d = True
                detections_3d = None
            detections = convert_detections(detections, features, appearance_features, detections_3d)
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, detections, compare_2d)
        # --- END OF TRACKING ---
        end_time = time.time()
        frame_times.append(end_time - start_time)

        if opt.log_data:
            full_tracks = copy.deepcopy(tracker.tracks)
            temp_tracks = []
            for track in full_tracks:
                bbox = track.to_tlwh(None)
                # Log only tracks within a 10 px margin of the image.
                if not (bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10):
                    temp_tracks.append(track)
            full_log[frame_idx]['tracks'] = temp_tracks
            full_log[frame_idx]['detections'] = copy.deepcopy(detections)

        for track in tracker.tracks:
            if opt.track_3d:
                bbox_3d = track.to_tlwh3d()
            else:
                bbox = track.to_tlwh(None)
                # Drop boxes more than 10 px outside the image ...
                if bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10:
                    continue
                # ... and clip the remaining ones to the frame.
                bbox[0] = max(0,bbox[0])
                bbox[1] = max(0,bbox[1])
                bbox[2] = min(bbox[0]+bbox[2], input_img.shape[2])-bbox[0]
                bbox[3] = min(bbox[1]+bbox[3], input_img.shape[1])-bbox[1]

            # Status 1 = confirmed, 0 = tentative (only recorded in near-online mode).
            track_status = 1
            if not track.is_confirmed():
                if not opt.near_online:
                    continue
                track_status = 0

            if opt.near_online:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status])
                    active_rows, status_col = results_3d, 9
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track_status])
                    active_rows, status_col = results, 6
                if track_status == 1:
                    # Promote the earlier tentative rows of this track.  This walks the
                    # list that is actually being filled and writes the status column
                    # (previously results_3d was indexed with rows of `results` and
                    # column 7, which holds a box value, was overwritten).
                    for row in active_rows:
                        if row[1] == track.track_id:
                            row[status_col] = 1
            else:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6]])
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    frame_times = np.asarray(frame_times)
    if opt.verbose:
        print("------------------------")
        print("COMPLETED TRACKING, SAVING RESULTS")
        print("------------------------")
        print('\n\n','Total Tracking Time:',np.sum(frame_times),'Average Time Per Frame:',np.mean(frame_times))

    if opt.track_3d:
        output_file_3d = os.path.join(opt.output_folder, seq_name+"_3d.txt")
        if len(results_3d) > 0:
            with open(output_file_3d, 'w+') as f:
                for row in results_3d:
                    # In near-online mode only rows that ended up confirmed are written.
                    if opt.near_online and row[9] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.4f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]), file=f)
    else:
        output_file = os.path.join(opt.output_folder, seq_name+".txt")
        if len(results) > 0:
            with open(output_file, 'w+') as f:
                for row in results:
                    if opt.near_online and row[6] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5]), file=f)

    if opt.log_data:
        output_file = os.path.join(opt.output_folder, seq_name+".p")
        with open(output_file, 'wb') as f:
            pickle.dump(full_log, f)
if __name__ == '__main__':
    # Script entry point: read the command line, then run the tracker on it.
    opt = parse_arguments()
    main(opt)
================================================
FILE: paper_experiments/utils/EKF.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import pdb
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
# Chi-square quantile tables, keyed by degrees of freedom N = 1..9.
# chi2inv95, chi2inv90, chi2inv975 and chi2inv75 hold the 0.95, 0.90, 0.975
# and 0.75 quantiles respectively.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}

chi2inv90 = {
    1: 2.706,
    2: 4.605,
    3: 6.251,
    4: 7.779,
    5: 9.236,
    6: 10.645,
    7: 12.017,
    8: 13.363,
    9: 14.684}

chi2inv975 = {
    1: 5.025,
    2: 7.378,
    3: 9.348,
    4: 11.143,
    5: 12.833,
    6: 14.449,
    7: 16.013,
    8: 17.535,
    9: 19.023}

# NOTE: these are the 0.10 quantiles (lower tail), i.e. values exceeded with
# probability 0.90.  For N = 2 the exact value is -2*ln(0.9) = 0.2107; the
# entry previously read .221.
chi2inv10 = {
    1: .016,
    2: .211,
    3: .584,
    4: 1.064,
    5: 1.610,
    6: 2.204,
    7: 2.833,
    8: 3.490,
    9: 4.168}

# NOTE: despite the name, these are the 0.005 quantiles (lower tail), i.e.
# values exceeded with probability 0.995 - not the 0.995 quantiles.
chi2inv995 = {
    1: 0.0000393,
    2: 0.0100,
    3: .0717,
    4: .207,
    5: .412,
    6: .676,
    7: .989,
    8: 1.344,
    9: 1.735}

chi2inv75 = {
    1: 1.323,
    2: 2.773,
    3: 4.108,
    4: 5.385,
    5: 6.626,
    6: 7.841,
    7: 9.037,
    8: 10.22,
    9: 11.39}
def squared_mahalanobis_distance(mean, covariance, measurements):
    """Squared Mahalanobis distance of each measurement from a Gaussian.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the distribution.
    covariance : ndarray
        Covariance matrix of the distribution.
    measurements : ndarray
        One measurement per row; a single 1-D measurement is also accepted
        and treated as one row.

    Returns
    -------
    ndarray
        ``d_i^T * inv(covariance) * d_i`` for every row ``d_i`` of
        ``measurements - mean``.
    """
    d = np.atleast_2d(measurements - mean)
    # Solve covariance * z = d^T instead of forming the explicit inverse, then
    # take the row-wise dot product.  This avoids building the full N x N
    # product only to read its diagonal.  (The Cholesky variant that used to
    # follow the return statement was unreachable and has been removed.)
    z = np.linalg.solve(covariance, d.T)
    return np.einsum('ij,ji->i', d, z)
class EKF(object):
    """
    Generic extended kalman filter class.

    This base class only fixes the control flow of ``predict``, ``project``
    and ``update``; the model-specific hooks below return None here and are
    meant to be overridden.  ``update`` additionally reads
    ``self._observation_mat`` and (for JPDA) ``self.ndim``, which this class
    does not set itself.
    """

    def __init__(self):
        pass

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the new track.
            Unobserved velocities are initialized to 0 mean.
        """
        pass

    def predict_mean(self, mean):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        pass

    def get_process_noise(self, mean, covariance):
        # Returns Rt the motion noise covariance
        pass

    def predict_covariance(self, mean, covariance, last_detection=None, next_to_last_detection=None):
        # Hook for the covariance prediction.  The two detection arguments are
        # accepted (optionally) because predict() always forwards them; the
        # base signature previously rejected that call.
        pass

    def project_mean(self, mean):
        # Measurement prediction from state (function h)
        # Calculations sensor update Jacobian (Ht)
        # Returns (h(mean), Ht)
        pass

    def project_cov(self, mean, covariance):
        pass

    def predict(self, mean, covariance, last_detection, next_to_last_detection):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The covariance matrix of the object state at the
            previous time step.
        last_detection, next_to_last_detection
            Forwarded unchanged to ``predict_covariance``.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.
        """
        # The covariance is predicted first, from the not-yet-propagated mean.
        covariance = self.predict_covariance(mean, covariance, last_detection, next_to_last_detection)
        mean = self.predict_mean(mean)
        return mean, covariance

    def get_innovation_cov(self, covariance):
        pass

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector
        covariance : ndarray
            The state's covariance matrix

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.
        """
        return self.project_mean(mean), self.project_cov(mean, covariance)

    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector.
        covariance : ndarray
            The state's covariance matrix.
        measurement_t : ndarray or sequence of ndarray
            A single measurement vector, or - when ``JPDA`` is True - the
            measurements to be combined with the weights in ``marginalization``.
        marginalization : sequence of float, optional
            Only used when ``JPDA`` is True.  Entry 0 is the weight of the
            dummy (no measurement) hypothesis; entry ``k + 1`` is the weight of
            ``measurement_t[k]``.
        JPDA : bool
            Selects the probabilistically weighted update.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.
        """
        predicted_measurement, innovation_cov = self.project(mean, covariance)

        # K = covariance * update_mat.T * inv(innovation_cov), so K is also the
        # solution to  innovation_cov * K.T = (covariance * update_mat.T).T
        rhs = np.dot(covariance, self._observation_mat.T).T
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            kalman_gain = scipy.linalg.cho_solve(
                (chol_factor, lower), rhs, check_finite=False).T
        except (np.linalg.LinAlgError, ValueError):
            # Cholesky factorization failed (e.g. the matrix is not positive
            # definite): revert to the standard solver.  Only numerical
            # failures are caught so that interrupts still propagate.
            kalman_gain = np.linalg.solve(innovation_cov, rhs).T

        if JPDA:
            # Probability-weighted innovation and its spread over all measurements.
            innovation = np.zeros((self.ndim))
            cov_soft = np.zeros((self.ndim, self.ndim))
            for measurement_idx, measurement in enumerate(measurement_t):
                p_ij = marginalization[measurement_idx + 1]  # + 1 for dummy
                y_ij = measurement - predicted_measurement
                innovation += y_ij * p_ij
                cov_soft += p_ij * np.outer(y_ij, y_ij)
            cov_soft = cov_soft - np.outer(innovation, innovation)

            # Covariance of a regular update, blended with the prior by the
            # dummy weight, plus the spread-of-innovations term.
            P_star = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))
            p_0 = marginalization[0]
            P_0 = p_0 * covariance + (1 - p_0) * P_star
            new_covariance = P_0 + np.linalg.multi_dot((kalman_gain, cov_soft, kalman_gain.T))
        else:
            innovation = measurement_t - predicted_measurement
            new_covariance = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        return new_mean, new_covariance
================================================
FILE: paper_experiments/utils/JPDA_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from linear_assignment import min_marg_matching
import pdb
def get_unmatched(all_idx, matches, i, marginalization=None):
    """Return the members of ``all_idx`` that do not occur in ``matches``.

    ``matches`` is a sequence of pairs and ``i`` selects which element of
    each pair is compared against ``all_idx``.  The result is a list built
    from a set, so duplicates are removed.
    """
    taken = {pair[i] for pair in matches}
    remaining = set(all_idx).difference(taken)
    if marginalization is not None:
        # Detections with non-zero mass in any row (column 0 is the dummy
        # node).  Computed but currently unused.
        # TODO: Filter by gate?
        gate_mass = np.sum(marginalization[:, 1:], axis=0)
        in_gate_dets = np.nonzero(gate_mass)[0].tolist()
    return list(remaining)
class Matcher:
    """Turns JPDA marginal probabilities into hard track/detection matches.

    ``marginalizations`` has one row per entry of ``confirmed_tracks`` (row
    ``r`` belongs to ``confirmed_tracks[r]``); column 0 is the dummy node
    and column ``d + 1`` belongs to detection ``d``.
    """

    def __init__(self, detections, marginalizations, confirmed_tracks,
                 matching_strategy,
                 assignment_threshold=None):
        self.detections = detections
        self.marginalizations = marginalizations
        self.confirmed_tracks = confirmed_tracks
        self.assignment_threshold = assignment_threshold
        self.detection_indices = np.arange(len(detections))
        self.matching_strategy = matching_strategy

    def match(self):
        """Run the configured strategy.

        Returns ``(matches, unmatched_tracks, unmatched_detections)`` where
        ``matches`` is a list of ``(track_idx, detection_idx)`` pairs.
        """
        self.get_matches()
        self.get_unmatched_tracks()
        self.get_unmatched_detections()
        return self.matches, self.unmatched_tracks, self.unmatched_detections

    def get_matches(self):
        """Dispatch on ``matching_strategy`` and fill ``self.matches``."""
        if self.matching_strategy == "max_and_threshold":
            self.max_and_threshold_matching()
        elif self.matching_strategy == "hungarian":
            self.hungarian()
        elif self.matching_strategy == "max_match":
            self.max_match()
        elif self.matching_strategy == "none":
            self.matches = []
        else:
            raise Exception('Unrecognized matching strategy: {}'.
                            format(self.matching_strategy))

    def get_unmatched_tracks(self):
        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,
                                              self.matches, 0)

    def get_unmatched_detections(self):
        self.unmatched_detections = get_unmatched(self.detection_indices, self.matches, 1, self.marginalizations)

    def _below_threshold(self, probability):
        # A threshold of None means "no threshold" instead of failing the comparison.
        return (self.assignment_threshold is not None
                and probability < self.assignment_threshold)

    def max_match(self):
        """Give every detection to the track that claims it with the highest probability.

        Each track claims its most likely column; claims on the dummy node or
        below the assignment threshold are dropped.  When several tracks
        claim the same detection the one with the larger marginal wins (the
        earlier track wins ties).
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return
        # detection_id -> (track_idx, probability of the current holder)
        claims = {}
        for row, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[row, :]
            column = np.argmax(marginalization)
            detection_id = column - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            probability = marginalization[column]
            if self._below_threshold(probability):
                continue
            # Compare the probabilities of the two competing tracks for this
            # detection.  The marginal matrix is indexed by row position and
            # by column (detection + 1), not by track id and detection id.
            holder = claims.get(detection_id)
            if holder is None or probability > holder[1]:
                claims[detection_id] = (track_idx, probability)
        for detection_id, (track_idx, _) in claims.items():
            self.matches.append((track_idx, detection_id))

    def max_and_threshold_matching(self):
        """Match every track to its most likely detection if it passes the threshold.

        Unlike ``max_match`` this does not resolve conflicts: two tracks may
        be matched to the same detection.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return
        for row, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[row, :]
            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            if self._below_threshold(marginalization[detection_id + 1]):
                continue
            self.matches.append((track_idx, detection_id))

    def hungarian(self):
        """Delegate to ``min_marg_matching`` and keep only its matches."""
        self.matches, _, _ = min_marg_matching(self.marginalizations,
                                               self.confirmed_tracks,
                                               self.assignment_threshold)
================================================
FILE: paper_experiments/utils/aligned_reid_utils.py
================================================
from __future__ import print_function
import os
import os.path as osp
import pickle
from scipy import io
import datetime
import time
from contextlib import contextmanager
import numpy as np
from PIL import Image
import torch
from torch.autograd import Variable
from models.aligned_reid_model import Model as aligned_reid_model
from models.deep_sort_model import ImageEncoder as deep_sort_model
from utils.resnet_reid_utils import ResNet_Loader
def time_str(fmt=None):
    """Format the current local time with ``fmt`` (default '%Y-%m-%d_%H:%M:%S')."""
    pattern = '%Y-%m-%d_%H:%M:%S' if fmt is None else fmt
    now = datetime.datetime.today()
    return now.strftime(pattern)
def load_pickle(path):
    """Load and return the object pickled at ``path``.

    The path must exist; this is enforced with an ``assert``.
    """
    assert osp.exists(path)
    with open(path, 'rb') as handle:
        return pickle.load(handle)
def save_pickle(obj, path):
    """Pickle ``obj`` to ``path`` (protocol 2).

    The directory of the absolute path is handed to ``may_make_dir`` first.
    """
    target_dir = osp.dirname(osp.abspath(path))
    may_make_dir(target_dir)
    with open(path, 'wb') as sink:
        pickle.dump(obj, sink, protocol=2)
def save_mat(ndarray, path):
    """Write ``ndarray`` to a .mat file at ``path`` under the variable name 'ndarray'."""
    payload = {'ndarray': ndarray}
    io.savemat(path, payload)
def to_scalar(vt):
    """Return the first element of a pytorch Variable or Tensor as a numpy scalar.

    The input is moved to the CPU, converted to numpy and flattened, so a
    tensor of size [1] yields its single value rather than a (1,) array.

    Raises TypeError for anything that is neither a Variable nor a Tensor.
    """
    if isinstance(vt, Variable):
        payload = vt.data
    elif torch.is_tensor(vt):
        payload = vt
    else:
        raise TypeError('Input should be a variable or tensor')
    return payload.cpu().numpy().flatten()[0]
def transfer_optim_state(state, device_id=-1):
    """Transfer an optimizer.state to cpu or specified gpu, which means
    transferring tensors of the optimizer.state to specified device.
    The modification is in place for the state.

    Nested dicts are processed recursively.  Values that cannot be moved
    (for example plain Python numbers) are left untouched.

    Args:
        state: An torch.optim.Optimizer.state
        device_id: gpu id, or -1 which means transferring to cpu

    Raises:
        RuntimeError: if a value is a ``torch.nn.Parameter``.
    """
    for key, val in state.items():
        if isinstance(val, dict):
            transfer_optim_state(val, device_id=device_id)
        elif isinstance(val, torch.nn.Parameter):
            raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
        else:
            # Since the Tensor/Variable merge (PyTorch 0.4) every tensor passes
            # an isinstance(..., Variable) check, so the former "is a Variable"
            # guard rejected all tensor state.  Plain tensors are moved instead.
            try:
                if device_id == -1:
                    state[key] = val.cpu()
                else:
                    state[key] = val.cuda(device=device_id)
            except Exception:
                # Deliberate best effort: non-tensor entries have nothing to
                # move.  `Exception` keeps KeyboardInterrupt/SystemExit alive.
                pass
def may_transfer_optims(optims, device_id=-1):
    """Move the state of every optimizer in ``optims`` to cpu or a gpu, in place.

    Args:
        optims: A list whose members are either torch optimizers or None;
            anything that is not an optimizer is skipped.
        device_id: gpu id, or -1 which means transferring to cpu
    """
    for candidate in optims:
        if not isinstance(candidate, torch.optim.Optimizer):
            continue
        transfer_optim_state(candidate.state, device_id=device_id)
def may_transfer_modules_optims(modules_and_or_optims, device_id=-1):
    """Move optimizers and modules to cpu or the specified gpu.

    Args:
        modules_and_or_optims: A list whose members are torch optimizers,
            torch.nn.Module instances or None.  Any other non-None member
            only triggers a printed warning.
        device_id: gpu id, or -1 which means transferring to cpu
    """
    to_cpu = device_id == -1
    for member in modules_and_or_optims:
        if isinstance(member, torch.optim.Optimizer):
            transfer_optim_state(member.state, device_id=device_id)
            continue
        if isinstance(member, torch.nn.Module):
            if to_cpu:
                member.cpu()
            else:
                member.cuda(device=device_id)
            continue
        if member is not None:
            print('[Warning] Invalid type {}'.format(member.__class__.__name__))
class TransferVarTensor(object):
    """Callable that returns its argument placed on a fixed device.

    ``device_id == -1`` selects the cpu; any other value is passed to
    ``.cuda()``.
    """

    def __init__(self, device_id=-1):
        self.device_id = device_id

    def __call__(self, var_or_tensor):
        if self.device_id == -1:
            return var_or_tensor.cpu()
        return var_or_tensor.cuda(self.device_id)
class TransferModulesOptims(object):
    """Callable that moves optimizers/modules to a fixed cpu or gpu target."""

    def __init__(self, device_id=-1):
        # -1 selects the CPU; otherwise the gpu id handed to the transfer helper.
        self.device_id = device_id

    def __call__(self, modules_and_or_optims):
        """Transfer every module/optimizer in the list, in place."""
        target = self.device_id
        may_transfer_modules_optims(modules_and_or_optims, target)
def set_devices(sys_device_ids):
    """Make the given GPUs visible and return device-transfer wrappers.

    Sets the `CUDA_VISIBLE_DEVICES` environment variable to a clean
    comma-separated list (no spaces, no trailing comma) of `sys_device_ids`.

    Args:
        sys_device_ids: a tuple; which GPUs to use
            e.g. sys_device_ids = (), only use cpu
                 sys_device_ids = (3,), use the 4th gpu
                 sys_device_ids = (0, 1, 2, 3,), use first 4 gpus
                 sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus

    Returns:
        TVT: a `TransferVarTensor` callable
        TMO: a `TransferModulesOptims` callable
    """
    import os
    # An empty tuple yields '', which hides every GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in sys_device_ids)
    # Models and user defined Variables/Tensors are transferred to the first
    # visible device (relative id 0), or to the CPU when no GPU is requested.
    device_id = 0 if len(sys_device_ids) > 0 else -1
    TVT = TransferVarTensor(device_id)
    TMO = TransferModulesOptims(device_id)
    return TVT, TMO
def set_devices_for_ml(sys_device_ids):
    """Mutual-learning variant of `set_devices`: one device group per model.

    Sets `CUDA_VISIBLE_DEVICES` to the sorted, de-duplicated GPU ids (as a clean
    comma-separated list) and returns transfer wrappers for each model.

    Args:
        sys_device_ids: a tuple of tuples; which devices to use for each model,
            len(sys_device_ids) should be equal to number of models. Examples:
                sys_device_ids = ((-1,), (-1,))
                    the two models both on CPU
                sys_device_ids = ((-1,), (2,))
                    the 1st model on CPU, the 2nd model on GPU 2
                sys_device_ids = ((3,),)
                    the only one model on the 4th gpu
                sys_device_ids = ((0, 1), (2, 3))
                    the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3
                sys_device_ids = ((0,), (0,))
                    the two models both on GPU 0
                sys_device_ids = ((0,), (0,), (1,), (1,))
                    the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1

    Returns:
        TVTs: a list of `TransferVarTensor` callables, one for one model.
        TMOs: a list of `TransferModulesOptims` callables, one for one model.
        relative_device_ids: a list of lists; `sys_device_ids` transformed to
            relative ids; to be used in `DataParallel`
    """
    import os
    # All distinct GPU ids in ascending order; -1 (CPU) is not a GPU.
    unique_sys_device_ids = sorted(
        {dev for ids in sys_device_ids for dev in ids if dev != -1})
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
        str(dev) for dev in unique_sys_device_ids)
    # Once the visible set is restricted, a GPU is addressed by its position in
    # that set rather than by its system id.
    relative_of = {sys_id: rel for rel, sys_id in enumerate(unique_sys_device_ids)}
    relative_device_ids = []
    TVTs, TMOs = [], []
    for ids in sys_device_ids:
        relative_ids = [dev if dev == -1 else relative_of[dev] for dev in ids]
        relative_device_ids.append(relative_ids)
        # Models and user defined Variables/Tensors go to the first device of
        # the model's group.
        TVTs.append(TransferVarTensor(relative_ids[0]))
        TMOs.append(TransferModulesOptims(relative_ids[0]))
    return TVTs, TMOs, relative_device_ids
def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
    """Restore modules/optimizers from a checkpoint file.

    Any 'fc.weight' / 'fc.bias' entries are removed from each stored state dict
    before it is copied into the matching module/optimizer.

    Args:
        modules_optims: A list, which members are either torch.nn.optimizer
            or torch.nn.Module.
        ckpt_file: The file path.
        load_to_cpu: Boolean. Whether to transform tensors in
            modules/optimizers to cpu type.
        verbose: Boolean. Whether to print the restored epoch and scores.

    Returns:
        (epoch, scores) as stored in the checkpoint.
    """
    if load_to_cpu:
        # Identity mapping keeps the deserialised storages on the CPU.
        map_location = lambda storage, loc: storage
    else:
        map_location = None
    ckpt = torch.load(ckpt_file, map_location=map_location)
    for target, stored in zip(modules_optims, ckpt['state_dicts']):
        for fc_key in ('fc.weight', 'fc.bias'):
            if fc_key in stored:
                del stored[fc_key]
        load_state_dict(target, stored)
    if verbose:
        print('Resume from ckpt {}, \nepoch {}, \nscores {}'.format(
            ckpt_file, ckpt['ep'], ckpt['scores']))
    return ckpt['ep'], ckpt['scores']
def save_ckpt(modules_optims, ep, scores, ckpt_file):
    """Write the state_dicts of modules/optimizers to a checkpoint file.

    Args:
        modules_optims: A list, which members are either torch.nn.optimizer
            or torch.nn.Module.
        ep: the current epoch number
        scores: the performance of current model
        ckpt_file: The file path; its parent directory is created if needed.

    Note:
        torch.save() preserves the device type and id of the saved tensors, so
        when loading the checkpoint on different devices you have to tell
        torch.load() where to map them (cpu or the desired gpu).
    """
    payload = dict(
        state_dicts=[item.state_dict() for item in modules_optims],
        ep=ep,
        scores=scores)
    target_dir = osp.dirname(osp.abspath(ckpt_file))
    may_make_dir(target_dir)
    torch.save(payload, ckpt_file)
def load_state_dict(model, src_state_dict):
    """Copy matching parameters and buffers from `src_state_dict` into `model`.

    The keys of `src_state_dict` NEED NOT match `model.state_dict()` exactly:
    source keys unknown to the model are skipped (changed for fine tuning), and
    a failing copy only prints a warning before continuing. Missing keys on
    either side are not reported.

    Arguments:
        model: A torch.nn.Module object.
        src_state_dict (dict): A dict containing parameters and persistent
            buffers.

    Note:
        Adapted from torch.nn.modules.module.load_state_dict() to give more
        detailed warnings.
    """
    from torch.nn import Parameter
    dest_state_dict = model.state_dict()
    for name, param in src_state_dict.items():
        if name in dest_state_dict:
            # Serialized Parameters are unwrapped for backwards compatibility.
            source = param.data if isinstance(param, Parameter) else param
            try:
                dest_state_dict[name].copy_(source)
            except Exception as e:
                print("Warning: Error occurs when copying '{}': {}"
                      .format(name, str(e)))
def is_iterable(obj):
    """Return True when `obj` exposes `__len__` (sized containers only)."""
    has_len = hasattr(obj, '__len__')
    return has_len
def may_set_mode(maybe_modules, mode):
    """Switch every torch module in `maybe_modules` to 'train' or 'eval' mode.

    Args:
        maybe_modules: an object or a list of objects; anything without
            `__len__` is treated as a single object, and members that are not
            `torch.nn.Module` are ignored.
        mode: either 'train' or 'eval'.
    """
    assert mode in ['train', 'eval']
    if hasattr(maybe_modules, '__len__'):
        candidates = maybe_modules
    else:
        candidates = [maybe_modules]
    for module in candidates:
        if not isinstance(module, torch.nn.Module):
            continue
        # `mode` is exactly the name of the Module method to invoke.
        getattr(module, mode)()
def may_make_dir(path):
    """Create directory `path` (with parents) unless it already exists.

    Args:
        path: a dir, or result of `osp.dirname(osp.abspath(file_path))`;
            None and '' are ignored.

    Note:
        `osp.exists('')` returns `False`, while `osp.exists('.')` returns
        `True`! That is why the empty string is filtered out explicitly
        (`if path is None or '':` would always be true and is wrong).
    """
    if path is None or path == '':
        return
    if osp.exists(path):
        return
    os.makedirs(path)
class AverageMeter(object):
    """Modified from Tong Xiao's open-reid.
    Tracks the latest value together with the weighted mean of all values."""

    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def reset(self):
        """Forget everything recorded so far."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        # The tiny epsilon avoids a division by zero when count is 0.
        self.avg = float(self.sum) / (self.count + 1e-20)
class RunningAverageMeter(object):
    """Keeps the current value and an exponentially weighted running average."""

    def __init__(self, hist=0.99):
        # `hist` is the weight given to the previous average on each update.
        self.val = None
        self.avg = None
        self.hist = hist

    def reset(self):
        """Drop the current value and average; `hist` is kept."""
        self.val = None
        self.avg = None

    def update(self, val):
        """Blend `val` into the average; the first value seeds it directly."""
        previous = self.avg
        self.avg = val if previous is None \
            else previous * self.hist + val * (1 - self.hist)
        self.val = val
class RecentAverageMeter(object):
    """Averages only the most recent `hist_size` recorded values."""

    def __init__(self, hist_size=100):
        self.hist_size = hist_size
        self.fifo = []
        self.val = 0

    def reset(self):
        """Clear the history and the current value."""
        self.fifo = []
        self.val = 0

    def update(self, val):
        """Record `val`, discarding the oldest entry once the window is full."""
        self.val = val
        self.fifo.append(val)
        if len(self.fifo) > self.hist_size:
            self.fifo.pop(0)

    @property
    def avg(self):
        """Mean of the stored window; asserts that at least one value exists."""
        window = self.fifo
        assert len(window) > 0
        return float(sum(window)) / len(window)
def get_model_wrapper(model, multi_gpu):
    """Return `model` wrapped in `DataParallel` when `multi_gpu` is truthy,
    otherwise return `model` itself."""
    from torch.nn.parallel import DataParallel
    return DataParallel(model) if multi_gpu else model
class ReDirectSTD(object):
    """Modified from Tong Xiao's `Logger` in open-reid.
    This class overwrites sys.stdout or sys.stderr, so that console logs can
    also be written to file.

    Args:
        fpath: file path; when None nothing is redirected or written to file
        console: one of ['stdout', 'stderr']
        immediately_visible: If `False`, the file is opened only once and closed
            after exiting. In this case, the message written to file may not be
            immediately visible (Because the file handle is occupied by the
            program?). If `True`, each writing operation of the console will
            open, write to, and close the file. If your program has tons of
            writing operations, the cost of opening and closing file may be
            obvious. (?)

    Usage example:
        `ReDirectSTD('stdout.txt', 'stdout', False)`
        `ReDirectSTD('stderr.txt', 'stderr', False)`

    NOTE: File will be deleted if already existing. Log dir and file is created
    lazily -- if no message is written, the dir and file will not be created.
    """

    def __init__(self, fpath=None, console='stdout', immediately_visible=False):
        import sys
        import os
        import os.path as osp
        assert console in ['stdout', 'stderr']
        # Keep the real stream so messages still reach the console.
        self.console = sys.stdout if console == 'stdout' else sys.stderr
        self.file = fpath
        self.f = None
        self.immediately_visible = immediately_visible
        if fpath is not None:
            # Remove existing log file.
            if osp.exists(fpath):
                os.remove(fpath)
        # Overwrite the global stream only when a file target was given.
        if fpath is not None:
            if console == 'stdout':
                sys.stdout = self
            else:
                sys.stderr = self

    def __del__(self):
        self.close()

    def __enter__(self):
        # Return the redirector so `with ReDirectSTD(...) as r:` binds it
        # instead of None.
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        """Echo `msg` to the console and, if configured, append it to the file."""
        self.console.write(msg)
        if self.file is not None:
            # The log directory is created lazily on first write.
            may_make_dir(os.path.dirname(osp.abspath(self.file)))
            if self.immediately_visible:
                with open(self.file, 'a') as f:
                    f.write(msg)
            else:
                if self.f is None:
                    self.f = open(self.file, 'w')
                self.f.write(msg)

    def flush(self):
        """Flush the console and force buffered file content to disk."""
        self.console.flush()
        if self.f is not None:
            self.f.flush()
            import os
            os.fsync(self.f.fileno())

    def close(self):
        """Close the wrapped console stream and the log file, if open.

        NOTE(review): this closes the underlying console stream itself and does
        not restore sys.stdout / sys.stderr -- confirm callers never print after
        closing.
        """
        self.console.close()
        if self.f is not None:
            self.f.close()
def set_seed(seed):
    """Seed Python's `random`, numpy and torch with `seed`, and disable cuDNN.

    Each step is announced on stdout.
    """
    import random
    import numpy as np
    import torch

    random.seed(seed)
    print('setting random-seed to {}'.format(seed))

    np.random.seed(seed)
    print('setting np-random-seed to {}'.format(seed))

    torch.backends.cudnn.enabled = False
    print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))

    # set seed for CPU
    torch.manual_seed(seed)
    print('setting torch-seed to {}'.format(seed))
def print_array(array, fmt='{:.2f}', end=' '):
    """Print a 1-D tuple, list, or numpy array containing digits.

    Every element is converted to float, formatted with `fmt` and followed by
    `end`; the line is terminated with a newline. The formatted string is both
    printed and returned.
    """
    pieces = [fmt.format(float(item)) + end for item in array]
    s = ''.join(pieces) + '\n'
    print(s)
    return s
# Great idea from https://github.com/amdegroot/ssd.pytorch
def str2bool(v):
    """Return True when `v` (case-insensitive) is one of yes/true/t/1."""
    truthy = ("yes", "true", "t", "1")
    lowered = v.lower()
    return lowered in truthy
def tight_float_str(x, fmt='{:.4f}'):
    """Format `x` with `fmt`, then strip trailing zeros and a dangling '.'."""
    formatted = fmt.format(x)
    without_zeros = formatted.rstrip('0')
    return without_zeros.rstrip('.')
def find_index(seq, item):
    """Return the position of the first element equal to `item`, or -1."""
    matches = (pos for pos, element in enumerate(seq) if item == element)
    return next(matches, -1)
def adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):
    """Exponentially decay the learning rate in the later phase of training.

    All parameter groups of the optimizer receive the same learning rate.

    Args:
        optimizer: a pytorch `Optimizer` object
        base_lr: starting learning rate
        ep: current epoch, ep >= 1
        total_ep: total number of epochs to train
        start_decay_at_ep: start decaying at the BEGINNING of this epoch

    Example:
        base_lr = 2e-4
        total_ep = 300
        start_decay_at_ep = 201
        It means the learning rate starts at 2e-4 and begins decaying after 200
        epochs. And training stops after 300 epochs.

    NOTE:
        It is meant to be called at the BEGINNING of an epoch.
    """
    assert ep >= 1, "Current epoch number should be >= 1"
    if ep < start_decay_at_ep:
        # Still in the constant-rate phase: leave the optimizer untouched.
        return
    for g in optimizer.param_groups:
        progress = (float(ep + 1 - start_decay_at_ep)
                    / (total_ep + 1 - start_decay_at_ep))
        g['lr'] = (base_lr * (0.001 ** progress))
    print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))
def adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):
    """Multiply the learning rate by a factor at the BEGINNING of given epochs.

    All parameter groups of the optimizer receive the same learning rate.

    Args:
        optimizer: a pytorch `Optimizer` object
        base_lr: starting learning rate
        ep: current epoch, ep >= 1
        decay_at_epochs: a list or tuple; learning rate is multiplied by a
            factor at the BEGINNING of these epochs
        factor: a number in range (0, 1)

    Example:
        base_lr = 1e-3
        decay_at_epochs = [51, 101]
        factor = 0.1
        It means the learning rate starts at 1e-3 and is multiplied by 0.1 at
        the BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at
        the BEGINNING of the 101'st epoch, then stays unchanged till the end of
        training.

    NOTE:
        It is meant to be called at the BEGINNING of an epoch.
    """
    assert ep >= 1, "Current epoch number should be >= 1"
    if ep not in decay_at_epochs:
        return
    # Position of the first matching decay epoch; it fixes how many times the
    # factor has been applied so far.
    ind = next(pos for pos, epoch in enumerate(decay_at_epochs) if ep == epoch)
    for g in optimizer.param_groups:
        g['lr'] = base_lr * factor ** (ind + 1)
    print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))
@contextmanager
def measure_time(enter_msg):
    """Context manager that prints `enter_msg` on entry and the elapsed
    wall-clock seconds once the wrapped block finishes normally."""
    started_at = time.time()
    print(enter_msg)
    yield
    elapsed = time.time() - started_at
    print('Done, {:.2f}s'.format(elapsed))
# @profile
def generate_features(appearance_model, patches, opt, object_ids = None):
    """Compute one appearance feature per image patch, one patch at a time.

    Args:
        appearance_model: the model that is called (or whose `inference`
            method is called) on each patch, depending on
            `opt.appearance_model`.
        patches: sequence of tensors or None; None / empty tensors yield a
            None feature at the same position.
        opt: options object providing `using_cuda`, `perfect` and
            `appearance_model` ('aligned_reid', 'deepsort' or 'resnet_reid').
        object_ids: per-patch indices, only read when `opt.perfect` is set, in
            which case the feature is a 1024-long one-hot vector.

    Returns:
        A list with one feature (or None) per patch.
    """
    if opt.using_cuda:
        Tensor = torch.cuda.FloatTensor
    else:
        Tensor = torch.FloatTensor
    outputs = []
    for idx, patch in enumerate(patches):
        is_empty = patch is None or patch.nelement() == 0
        if is_empty:
            outputs.append(None)
            continue
        # Add a leading batch dimension of size one.
        batched = patch.unsqueeze(0)
        if opt.perfect:
            feature = torch.zeros(1024)
            feature[object_ids[idx]] = 1
            feature = feature.type(Tensor)
        elif opt.appearance_model == 'aligned_reid':
            with torch.no_grad():
                feature, _ = appearance_model(batched.cuda())
            feature = feature.squeeze(0).type(Tensor)
        elif opt.appearance_model == 'deepsort':
            # This model is fed a channels-last numpy array.
            as_numpy = batched.permute(0, 2, 3, 1).cpu().numpy()
            feature = appearance_model(as_numpy)
            feature = feature[0]
        elif opt.appearance_model == 'resnet_reid':
            channels_last = batched.permute(0, 2, 3, 1)
            feature = appearance_model.inference([channels_last.squeeze(0)])
            feature = feature[0][0].type(Tensor)
        outputs.append(feature)
    return outputs
# @profile
def generate_features_batched(appearance_model, patches, opt, object_ids = None):
if opt.perfect or opt.appearance_model == 'deepsort': # Do old/slow way if perfect features or deepsort features
return generate_features(appearance_model, patches, opt, object_ids)
if opt.appearance_model == 'resnet_reid':
Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
features = []
resnet_patches = []
for i, patch in enumerate(patches):
if patch is None or patch.nelement()==0:
features.append(None)
else:
features.append(1)
resnet_patches.append(patch.permute(1,2,0))
resnet_features = appearance_model.inference(resnet_patches)
ctr = 0
for i in range(len(features)):
if features[i] is not None:
features[i] = resnet_features[ctr].type(Tensor)
ctr += 1
return features
elif opt.appearance_model == 'aligned_reid':
return generate_features(appearance_model, patches, opt, object_ids) #TODO: Fix batched appearance features. This currently gives bad features
Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
maxx = -1
maxy = -1
idxs = []
features = []
for i, patch in enumerate(patches):
if patch is None or patch.nelement()==0:
continue
maxx = max(maxx, patch.size()[1])
maxy = max(maxy, patch.size()[2])
idxs.append(i)
if(maxx==-1 and maxy==-1):
return features
batch = torch.zeros(len(idxs),3,maxx,maxy).cuda()
for i, idx in enumerate(idxs):
patch = patches[idx]
patchx = patch.size()[1]
patchy = patch.size()[2]
batch[i,:,:patchx,:patchy] = patch
with torch.no_grad():
features_torch, _ = appearance_model(batch)
features_torch = features_torch.type(Tensor)
i = 0
ctr = 0
for idx in idxs:
while(i < idx):
features.append(None)
i+=1
features.append(features_torch[ctr,:])
i+=1
ctr+=1
while(i conf_threshold]
########
det_frames = detections[:, 0]
det_confidence = detections[:, 6]
gt_boxes = np.asarray(list(zip(gt[:, 2], gt[:, 3], gt[:, 4], gt[:, 5])))
det_boxes = np.asarray(list(zip(detections[:, 2], detections[:, 3], detections[:, 4], detections[:, 5])))
out_matrix = []
assigned_ids = []
for frame in np.unique(det_frames):
frame_mask_det = det_frames == frame
frame_mask_gt = gt_frames == frame
gt_ids = gt[frame_mask_gt, 1]
frame_gt_boxes = gt_boxes[frame_mask_gt]
frame_det_boxes = det_boxes[frame_mask_det]
for i, det_box in enumerate(frame_det_boxes):
iou_list = np.asarray([iou(gt_box, det_box) for gt_box in frame_gt_boxes])
iou_sorted = np.argsort(iou_list)
positive_idx = np.where(iou_list >= iou_threshold)[0]
if len(positive_idx)==0:
assigned_ids.append(-1)
else:
assigned_ids.append(gt_ids[iou_sorted[-1]])
assigned_ids = np.expand_dims(np.asarray(assigned_ids), 1)
try:
out_matrix = np.hstack([np.expand_dims(detections[:,0], 1), assigned_ids, detections[:,2:]])
except:
pdb.set_trace()
np.savetxt(detection_path, out_matrix, delimiter=',', fmt = '%.2f')
return
# Script entry point: assign ground-truth ids to the car detections of the
# KITTI sequences 0000-0020, rewriting each detection file.
if __name__=='__main__':
    ap = []
    KITTI_root = 'data/KITTI/sequences'
    for sequence in tqdm(range(21)):
        # Sequence folders are named with four zero-padded digits.
        sequence_dir = os.path.join(KITTI_root, '%.4d'%sequence)
        detection_file = os.path.join(sequence_dir, 'det', 'rrc_subcnn_car_det.txt')
        ground_truth_file = os.path.join(sequence_dir, 'gt', 'gt_car.txt')
        assign_detection_id(detection_file, ground_truth_file)
================================================
FILE: paper_experiments/utils/calibration.py
================================================
import numpy as np
import cv2
import os
import yaml
import torch
import pdb
class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in .txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in .bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf

        TODO(rqi): do matrix multiplication only once for each projection.
    '''

    def __init__(self, calib_filepath):
        ''' Load the calibration file and derive the projection matrices.

            The torch copies are moved to the GPU with `.cuda()`, so a CUDA
            device is required.
        '''
        calibs = self.read_calib_file(calib_filepath)
        # Projection matrix from rect camera coord to image2 coord
        self.P = np.reshape(calibs['P2'], [3,4])
        self.P_torch = torch.from_numpy(self.P).float().cuda()
        # Rigid transform from Velodyne coord to reference camera coord;
        # the key is spelled differently across calibration files.
        try:
            self.V2C = calibs['Tr_velo_to_cam']
        except KeyError:
            self.V2C = calibs['Tr_velo_cam']
        self.V2C = np.reshape(self.V2C, [3,4])
        self.C2V = inverse_rigid_trans(self.V2C)
        # Rotation from reference camera coord to rect camera coord
        try:
            self.R0 = calibs['R0_rect']
        except KeyError:
            self.R0 = calibs['R_rect']
        self.R0 = np.reshape(self.R0,[3,3])
        self.R0_torch = torch.from_numpy(self.R0).float().cuda()
        # D = (P * [R0 0; 0 1])^T, so homogeneous ref points (nx4) can be
        # projected to the image with one right-multiplication.
        RA = np.zeros((4,4))
        RA[:3,:3] = self.R0
        RA[3,3] = 1
        self.D = np.matmul(self.P,RA).T
        self.D_torch = torch.from_numpy(self.D).float().cuda()

        # Camera intrinsics and extrinsics
        self.c_u = self.P[0,2]
        self.c_v = self.P[1,2]
        self.f_u = self.P[0,0]
        self.f_v = self.P[1,1]
        self.b_x = self.P[0,3]/(-self.f_u) # relative
        self.b_y = self.P[1,3]/(-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
            Each non-empty line is "key: v1 v2 ..."; values that are not all
            floats are skipped.
            Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f:
                line = line.rstrip()
                if len(line)==0: continue
                key, value = line.split(' ', 1)
                if key.endswith(':'):
                    key = key[:-1]
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass
        return data

    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        Tr_velo_to_cam = np.zeros((3,4))
        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])
        Tr_velo_to_cam[:,3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Output: nx4 points in Homogeneous by appending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
        return pts_3d_hom

    def cart2hom_torch(self, pts_3d):
        ''' Torch version of cart2hom: append a column of ones to nx3 points.
            The ones are created on the same device as `pts_3d` instead of a
            hard-coded "cuda:0", so tensors on any device can be used.
        '''
        n = pts_3d.size()[0]
        ones = torch.ones(n,1).to(pts_3d.device)
        pts_3d_hom = torch.cat((pts_3d, ones), 1)
        return pts_3d_hom

    # ===========================
    # ------- 3d to 3d ----------
    # ===========================
    def project_velo_to_ref(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord; transformed with V2C. '''
        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        ''' Input: nx3 points in ref camera coord; transformed with C2V. '''
        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_ref_to_rect_torch(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return torch.transpose(torch.matmul(self.R0_torch, torch.transpose(pts_3d_ref,0,1)),0,1)

    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx3 points in rect camera coord.
        '''
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # ===========================
    # ------- 3d to 2d ----------
    # ===========================
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3
        # Perspective division by the third (depth) column.
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_rect_to_image_torch(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom_torch(pts_3d_rect)
        pts_2d = torch.matmul(pts_3d_rect, torch.transpose(self.P_torch,0,1)) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_ref_to_image_torch(self, pts_3d_ref):
        ''' Input: nx3 points in ref camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_ref = self.cart2hom_torch(pts_3d_ref)
        pts_2d = torch.matmul(pts_3d_ref, self.D_torch) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # ===========================
    # ------- 2d to 3d ----------
    # ===========================
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
        pts_3d_rect = np.zeros((n,3))
        pts_3d_rect[:,0] = x
        pts_3d_rect[:,1] = y
        pts_3d_rect[:,2] = uv_depth[:,2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        ''' Input: nx3 (u, v, depth) rows.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)
def rotx(t):
    ''' 3D rotation matrix for an angle `t` (radians) about the x-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    first_row = [1, 0, 0]
    second_row = [0, cos_t, -sin_t]
    third_row = [0, sin_t, cos_t]
    return np.array([first_row, second_row, third_row])
def roty(t):
    ''' 3D rotation matrix for an angle `t` (radians) about the y-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    first_row = [cos_t, 0, sin_t]
    second_row = [0, 1, 0]
    third_row = [-sin_t, 0, cos_t]
    return np.array([first_row, second_row, third_row])
def rotz(t):
    ''' 3D rotation matrix for an angle `t` (radians) about the z-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    first_row = [cos_t, -sin_t, 0]
    second_row = [sin_t, cos_t, 0]
    third_row = [0, 0, 1]
    return np.array([first_row, second_row, third_row])
def transform_from_rot_trans(R, t):
    ''' Build the 4x4 homogeneous transformation matrix [R t; 0 0 0 1] from a
        rotation matrix and a translation vector (both reshaped as needed).
    '''
    rotation = R.reshape(3, 3)
    translation = t.reshape(3, 1)
    upper = np.hstack([rotation, translation])
    bottom_row = [0, 0, 0, 1]
    return np.vstack((upper, bottom_row))
def inverse_rigid_trans(Tr):
    ''' Invert a rigid body transform given as a 3x4 matrix [R|t].
        The result is the 3x4 matrix [R'|-R't], with the same dtype as `Tr`.
    '''
    rotation_t = np.transpose(Tr[0:3,0:3])
    translation = Tr[0:3,3]
    inv_Tr = np.zeros_like(Tr) # 3x4
    inv_Tr[0:3,0:3] = rotation_t
    inv_Tr[0:3,3] = np.dot(-rotation_t, translation)
    return inv_Tr
def read_label(label_filename):
    ''' Parse a label file into a list of `Object3d`, one per line.

        Trailing whitespace is stripped from every line before it is handed to
        `Object3d`. The file is closed as soon as it has been read.
    '''
    with open(label_filename) as label_file:
        lines = [line.rstrip() for line in label_file]
    objects = [Object3d(line) for line in lines]
    return objects
def load_image(img_filename):
    ''' Read the image at `img_filename` with OpenCV and return the result of
        `cv2.imread` unchanged.
    '''
    image = cv2.imread(img_filename)
    return image
def load_velo_scan(velo_filename):
    ''' Load a binary file of float32 values and return it as an (n, 4) array,
        i.e. four values per row.
    '''
    raw_values = np.fromfile(velo_filename, dtype=np.float32)
    return raw_values.reshape((-1, 4))
def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.

    Usage: pts_2d = projectToImage(pts_3d, P)
      input: pts_3d: nx3 matrix
             P:      3x4 projection matrix
      output: pts_2d: nx2 matrix

      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
      => normalize projected_pts_2d(2xn)

      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
          => normalize projected_pts_2d(nx2)
    '''
    n = pts_3d.shape[0]
    # Homogeneous coordinates: append a column of ones (nx4).
    pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3
    # Perspective division by the third column.
    pts_2d[:,0] /= pts_2d[:,2]
    pts_2d[:,1] /= pts_2d[:,2]
    return pts_2d[:,0:2]
def compute_box_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        bounding box into the image plane.
        Returns:
            corners_2d: (8,2) array in left image coord, or None when any
                corner has a depth (z) below 0.1.
            corners_3d: (8,3) array in rect camera coord.
    '''
    # rotation about the yaw (y) axis
    rotation = roty(obj.ry)

    # 3d bounding box dimensions
    l, w, h = obj.l, obj.w, obj.h

    # 3d bounding box corners in the object frame
    x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]
    y_corners = [0,0,0,0,-h,-h,-h,-h]
    z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]

    # rotate, then translate by the object position, one axis at a time
    corners_3d = np.dot(rotation, np.vstack([x_corners,y_corners,z_corners]))
    for axis in range(3):
        corners_3d[axis,:] = corners_3d[axis,:] + obj.t[axis]

    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2,:]<0.1):
        return None, np.transpose(corners_3d)

    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)
def compute_orientation_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        object orientation vector into the image plane.
        Returns:
            orientation_2d: (2,2) array in left image coord, or None when
                either end point has a depth (z) below 0.1.
            orientation_3d: (2,3) array in rect camera coord.
    '''
    # rotation about the yaw (y) axis
    rotation = roty(obj.ry)

    # orientation segment in the object frame: origin -> (l, 0, 0)
    segment = np.array([[0.0, obj.l],[0,0],[0,0]])

    # rotate, then translate into the camera coordinate system
    orientation_3d = np.dot(rotation, segment)
    for axis in range(3):
        orientation_3d[axis,:] = orientation_3d[axis,:] + obj.t[axis]

    # vector behind image plane?
    if np.any(orientation_3d[2,:]<0.1):
        return None, np.transpose(orientation_3d)

    # project orientation into the image plane
    orientation_2d = project_to_image(np.transpose(orientation_3d), P)
    return orientation_2d, np.transpose(orientation_3d)
def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: array of the 8 box vertices (only the first two columns, the image
            x and y, are used) in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
        Returns the image that was drawn on.
    '''
    qs = qs.astype(np.int32)
    # OpenCV >= 3 exposes the anti-aliased line type as LINE_AA; the old CV_AA
    # constant only exists in OpenCV 2.
    line_type = cv2.LINE_AA if hasattr(cv2, 'LINE_AA') else cv2.CV_AA

    def _point(index):
        # Plain Python ints are accepted by every OpenCV version.
        return (int(qs[index,0]), int(qs[index,1]))

    for k in range(0,4):
        # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
        edges = (
            (k, (k+1)%4),          # edge of the first face (vertices 0-3)
            (k+4, (k+1)%4 + 4),    # edge of the second face (vertices 4-7)
            (k, k+4),              # edge connecting the two faces
        )
        for i, j in edges:
            cv2.line(image, _point(i), _point(j), color, thickness, line_type)
    return image
class OmniCalibration(Calibration):
    ''' Calibration driven by the `defaults.yaml` and `cameras.yaml` files of a
        calibration folder. The base-class constructor is not called, so none
        of the KITTI matrices (P, V2C, R0, ...) are set on instances.
    '''

    def __init__(self, calib_folder):
        global_config = os.path.join(calib_folder, 'defaults.yaml')
        camera_config = os.path.join(calib_folder, 'cameras.yaml')
        with open(global_config) as f:
            self.global_config_dict = yaml.safe_load(f)

        with open(camera_config) as f:
            self.camera_config_dict = yaml.safe_load(f)

        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')
        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')
        # image shape is (color channels, height, width)
        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']

    def project_ref_to_image_torch(self, pointcloud):
        ''' Project points (columns used as x, y, z) to image coordinates.
            The horizontal angle atan2(x, z) is mapped linearly onto the image
            width; the row uses the median focal length and optical center.
            Returns an nx2 tensor of (x, y) image positions.
        '''
        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)
        horizontal_fraction = theta/ (2*np.pi)
        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]
        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y
        pts_2d = torch.stack([x, y], dim=1)

        return pts_2d

    def project_image_to_rect(self, uvdepth):
        ''' Input: nx3 rows of (u, v, depth).
            Output: nx3 array of (x, y, z) obtained from the image column's
            angle and the given depth.
        '''
        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi
        z = uvdepth[:, 2]*np.cos(theta)
        x = uvdepth[:, 2]*np.sin(theta)
        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))
        return np.stack([x,y,z], axis=1)

    def project_velo_to_ref(self, pointcloud):
        ''' Reorder the columns to (1, 2, 0) and negate the first two of the
            result. The column selection creates a new array, so the input is
            not modified.
        '''
        pointcloud = pointcloud[:, [1, 2, 0]]
        pointcloud[:, 0] *= -1
        pointcloud[:, 1] *= -1
        return pointcloud

    def move_lidar_to_camera_frame(self, pointcloud, upper = True):
        ''' Translate and rotate a lidar point cloud using the configured
            'lidar_upper_to_rgb' (upper=True) or 'lidar_lower_to_rgb' entry.
            The point cloud is modified in place and returned.
        '''
        # assumed only rotation about z axis
        if upper:
            pointcloud -= self.global_config_dict['lidar_upper_to_rgb']['translation']
            theta = self.global_config_dict['lidar_upper_to_rgb']['rotation'][-1]
        else:
            pointcloud -= self.global_config_dict['lidar_lower_to_rgb']['translation']
            theta = self.global_config_dict['lidar_lower_to_rgb']['rotation'][-1]

        rotation_matrix = torch.Tensor([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]).type(pointcloud.type())
        # NOTE(review): `unsqueeze(0)` adds a leading dimension to the matmul
        # result before the final transpose -- confirm the assigned shape
        # matches the (n, 2) slice.
        pointcloud[:, :2] = torch.matmul(rotation_matrix.unsqueeze(0), pointcloud[:, :2].transpose(0,1)).transpose(0,1)
        return pointcloud

    def calculate_median_param_value(self, param):
        ''' Median of one intrinsic parameter over the omni cameras.

            Args:
                param: one of 'f_x', 'f_y', 't_x', 't_y', 's'; selects which
                    entry of each camera's space-separated 'K' string is read.
            Raises:
                ValueError: if `param` is not one of the names above.
        '''
        index_of = {'f_y': 4, 'f_x': 0, 't_y': 5, 't_x': 2, 's': 1}
        if param not in index_of:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError('Wrong parameter!')
        idx = index_of[param]

        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']
        parameter_list = []
        for sensor, camera_params in self.camera_config_dict['cameras'].items():
            if sensor not in omni_camera:
                continue
            K_matrix = camera_params['K'].split(' ')
            parameter_list.append(float(K_matrix[idx]))
        return np.median(parameter_list)
================================================
FILE: paper_experiments/utils/combine_and_process_detections.py
================================================
import os
from os import listdir
from os.path import isfile, join
#root = "/cvgl2/u/mihirp/depth_tracking/data/JRDB/sequences/"
#root = "/cvgl2/u/mihirp/depth_tracking/data/JRDB/test_sequences/"
#root = "/cvgl2/u/mihirp/depth_tracking/data/KITTI/sequences/"
root = "/cvgl2/u/mihirp/depth_tracking/data/KITTI/test_sequences/"
file_name = "new_subcnn_faster_rcnn"
# file_name = "detectron2_x101"
def threshold(filename, thresh, min, max):
    """Read space-separated detections and keep those scoring above `thresh`.

    The score is the last field of each line. For every kept detection the
    score field is replaced by the string of `(score - thresh) / (max - thresh)`.
    Blank lines (including a trailing newline at the end of the file) are
    ignored, so the last detection is kept whether or not the file ends with a
    newline.

    Args:
        filename: path of the detection file.
        thresh: detections with score <= thresh are discarded.
        min: unused; kept for backward compatibility with existing callers.
        max: score that maps to 1.0 after rescaling.

    Returns:
        A list of detections, each a list of string fields.
    """
    detections = []
    with open(filename, 'r') as readfile:
        for det in readfile.read().split('\n'):
            if not det.strip():
                continue
            parsedet = det.split(' ')
            score = float(parsedet[-1])
            parsedet[-1] = str((score - thresh) / (max - thresh))
            if score > thresh:
                detections.append(parsedet)
    return detections
for seq in sorted(os.listdir(root)): #21 for normal, 29 for testing
path = os.path.join(root,seq,'det')
with open(os.path.join(path,file_name+'_raw.txt'), 'w') as f:
pred_dets = []
#pred_dets.append(threshold(os.path.join(path,'rrc.txt'), .05, 0, 1))
pred_dets.append(threshold(os.path.join(path,'subcnn.txt'), .8, 0, 1))
#pred_dets.append(threshold(os.path.join(path,'faster_rcnn.txt'), .99, 0, 1))
#pred_dets.append(threshold(os.path.join(path,'detectron2_x101.txt'), .9, 0, 1))
#pred_dets.append(threshold(path+'regionlets.txt', 5, -5, 25))
if len(pred_dets[0]) == 0:
continue
max_frames = int((pred_dets[0])[len(pred_dets[0])-1][0])
det_ctrs = [0,0,0,0]
for frame in range(max_frames+1):
frame_num = 0
for j in range(1): #TODO: Update to number of detectors used
while det_ctrs[j] < len(pred_dets[j]) and int( (pred_dets[j])[det_ctrs[j]][0]) == frame:
(pred_dets[j])[det_ctrs[j]][1] = str(frame_num)
frame_num+=1
f.write( " ".join( (pred_dets[j])[det_ctrs[j]] )+'\n')
det_ctrs[j]+=1
# Counts max/min of scores
for ctr, pred_det in enumerate(pred_dets):
minval = 1000
maxval = 0
for detection in pred_det:
score = detection[len(detection)-1]
if float(score)>maxval:
maxval = float(score)
if float(score)>> boxes = [d.roi for d in detections]
>>> scores = [d.confidence for d in detections]
>>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
>>> detections = [detections[i] for i in indices]
Parameters
----------
boxes : ndarray
Array of ROIs (x, y, width, height).
max_bbox_overlap : float
ROIs that overlap more than this values are suppressed.
scores : Optional[array_like]
Detector confidence score.
Returns
-------
List[int]
Returns indices of detections that have survived non-maxima suppression.
"""
if len(boxes) == 0:
return []
boxes = boxes.astype(np.float)
pick = []
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2] + boxes[:, 0]
y2 = boxes[:, 3] + boxes[:, 1]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
if scores is not None:
idxs = np.argsort(scores)
else:
idxs = np.argsort(y2)
while len(idxs) > 0:
last = len(idxs) - 1
i = idxs[last]
pick.append(i)
xx1 = np.maximum(x1[i], x1[idxs[:last]])
yy1 = np.maximum(y1[i], y1[idxs[:last]])
xx2 = np.minimum(x2[i], x2[idxs[:last]])
yy2 = np.minimum(y2[i], y2[idxs[:last]])
w = np.maximum(0, xx2 - xx1 + 1)
h = np.maximum(0, yy2 - yy1 + 1)
#overlap = (w * h) / (area[idxs[:last]]) # + area[idxs[last:last+1]] - w * h) #changed from deepsort to sum both areas
overlap = (w * h) / (area[idxs[:last]] + area[idxs[last:last+1]] - w * h) #changed from deepsort to sum both areas
threshold = np.where(y2[i]-y1[i] < 50, max_bbox_overlap-0.1, max_bbox_overlap)
idxs = np.delete(
idxs, np.concatenate(
([last], np.where(overlap > threshold)[0])))
return pick
================================================
FILE: paper_experiments/utils/detection.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
class Detection(object):
    """
    This class represents a bounding box detection in a single image,
    optionally paired with a 3D box.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    box_3d : array_like | NoneType
        3D box parameters, or None when no 3D box is available. Element 1 is
        shifted by half of element 4 on construction (see `__init__`).
    confidence : float
        Detector confidence score.
    appearance_feature : array_like
        Appearance feature vector of the detection.
    feature : array_like | NoneType
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    box_3d : ndarray | NoneType
        float32 copy of the 3D box after the vertical shift, or None.
    confidence : float
        Detector confidence score.
    appearance_feature : ndarray
        float32 appearance feature vector.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):
        # `np.float` was removed in NumPy 1.24; the builtin is the same dtype.
        self.tlwh = np.asarray(tlwh, dtype=float)
        # Note that detections format is centre of 3D box and dimensions (not bottom face)
        if box_3d is not None:
            # Work on a private copy so the caller's list/array is not shifted
            # in place (constructing two detections from the same row would
            # otherwise apply the shift twice).
            shifted = np.array(box_3d, dtype=np.float64)
            shifted[1] -= shifted[4] / 2
            self.box_3d = shifted.astype(np.float32)
        else:
            self.box_3d = None
        self.confidence = float(confidence)
        self.appearance_feature = np.asarray(appearance_feature, dtype=np.float32)
        if feature is not None:
            self.feature = np.asarray(feature, dtype=np.float32)
        else:
            self.feature = None

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xywh(self):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        return ret

    def get_3d_distance(self):
        """Return `sqrt(box_3d[0]**2 + box_3d[2]**2)`, or None when the
        detection has no 3D box.
        """
        if self.box_3d is None:
            return None
        return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)
================================================
FILE: paper_experiments/utils/double_measurement_kf.py
================================================
import random
import numpy as np
import scipy.linalg
import EKF
import pdb
import kf_2d
import os
import pickle
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
from read_detections import read_ground_truth_3d_detections, read_ground_truth_2d_detections
np.set_printoptions(precision=4, suppress=True)
from calibration import Calibration
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from evaluation.distances import iou_matrix
class KF_3D(kf_2d.KalmanFilter2D):
    """
    3D Kalman Filter that tracks objects in 3D space.

    The 9-dimensional state space

        x, y, z, l, h, w, theta, vx, vz

    contains the bounding box position (x, y, z), the box dimensions
    l, h, w, the heading angle theta, and the x and z velocities.

    Object motion follows a constant velocity model in x and z. The first
    seven state entries are taken as direct observation of the state space
    (linear observation model). A 2D image box can additionally be fused
    through a linearised projection of the eight 3D box corners.

    """

    def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight,
                 std_process, std_measurement_2d, std_measurement_3d,
                 initial_uncertainty, omni = True, debug=True):
        """Store the noise weights and build the motion/observation matrices.

        Parameters
        ----------
        calib : object
            Calibration. With ``omni`` it must provide ``img_shape``,
            ``median_focal_length_y`` and ``project_ref_to_image_torch``;
            otherwise it must provide the projection matrix ``P``.
        pos_weight_3d, pos_weight, velocity_weight, theta_weight : float
            Standard-deviation weights for 3D box entries, 2D box entries,
            velocities and heading respectively.
        std_process, std_measurement_2d, std_measurement_3d : float
            Global scale factors for process and measurement noise.
        initial_uncertainty : float
            Extra scale applied to the initial covariance.
        omni : bool
            Select the omni-directional projection model instead of ``calib.P``.
        debug : bool
            When True, `update` also returns the mean after the 3D step.
        """
        self.ndim, self.dt = 9, 1.

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(self.ndim, self.ndim)
        self._motion_mat[0, 7] = self.dt
        self._motion_mat[2, 8] = self.dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(self.ndim - 2, self.ndim)
        if omni:
            self.x_constant = calib.img_shape[2]/(2*np.pi)
            self.y_constant = calib.median_focal_length_y
            self.calib = calib
        else:
            self.projection_matrix = calib.P
        self.omni = omni
        self._std_weight_pos_3d = pos_weight_3d
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._std_weight_theta= theta_weight
        self._std_weight_process = std_process
        self._initial_uncertainty = initial_uncertainty
        self._std_weight_measurement_2d = std_measurement_2d
        self._std_weight_measurement_3d = std_measurement_3d
        self.debug = debug

    def initiate(self, measurement_3d):
        """Create a track state from a single 3D measurement.

        Parameters
        ----------
        measurement_3d : ndarray
            3D measurement; its first six entries scale the initial standard
            deviations. Two zero velocities are appended to form the mean.

        Returns
        -------
        (ndarray, ndarray)
            Mean vector and diagonal covariance matrix of the new track.
        """
        mean_pos = measurement_3d
        mean_vel = np.zeros((2,))
        mean = np.r_[mean_pos, mean_vel]

        std = [self._std_weight_pos_3d * measurement_3d[i] for i in range(6)]
        std += [self._std_weight_theta,
                self._std_weight_vel,
                self._std_weight_vel]
        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2
        return mean, covariance

    def get_process_noise(self, mean):
        """Return the process noise covariance (independent of ``mean``).

        Also caches the unscaled diagonal covariance in ``self._motion_cov``.
        """
        std_pos = [
            self._std_weight_pos_3d, # x
            self._std_weight_pos_3d, # y
            self._std_weight_pos_3d, # z
            self._std_weight_pos_3d, # l
            self._std_weight_pos_3d, # h
            self._std_weight_pos_3d, # w
            self._std_weight_theta # theta
        ]
        std_vel = [
            self._std_weight_vel, # x
            self._std_weight_vel, # z
        ]
        self._motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2
        return motion_cov

    def get_2d_measurement_noise(self, measurement_2d):
        """Return the 2D sensor noise covariance.

        The standard deviations are proportional to entries 2 and 3 of
        ``measurement_2d``.
        """
        std = [
            self._std_weight_pos*measurement_2d[2],
            self._std_weight_pos*measurement_2d[3],
            self._std_weight_pos*measurement_2d[2],
            self._std_weight_pos*measurement_2d[3]]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_2d**2
        return innovation_cov

    def get_3d_measurement_noise(self, measurement):
        """Return the 3D sensor noise covariance.

        The standard deviations of the first six entries are proportional to
        the corresponding entries of ``measurement``; theta uses a fixed weight.
        """
        std = [
            self._std_weight_pos_3d * measurement[0], # x
            self._std_weight_pos_3d * measurement[1], # y
            self._std_weight_pos_3d * measurement[2], # z
            self._std_weight_pos_3d * measurement[3], # l
            self._std_weight_pos_3d * measurement[4], # h
            self._std_weight_pos_3d * measurement[5], # w
            self._std_weight_theta # theta
        ]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement_3d**2
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False,
                        use_3d=True):
        """Compute gating distance between state distribution and measurements.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (9 dimensional).
        covariance : ndarray
            Covariance of the state distribution (9x9 dimensional).
        measurements : ndarray
            One measurement per row. With `use_3d` the rows are compared
            against the first seven state entries; otherwise against the
            2D box `(min x, min y, width, height)` of the projected corners.
        only_position : Optional[bool]
            If True, only the first three (3D) or first two (2D) entries are
            used for the distance computation.
        use_3d : Optional[bool]
            Gate in 3D state space (True) or in the image plane (False).

        Returns
        -------
        ndarray
            Result of `EKF.squared_mahalanobis_distance` for the selected
            mean, covariance and measurements.

        """
        if not use_3d:
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            # NOTE(review): the noise term is derived from the 9-d state
            # `mean` here, whereas `update` passes the projected 2D box to
            # project_cov_2d - confirm which one is intended.
            cov = self.project_cov_2d(mean, covariance, H_2d)
            mean = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        else:
            mean, cov = mean[:7], covariance[:7, :7]
        if only_position:
            if use_3d:
                mean, cov = mean[:3], cov[:3, :3]
                measurements = measurements[:, :3]
            else:
                mean, cov = mean[:2], cov[:2, :2]
                measurements = measurements[:, :2]
        return EKF.squared_mahalanobis_distance(mean, cov, measurements)

    def project_cov(self, mean, covariance):
        """Return S, the innovation covariance of the 3D measurement model."""
        measurement_noise = self.get_3d_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                               self._observation_mat.T))
                          + measurement_noise)
        return innovation_cov

    def project_cov_2d(self, mean, covariance, H_2d):
        """Return S, the innovation covariance of the 2D measurement model.

        ``mean`` is only used to scale the 2D measurement noise.
        """
        measurement_noise = self.get_2d_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((H_2d, covariance,
                                               H_2d.T))
                          + measurement_noise)
        return innovation_cov

    @staticmethod
    def _solve_kalman_gain(covariance, measurement_mat, innovation_cov):
        """Return K = P H^T S^-1, preferring a Cholesky solve."""
        cross_cov = np.dot(covariance, measurement_mat.T)
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            return scipy.linalg.cho_solve(
                (chol_factor, lower), cross_cov.T,
                check_finite=False).T
        except (np.linalg.LinAlgError, ValueError):
            # in case cholesky factorization fails, revert to standard solver
            return np.dot(cross_cov, np.linalg.inv(innovation_cov))

    # @profile
    def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        The 3D measurement (if any) is applied first, then the 2D one.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (9 dimensional).
        covariance : ndarray
            The state's covariance matrix (9x9 dimensional). It is not
            modified; the corrected covariance is returned.
        measurement_2d : ndarray | list | NoneType
            2D box(es) whose first four entries are compared against
            `(min x, min y, width, height)` of the projected 3D corners.
            With `JPDA` this is an iterable of candidate boxes. None skips
            the 2D step.
        measurement_3d : ndarray | list | NoneType
            The 7 dimensional measurement vector (x, y, z, l, h, w, theta).
            With `JPDA` this is an iterable of candidate boxes. None, or any
            None entry, skips the 3D step.
        marginalization : sequence | NoneType
            JPDA association weights; index 0 is the dummy (no-detection)
            node and index i+1 belongs to candidate i.
        JPDA : bool
            Use the probabilistically weighted (JPDA) update.

        Returns
        -------
        (ndarray, ndarray) or (ndarray, ndarray, ndarray)
            The measurement-corrected mean and covariance. When `self.debug`
            is set, the mean after the 3D step is returned as a third element
            (equal to the input mean if no 3D step ran). If the input mean
            contains NaN the inputs are returned unchanged as a 2-tuple.

        """
        if np.any(np.isnan(mean)):
            return mean, covariance
        # Keep the prior untouched: the in-place updates below must neither
        # leak into the caller's array nor change the prior used by the JPDA
        # dummy-node term.
        prior_cov = covariance
        out_cov = covariance.copy()
        H_3d = self._observation_mat
        covariance_3d = None
        post_3d_mean = mean
        # The default (None) and any missing entry both disable the 3D step.
        do_3d = measurement_3d is not None and all(
            meas is not None for meas in measurement_3d)
        if do_3d:
            S_matrix = self.project_cov(mean, out_cov)
            kalman_gain = self._solve_kalman_gain(out_cov, H_3d, S_matrix)
            out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))
            if JPDA:
                innovation_3d = 0
                cov_uncertainty_3d = 0
                for i, detection_3d in enumerate(measurement_3d):
                    innovation_partial = detection_3d - mean[:7]
                    innovation_3d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
                    cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                out_cov += marginalization[0]*prior_cov
            else:
                # NOTE(review): the covariance was already reduced by
                # K S K^T above; this applies a second reduction - confirm
                # that the double correction is intended.
                out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))
                innovation_3d = measurement_3d - mean[:7]
            mean = mean + np.dot(kalman_gain, innovation_3d)
            post_3d_mean = mean
            covariance_3d = out_cov
        if measurement_2d is not None:
            corner_points, corner_points_3d = self.calculate_corners(mean)
            H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)

            #update based on 2D
            min_x, min_y = np.amin(corner_points, axis = 0)[:2]
            max_x, max_y = np.amax(corner_points, axis = 0)[:2]
            predicted_box = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
            S_matrix = self.project_cov_2d(predicted_box, out_cov, H_2d)
            kalman_gain = self._solve_kalman_gain(out_cov, H_2d, S_matrix)
            out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)
            if JPDA:
                innovation_2d = 0
                cov_uncertainty_2d = 0
                for i, detection_2d in enumerate(measurement_2d):
                    innovation_partial = detection_2d[:4] - predicted_box
                    innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
                    cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
                partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)
                out_cov *= 1 - marginalization[0]
                out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
                # The "prior" of the 2D step is the post-3D covariance when a
                # 3D step ran, otherwise the original input covariance.
                if covariance_3d is None:
                    out_cov += marginalization[0]*prior_cov
                else:
                    out_cov += marginalization[0]*covariance_3d
            else:
                innovation_2d = measurement_2d[:4] - predicted_box
            mean = mean + np.dot(kalman_gain, innovation_2d)
        if self.debug:
            return mean, out_cov, post_3d_mean
        return mean, out_cov

    # @profile
    def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):
        """Return the 4x9 Jacobian of the projected 2D box w.r.t. the state.

        The rows correspond to (min x, min y, width, height) of the projected
        corners; each is built from the corner that attains the respective
        extreme. The two velocity columns are zero.
        """
        min_x = np.inf
        min_x_idx = None
        max_x = -np.inf
        max_x_idx = None
        min_y = np.inf
        min_y_idx = None
        max_y = -np.inf
        max_y_idx = None
        # Find which corner defines each side of the 2D box (first one wins on ties).
        for idx, pt in enumerate(corner_points):
            if pt[0] < min_x:
                min_x_idx = idx
                min_x = pt[0]
            if pt[0] > max_x:
                max_x_idx = idx
                max_x = pt[0]
            if pt[1] < min_y:
                min_y_idx = idx
                min_y = pt[1]
            if pt[1] > max_y:
                max_y_idx = idx
                max_y = pt[1]
        # Pick the projection Jacobian matching the camera model.
        point_jacobian = self.jacobian_omni if self.omni else self.jacobian
        jac_x = np.dot(point_jacobian(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))
        jac_y = np.dot(point_jacobian(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))
        jac_w = np.dot(point_jacobian(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x
        jac_h = np.dot(point_jacobian(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y
        jac = np.vstack([jac_x, jac_y, jac_w, jac_h])
        jac = np.hstack([jac, np.zeros((jac.shape[0], 2))])
        return jac

    # Jacobian for projective transformation
    def jacobian(self, pt_3d):
        """Return the 2x3 Jacobian of the perspective projection at ``pt_3d``
        (a homogeneous 3D point), using ``self.projection_matrix``.
        """
        den = np.sum(self.projection_matrix[2] * pt_3d)
        dxy = (1 - self.projection_matrix[2] * pt_3d/den) * self.projection_matrix[0:2]/den
        return dxy[:, :3]

    def jacobian_omni(self, pt_3d):
        """Return the 2x3 Jacobian of the omni-directional projection at
        ``pt_3d``, using ``self.x_constant`` and ``self.y_constant``.
        """
        jac = np.zeros((2, 3))
        x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]
        denominator = (x**2 + z**2)
        jac[0, 0] = -self.x_constant*(2*x*(z**2)/denominator)
        jac[0, 0] /= denominator
        jac[0, 2] = self.x_constant*2*z/denominator
        jac[0, 2] *= 1 - (z**2)/denominator
        jac[1, 0] = self.y_constant*x*y/denominator
        jac[1, 1] = -self.y_constant
        jac[1,2] = self.y_constant*z*y/denominator
        jac[1, :] /= np.sqrt(denominator)
        return jac

    def calculate_corners(self, box):
        """Return the eight box corners in 2D and in homogeneous 3D.

        ``box[:7]`` is read as (x, y, z, l, h, w, theta). Corners 0-3 lie at
        ``y + h/2`` and corners 4-7 at ``y - h/2``.

        Returns
        -------
        (ndarray, ndarray)
            Projected 2D points (from `project_2d`) and the 8x4 array of
            homogeneous 3D corner points.
        """
        x,y,z,l,h,w,theta = box[:7]
        x_delta_1 = np.cos(theta)*l/2+np.sin(theta)*w/2
        x_delta_2 = np.cos(theta)*l/2 - np.sin(theta)*w/2
        z_delta_1 = np.sin(theta)*l/2-np.cos(theta)*w/2
        z_delta_2 = np.sin(theta)*l/2+np.cos(theta)*w/2
        # Footprint offsets of the four corners, shared by both faces.
        footprint = [(x_delta_1, z_delta_1),
                     (x_delta_2, z_delta_2),
                     (-x_delta_2, -z_delta_2),
                     (-x_delta_1, -z_delta_1)]
        pt_3d = []
        for y_face in (y + h/2, y - h/2):
            for dx, dz in footprint:
                pt_3d.append((x+dx, y_face, z+dz, 1))
        pts_3d = np.vstack(pt_3d)
        pts_2d = self.project_2d(pts_3d)
        return pts_2d, pts_3d

    def corner_jacobian(self, pt_3d, corner_idx):
        """Return the 3x7 Jacobian of corner ``corner_idx`` w.r.t. the first
        seven state entries (x, y, z, l, h, w, theta).

        NOTE(review): `calculate_corners` builds the x offset from
        cos(theta)*l/2 and sin(theta)*w/2 (and z from sin(theta)*l/2 and
        cos(theta)*w/2), while the l and w columns below use the opposite
        trigonometric function - confirm these entries are intended.
        """
        _, _, _, l, _, w, theta = pt_3d[:7]
        jac = np.eye(3,7)
        jac[1, 4] = 0.5 if corner_idx < 4 else -0.5
        jac[0, 3] = 0.5*np.sin(theta) if corner_idx % 4 < 2 else -0.5*np.sin(theta)
        jac[0, 5] = 0.5*np.cos(theta) if corner_idx % 2 == 0 else -0.5*np.cos(theta)
        jac[2, 3] = 0.5*np.cos(theta) if corner_idx%4 < 2 else -0.5*np.cos(theta)
        jac[2, 5] = 0.5*np.sin(theta) if corner_idx%2 == 0 else -0.5*np.sin(theta)
        # Derivatives of the corner offsets with respect to theta.
        if corner_idx%4 == 0:
            jac[0, 6] = -np.sin(theta)*l/2 + np.cos(theta)*w/2
            jac[2, 6] = np.cos(theta)*l/2 + np.sin(theta)*w/2
        elif corner_idx%4==1:
            jac[0, 6] = -np.sin(theta)*l/2 - np.cos(theta)*w/2
            jac[2, 6] = np.cos(theta)*l/2 - np.sin(theta)*w/2
        elif corner_idx%4==2:
            jac[0, 6] = +np.sin(theta)*l/2 + np.cos(theta)*w/2
            jac[2, 6] = -np.cos(theta)*l/2 + np.sin(theta)*w/2
        else:
            jac[0, 6] = +np.sin(theta)*l/2 - np.cos(theta)*w/2
            jac[2, 6] = -np.cos(theta)*l/2 - np.sin(theta)*w/2
        return jac

    def project_2d(self, pts_3d):
        """Project homogeneous 3D points to the image and return their first
        two coordinates.
        """
        if self.omni:
            pts_2d = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))
        else:
            pts_2d = np.dot(pts_3d, self.projection_matrix.T)
            pts_2d /= np.expand_dims(pts_2d[:, 2], 1)
        return pts_2d[:, :2]
def swap(detections_3d, iou, idx, swap_prob = 0):
    """Return the 3D detection for ``idx``, occasionally swapped with the
    most overlapping other detection.

    With probability ``swap_prob`` (decided by ``random.random()``) the entry
    of row ``idx`` in ``iou`` with the largest value, excluding ``idx``
    itself, is looked up; if that value exceeds 0.4 the corresponding
    detection is returned instead of ``detections_3d[idx]``.

    Parameters
    ----------
    detections_3d : sequence
        Candidate 3D detections, indexable by position.
    iou : array_like
        Pairwise overlap matrix; row ``idx`` is read. It is not modified.
    idx : int
        Index of the detection that nominally belongs to the track.
    swap_prob : float
        Probability of attempting a swap.
    """
    if random.random() > swap_prob:
        return detections_3d[idx]
    # Copy the row so that masking the self-overlap does not write -1 into
    # the caller's matrix.
    iou_row = np.array(iou[idx], copy=True)
    iou_row[idx] = -1
    max_idx = np.argmax(iou_row)
    if iou_row[max_idx] > 0.4:
        return detections_3d[max_idx]
    return detections_3d[idx]
if __name__ == '__main__':
    # Stand-alone experiment: run the filter on noisy ground-truth boxes of
    # one KITTI sequence, randomly dropping 2D/3D measurements, and report
    # the mean 3D position error per (prob_2d, prob_3d) setting.
    seq = '0001'
    gt_path = os.path.join('data','KITTI','sequences', seq, 'gt')
    prob_3d_list = [0.6]
    prob_2d_list = [0.9]
    swap_prob = 0
    std_3d = 0.2
    std_2d = 5
    boxes_3d, ids, frame_3d = read_ground_truth_3d_detections(os.path.join(gt_path, '3d_detections.txt'), None)
    boxes_2d, object_ids, frame_2d = read_ground_truth_2d_detections(os.path.join(gt_path, 'gt.txt'), None, nms_threshold = 1)
    # Convert the 2D boxes from corner format to (x, y, w, h) and shift the
    # 3D y coordinate by half of column 4.
    boxes_2d[:,2] -= boxes_2d[:,0]
    boxes_2d[:,3] -= boxes_2d[:,1]
    boxes_3d[:,1] -= boxes_3d[:, 4]/2
    calib = Calibration(os.path.join(os.path.dirname(gt_path), 'calib', seq+'.txt'))
    pos_weight = 0.05
    pos_weight_2d = 0.006
    velocity_weight = 0.0007
    theta_weight = 0.000300
    std_process = 2
    std_measurement_2d = 2.6
    std_measurement_3d = 0.01
    initial_uncertainty = 1
    kf = KF_3D(calib, pos_weight, pos_weight_2d, velocity_weight, theta_weight,
               std_process, std_measurement_2d, std_measurement_3d,
               initial_uncertainty, omni=False, debug=True)
    final_errors = np.zeros((len(prob_2d_list), len(prob_3d_list)))
    random.seed(14295)
    np.random.seed(14295)
    for idx_3d, prob_3d in enumerate(prob_3d_list):
        for idx_2d, prob_2d in enumerate(prob_2d_list):
            id_means = {idx:[] for idx in np.unique(ids)}
            id_means_2d = {idx:[] for idx in np.unique(ids)}
            id_preds = {idx:[] for idx in np.unique(ids)}
            id_meas = {idx:[] for idx in np.unique(ids)}
            id_errors = {idx:[] for idx in np.unique(ids)}
            for frame in sorted(np.unique(frame_2d)):
                frame_mask = frame_2d==frame
                frame_boxes_2d = boxes_2d[frame_mask]
                frame_boxes_3d = boxes_3d[frame_mask]
                frame_ids = ids[frame_mask]
                iou = 1-iou_matrix(frame_boxes_2d[:,:4], frame_boxes_2d[:,:4], max_iou=10) #output of function is 1 - IoU
                for idx, object_id in enumerate(frame_ids):
                    if frame_boxes_3d[idx][2] > 30:
                        continue
                    track_id = object_id.item()
                    noise_2d = np.random.randn(*frame_boxes_2d[idx].shape)*std_2d
                    noise_3d = np.random.randn(*frame_boxes_3d[idx].shape)*std_3d
                    if len(id_means[track_id]) == 0:
                        # KF_3D.initiate only accepts the 3D measurement.
                        # NOTE(review): assumes each 3D row is the 7-vector
                        # (x, y, z, l, h, w, theta) - confirm against
                        # read_ground_truth_3d_detections.
                        mean, cov = kf.initiate(frame_boxes_3d[idx]+noise_3d)
                        id_means[track_id].append((mean, cov, frame))
                        continue
                    mean, cov = kf.predict(id_means[track_id][-1][0], id_means[track_id][-1][1])
                    id_preds[track_id].append((mean, cov, frame))
                    # Without an update the predicted mean is what gets
                    # recorded; this also keeps mean_2d from being unbound
                    # or stale from a previous object.
                    mean_2d = mean
                    if random.random() < prob_2d:
                        if random.random() < prob_3d:
                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, swap(frame_boxes_3d, iou, idx, swap_prob)+noise_3d)
                        else:
                            mean, cov, mean_2d = kf.update(mean, cov, frame_boxes_2d[idx]+noise_2d, None)
                    id_means[track_id].append((mean, cov, frame))
                    id_means_2d[track_id].append((mean_2d, frame))
                    id_meas[track_id].append((frame_boxes_3d[idx], frame_boxes_2d[idx], frame))
                    id_errors[track_id].append((np.sqrt(np.sum((mean[:3] - frame_boxes_3d[idx][:3])**2)), frame))
            # Average the error values per track (each entry is an
            # (error, frame) pair), then average over tracks.
            errors = [np.mean([err for err, _ in error]) for error in id_errors.values() if len(error) > 0]
            final_errors[idx_2d, idx_3d] = np.mean(errors)
            print("3D prob: %f %% & 2D prob: %f %% & swap prob: %f %% RMSE: %f"%(prob_3d*100, prob_2d*100, swap_prob*100, final_errors[idx_2d, idx_3d]))
            # Make sure the output directory exists before writing.
            os.makedirs('results', exist_ok=True)
            with open('results/kf_mean_pickle.p', 'wb') as f:
                pickle.dump([id_means, id_means_2d, id_meas, id_preds], f)
    print(final_errors)
================================================
FILE: paper_experiments/utils/evaluate_detections.py
================================================
import numpy as np
import os
import pdb
from tqdm import tqdm
from deep_sort_utils import non_max_suppression as deepsort_nms
from visualise import draw_track
import matplotlib.pyplot as plt
from PIL import Image
def evaluate_detections(detection_path_1, detection_path_2, detection_path_3, detection_path_4, gt_path):
    """Compute an 11-point interpolated average precision for one sequence.

    Expects detections and ground truth in comma-separated files with the
    frame in column 0, the box `(x, y, w, h)` in columns 2-5 and, for
    detections, the confidence in column 6. Only `detection_path_1` is
    evaluated; the other detection paths are accepted but unused.

    Detections with confidence <= 0.9 are discarded, the rest are filtered
    per frame with non-maximum suppression (overlap 0.75). A ground-truth box
    without any detection of IoU >= 0.5 counts as missed and is displayed
    with matplotlib (blocking until the window is closed).

    Returns
    -------
    float | None
        Mean of the 11 precision values, or None when either input file
        cannot be read or parsed.
    """
    try:
        detections = np.loadtxt(detection_path_1, delimiter=',', ndmin=2)
        gt = np.loadtxt(gt_path, delimiter=',', ndmin=2)
    except (OSError, ValueError):
        # Missing or malformed file: skip this sequence.
        return None
    gt_frames = gt[:, 0]
    ###CONFIDENCE THRESHOLD
    detections = detections[detections[:, 6] > 0.9]
    ########
    print("Average number of detections per frame = %f"%(detections.shape[0]/len(np.unique(gt_frames))))
    det_frames = detections[:, 0]
    det_confidence = detections[:, 6]
    gt_boxes = gt[:, 2:6]
    det_boxes = detections[:, 2:6]

    assignments = []
    missed_detections = 0
    for frame in np.unique(gt_frames):
        frame_mask_det = det_frames == frame
        frame_gt_boxes = gt_boxes[gt_frames == frame]
        frame_det_boxes = det_boxes[frame_mask_det]
        frame_confidence = det_confidence[frame_mask_det]
        # 1-D confidences stay 1-D even for a single detection (np.squeeze
        # on a (1, 1) array would collapse it to a scalar).
        indices = deepsort_nms(frame_det_boxes.astype(np.float32), 0.75,
                               frame_confidence.astype(np.float32))
        # Keep boxes and confidences aligned after suppression.
        frame_det_boxes = frame_det_boxes[indices]
        frame_confidence = frame_confidence[indices]
        positive_arr = np.zeros(len(frame_det_boxes), dtype=bool)
        for gt_box in frame_gt_boxes:
            iou_list = np.asarray([iou(gt_box, det_box) for det_box in frame_det_boxes])
            positive_idx = np.where(iou_list >= 0.5)[0]
            if len(positive_idx) == 0:
                missed_detections += 1
                # Visualise the missed ground-truth box with all detections.
                plt.figure(0)
                plt.imshow(Image.open(os.path.join(os.path.split(detection_path_1)[0], '..','imgs','%.6d.png'%frame)))
                draw_track(None, gt_box, det = False)
                for det_box in frame_det_boxes:
                    draw_track(None, det_box, det = True)
                plt.show()
            positive_arr[positive_idx] = True
        assignments.extend(list(zip(positive_arr, frame_confidence)))
    # Rank all detections by confidence, highest first.
    assignments = sorted(assignments, key = lambda x: x[1], reverse = True)
    # With no surviving detections there is nothing to unzip.
    predictions = list(zip(*assignments))[0] if assignments else ()
    true_positives = np.cumsum(predictions)
    false_negatives = np.cumsum(predictions[::-1])[::-1]+missed_detections
    precision = true_positives/np.arange(1, len(true_positives)+1)
    recall = true_positives/(true_positives + false_negatives)
    print("Total missed detections = %d"%missed_detections)
    # Pick the first index at which recall exceeds 0, 0.1, ..., 1.0.
    base = 0
    idx = []
    for i,recall_val in enumerate(recall):
        if recall_val > base:
            base += 0.1
            idx.append(i)
        if base >1:
            break
    precision_vals = [np.amax(precision[index:]) for index in idx]
    # Recall levels that were never reached contribute zero precision.
    if len(precision_vals) < 11:
        precision_vals.extend([0]*(11-len(precision_vals)))
    print(precision_vals)
    return np.mean(precision_vals)
def iou(bbox_1, bbox_2):
    """Return the intersection-over-union of two `(x, y, w, h)` boxes.

    Returns 0.0 when the union has no area (e.g. two zero-sized boxes)
    instead of dividing by zero.
    """
    x1_1, y1_1, w_1, h_1 = bbox_1
    x1_2, y1_2, w_2, h_2 = bbox_2

    # Bottom-right corners.
    x2_1 = x1_1 + w_1
    y2_1 = y1_1 + h_1
    x2_2 = x1_2 + w_2
    y2_2 = y1_2 + h_2

    area_1 = abs(x2_1 - x1_1)*abs(y2_1-y1_1)
    area_2 = abs(x2_2 - x1_2)*abs(y2_2-y1_2)

    overlap_w = max(0, min(x2_1, x2_2) - max(x1_1, x1_2))
    overlap_h = max(0, min(y2_1, y2_2) - max(y1_1, y1_2))
    intersection = overlap_w * overlap_h
    union = area_1 + area_2 - intersection
    if union <= 0:
        return 0.0
    return intersection / union
if __name__=='__main__':
    # Evaluate the 21 KITTI sequences and report the mean average precision
    # over those for which evaluate_detections returned a value.
    KITTI_root = 'data/KITTI/sequences'
    detector_files = ('subcnn_car_det.txt', 'rrc_car_det.txt',
                      'lsvm_car_det.txt', 'regionlets_car_det.txt')
    ap = []
    for sequence in tqdm(range(21)):
        seq_dir = os.path.join(KITTI_root, '%.4d'%sequence)
        det_paths = [os.path.join(seq_dir, 'det', det_file) for det_file in detector_files]
        gt_file = os.path.join(seq_dir, 'gt', 'gt_car.txt')
        ap.append(evaluate_detections(det_paths[0], det_paths[1], det_paths[2], det_paths[3], gt_file))
    # Sequences that could not be loaded yield None and are left out.
    ap = [ap_val for ap_val in ap if ap_val is not None]
    print("FINAL AVERAGE PRECISION OVER ALL SEQUENCES IS: %f"%np.mean(ap))
================================================
FILE: paper_experiments/utils/featurepointnet_model_util.py
================================================
import open3d as o3d
import numpy as np
import tensorflow as tf
import os
import sys
import torch
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
import featurepointnet_tf_util as tf_util
# -----------------
# Global Constants
# -----------------
NUM_HEADING_BIN = 12
NUM_SIZE_CLUSTER = 8 # one cluster for each type
NUM_OBJECT_POINT = 512

# Object type name -> class index, and the inverse lookup.
g_type2class = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
}
g_class2type = {class_id: type_name for type_name, class_id in g_type2class.items()}
g_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}

# Mean box size (three values) per object type.
# Author's note: Added 0.5m and 0.2m for car and pedestrian to make boxes slightly bigger
g_type_mean_size = {
    'Car': np.array([3.88311640418,1.62856739989,1.52563191462]),
    'Van': np.array([5.06763659,1.9007158,2.20532825]),
    'Truck': np.array([10.13586957,2.58549199,3.2520595]),
    'Pedestrian': np.array([0.84422524,0.66068622,1.76255119]),
    'Person_sitting': np.array([0.80057803,0.5983815,1.27450867]),
    'Cyclist': np.array([1.76282397,0.59706367,1.73698127]),
    'Tram': np.array([16.17150617,2.53246914,3.53079012]),
    'Misc': np.array([3.64300781,1.54298177,1.92320313]),
}

# Size clusters: row i holds the mean size of the type with class index i.
g_mean_size_arr = np.zeros((NUM_SIZE_CLUSTER, 3))
for i in range(NUM_SIZE_CLUSTER):
    type_name_i = g_class2type[i]
    g_mean_size_arr[i,:] = g_type_mean_size[type_name_i]
# -----------------
# TF Functions Helpers
# -----------------
def tf_gather_object_pc(point_cloud, mask, npoints=512):
    ''' Select a fixed number of masked points from every point cloud.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)
        npoints: int scalar, number of points gathered per cloud (default: 512)
    Output:
        object_pc: TF tensor in shape (B,npoint,C)
        indices: TF int tensor in shape (B,npoint,2)
    '''
    def mask_to_indices(mask_values):
        # NumPy side of the op: builds (batch index, point index) pairs.
        batch = mask_values.shape[0]
        gather_idx = np.zeros((batch, npoints, 2), dtype=np.int32)
        for b in range(batch):
            picked = np.where(mask_values[b,:]>0.5)[0]
            n_picked = len(picked)
            # Clouds without any picked point keep point index 0 everywhere.
            if n_picked > 0:
                if n_picked > npoints:
                    # Too many points: subsample without replacement.
                    choice = np.random.choice(n_picked,
                        npoints, replace=False)
                else:
                    # Too few points: keep all of them and pad with repeats.
                    padding = np.random.choice(n_picked,
                        npoints-n_picked, replace=True)
                    choice = np.concatenate((np.arange(n_picked), padding))
                np.random.shuffle(choice)
                gather_idx[b,:,1] = picked[choice]
            gather_idx[b,:,0] = b
        return gather_idx

    indices = tf.py_func(mask_to_indices, [mask], tf.int32)
    object_pc = tf.gather_nd(point_cloud, indices)
    return object_pc, indices
def get_box3d_corners_helper(centers, headings, sizes):
    """ TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3)

    Builds the eight corners of every box from its size, rotates them about
    the y axis by the heading and translates them to the box center.
    """
    num_boxes = centers.get_shape()[0].value
    # Columns of `sizes` are read as length, width, height.
    length = tf.slice(sizes, [0,0], [-1,1]) # (N,1)
    width = tf.slice(sizes, [0,1], [-1,1]) # (N,1)
    height = tf.slice(sizes, [0,2], [-1,1]) # (N,1)
    half_l, half_w, half_h = length/2, width/2, height/2
    x_corners = tf.concat([half_l,half_l,-half_l,-half_l,half_l,half_l,-half_l,-half_l], axis=1) # (N,8)
    y_corners = tf.concat([half_h,half_h,half_h,half_h,-half_h,-half_h,-half_h,-half_h], axis=1) # (N,8)
    z_corners = tf.concat([half_w,-half_w,-half_w,half_w,half_w,-half_w,-half_w,half_w], axis=1) # (N,8)
    local_corners = tf.concat(
        [tf.expand_dims(x_corners,1),
         tf.expand_dims(y_corners,1),
         tf.expand_dims(z_corners,1)],
        axis=1) # (N,3,8)
    cos_h = tf.cos(headings)
    sin_h = tf.sin(headings)
    ones = tf.ones([num_boxes], dtype=tf.float32)
    zeros = tf.zeros([num_boxes], dtype=tf.float32)
    # Rows of the rotation matrix about the y axis.
    rot_row_x = tf.stack([cos_h,zeros,sin_h], axis=1) # (N,3)
    rot_row_y = tf.stack([zeros,ones,zeros], axis=1)
    rot_row_z = tf.stack([-sin_h,zeros,cos_h], axis=1)
    rotation = tf.concat(
        [tf.expand_dims(rot_row_x,1),
         tf.expand_dims(rot_row_y,1),
         tf.expand_dims(rot_row_z,1)],
        axis=1) # (N,3,3)
    corners_3d = tf.matmul(rotation, local_corners) # (N,3,8)
    corners_3d += tf.tile(tf.expand_dims(centers,2), [1,1,8]) # (N,3,8)
    corners_3d = tf.transpose(corners_3d, perm=[0,2,1]) # (N,8,3)
    return corners_3d
def get_box3d_corners(center, heading_residuals, size_residuals):
    """ TF layer.
    Inputs:
        center: (B,3)
        heading_residuals: (B,NH)
        size_residuals: (B,NS,3)
    Outputs:
        box3d_corners: (B,NH,NS,8,3) tensor

    Produces the corners of one box per (heading bin, size cluster)
    combination for every batch element.
    """
    batch_size = center.get_shape()[0].value
    bin_centers = tf.constant(np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    headings = heading_residuals + tf.expand_dims(bin_centers, 0) # (B,NH)

    # NOTE(review): size_residuals is added here and again on the next
    # line, i.e. twice in total - confirm this is intended.
    mean_sizes = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0) + size_residuals
    sizes = mean_sizes + size_residuals # (B,NS,3)

    # Broadcast everything to the full (B,NH,NS,...) grid.
    sizes = tf.tile(tf.expand_dims(sizes,1), [1,NUM_HEADING_BIN,1,1]) # (B,NH,NS,3)
    headings = tf.tile(tf.expand_dims(headings,-1), [1,1,NUM_SIZE_CLUSTER]) # (B,NH,NS)
    centers = tf.tile(tf.expand_dims(tf.expand_dims(center,1),1), [1,NUM_HEADING_BIN, NUM_SIZE_CLUSTER,1]) # (B,NH,NS,3)

    # Flatten the grid, compute the corners, and restore the grid shape.
    flat_count = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER
    flat_corners = get_box3d_corners_helper(
        tf.reshape(centers, [flat_count,3]),
        tf.reshape(headings, [flat_count]),
        tf.reshape(sizes, [flat_count,3]))
    return tf.reshape(flat_corners, [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])
def huber_loss(error, delta):
    """ Mean Huber loss of `error`.

    Elements with |error| <= delta contribute 0.5*error^2; beyond delta the
    penalty grows linearly with slope delta.
    Input:
        error: TF tensor of residuals
        delta: threshold between the quadratic and the linear regime
    Output:
        TF scalar tensor, mean over all elements
    """
    magnitude = tf.abs(error)
    # Part of the magnitude that is penalised quadratically (capped at delta).
    capped = tf.minimum(magnitude, delta)
    # Remainder above delta, penalised linearly.
    excess = magnitude - capped
    per_element = 0.5 * capped**2 + delta * excess
    return tf.reduce_mean(per_element)
def parse_output_to_tensors(output, end_points):
    ''' Split the box-estimation output into named tensors stored in end_points.

    Column layout of `output`:
        [0, 3)                      center
        next NUM_HEADING_BIN        heading scores
        next NUM_HEADING_BIN        normalized heading residuals
        next NUM_SIZE_CLUSTER       size scores
        next 3*NUM_SIZE_CLUSTER     normalized size residuals
    Input:
        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)
        end_points: dict
    Output:
        end_points: dict (updated)
    '''
    batch_size = output.get_shape()[0].value

    # Column offsets of each section.
    heading_score_col = 3
    heading_res_col = 3+NUM_HEADING_BIN
    size_score_col = 3+NUM_HEADING_BIN*2
    size_res_col = 3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER

    end_points['center_boxnet'] = tf.slice(output, [0,0], [-1,3])

    heading_scores = tf.slice(output, [0,heading_score_col],
                              [-1,NUM_HEADING_BIN])
    heading_res_norm = tf.slice(output, [0,heading_res_col],
                                [-1,NUM_HEADING_BIN])
    end_points['heading_scores'] = heading_scores # BxNUM_HEADING_BIN
    end_points['heading_residuals_normalized'] = heading_res_norm # BxNUM_HEADING_BIN (-1 to 1)
    # De-normalize: scale by half a heading bin width.
    end_points['heading_residuals'] = heading_res_norm * (np.pi/NUM_HEADING_BIN) # BxNUM_HEADING_BIN

    size_scores = tf.slice(output, [0,size_score_col],
                           [-1,NUM_SIZE_CLUSTER]) # BxNUM_SIZE_CLUSTER
    size_res_norm = tf.slice(output, [0,size_res_col],
                             [-1,NUM_SIZE_CLUSTER*3])
    size_res_norm = tf.reshape(size_res_norm,
                               [batch_size, NUM_SIZE_CLUSTER, 3]) # BxNUM_SIZE_CLUSTERx3
    end_points['size_scores'] = size_scores
    end_points['size_residuals_normalized'] = size_res_norm
    # De-normalize: scale each cluster's residual by that cluster's mean size.
    mean_size_row = tf.expand_dims(tf.constant(g_mean_size_arr, dtype=tf.float32), 0)
    end_points['size_residuals'] = size_res_norm * mean_size_row
    return end_points
# -----------------
# Box Parsing Helpers
# -----------------
def from_prediction_to_label_format(center, angle_class, angle_res,
                                    size_class, size_res, rot_angle):
    ''' Convert predicted box parameters to label format.

    Returns the tuple (tx, ty, tz, l, w, h, ry): the center rotated by
    -rot_angle around Y with h/2 added to its y coordinate, the decoded box
    size, and the decoded heading plus rot_angle.

    NOTE: rotate_pc_along_y writes into its argument and np.expand_dims
    returns a view, so when `center` is an ndarray its x/z entries are
    overwritten with the rotated values.
    '''
    box_size = class2size(size_class, size_res)
    l,w,h = box_size
    ry = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle
    rotated_center = rotate_pc_along_y(np.expand_dims(center,0), -rot_angle)
    tx,ty,tz = rotated_center.squeeze()
    ty += h/2.0
    return tx,ty,tz,l,w,h,ry
def size2class(size, type_name):
    ''' Encode a 3D box size as (template class, residual to the template mean).

    todo (rqi): support multiple size clusters per type.
    Input:
        size: numpy array of shape (3,) for (l,w,h)
        type_name: string, key into g_type2class / g_type_mean_size
    Output:
        size_class: int scalar
        size_residual: numpy array of shape (3,)
    '''
    template_mean = g_type_mean_size[type_name]
    return g_type2class[type_name], size - template_mean
def class2size(pred_cls, residual):
    ''' Inverse of size2class: template mean size of `pred_cls` plus `residual`. '''
    type_name = g_class2type[pred_cls]
    return g_type_mean_size[type_name] + residual
def angle2class(angle, num_class):
    ''' Discretize a continuous angle into a class id and a residual.

    Class centers sit at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N).
    Input:
        angle: rad scalar, from 0-2pi (or -pi~pi)
        num_class: int scalar, number of classes N
    Output:
        class_id, int, among 0,1,...,N-1
        residual_angle: float, a number such that
            class*(2pi/N) + residual_angle = angle
    '''
    full_turn = 2*np.pi
    wrapped = angle%full_turn
    assert 0 <= wrapped <= full_turn
    bin_width = full_turn/float(num_class)
    half_bin = bin_width/2
    # Shift by half a bin so that each bin is centred on its class angle.
    shifted = (wrapped+half_bin)%full_turn
    class_id = int(shifted/bin_width)
    residual_angle = shifted - (class_id * bin_width + half_bin)
    return class_id, residual_angle
def class2angle(pred_cls, residual, num_class, to_label_format=True):
    ''' Inverse of angle2class: class center angle plus residual.

    When to_label_format is true, results above pi are shifted down by 2pi.
    '''
    bin_width = 2*np.pi/float(num_class)
    angle = pred_cls * bin_width + residual
    if not to_label_format:
        return angle
    if angle>np.pi:
        angle = angle - 2*np.pi
    return angle
def rotate_pc_along_y(pc, rot_angle):
    ''' Rotate the X/Z columns of a point array in place by rot_angle.

    Input:
        pc: numpy array (N,C), first 3 channels are XYZ
            (original author's note: z is facing forward, x is left ward,
            y is downward)
        rot_angle: rad scalar
    Output:
        pc: the same array object, with columns 0 and 2 overwritten by the
            rotated values; all other columns are untouched
    '''
    c, s = np.cos(rot_angle), np.sin(rot_angle)
    rotation_t = np.array([[c, -s],[s, c]]).T
    xz = [0,2]
    pc[:,xz] = np.dot(pc[:,xz], rotation_t)
    return pc
# --------------------------------------
# Shared subgraphs for v1 and v2 models
# --------------------------------------
def placeholder_inputs(batch_size, num_point):
    ''' Create the input and ground-truth placeholders.

    Input:
        batch_size: scalar int
        num_point: scalar int
    Output:
        tuple of TF placeholders, in order:
            point clouds (B,N,4), one-hot type vector (B,3),
            segmentation labels (B,N), centers (B,3),
            heading class (B,), heading residual (B,),
            size class (B,), size residual (B,3)
    '''
    B = batch_size
    pointclouds_pl = tf.placeholder(tf.float32, shape=(B, num_point, 4))
    one_hot_vec_pl = tf.placeholder(tf.float32, shape=(B, 3))

    # Per-point segmentation label.
    labels_pl = tf.placeholder(tf.int32, shape=(B, num_point))

    # Box ground truth.
    centers_pl = tf.placeholder(tf.float32, shape=(B, 3))
    heading_class_label_pl = tf.placeholder(tf.int32, shape=(B,))
    heading_residual_label_pl = tf.placeholder(tf.float32, shape=(B,))
    size_class_label_pl = tf.placeholder(tf.int32, shape=(B,))
    size_residual_label_pl = tf.placeholder(tf.float32, shape=(B,3))

    return (pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl,
            heading_class_label_pl, heading_residual_label_pl,
            size_class_label_pl, size_residual_label_pl)
def point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):
    ''' Keep the points selected by the predicted mask and re-centre them on
    the centroid of the masked points.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        logits: TF tensor in shape (B,N,2)
        end_points: dict
        xyz_only: boolean, if True only return XYZ channels
    Output:
        object_point_cloud: TF tensor in shape (B,M,3) when xyz_only,
            otherwise (B,M,C); M = NUM_OBJECT_POINT as a hyper-parameter
        mask_xyz_mean: TF tensor in shape (B,3)
        end_points: dict, with the (B,N) float mask stored under 'mask'
    '''
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # A point is selected when its channel-1 logit exceeds its channel-0 logit.
    logit0 = tf.slice(logits,[0,0,0],[-1,-1,1])
    logit1 = tf.slice(logits,[0,0,1],[-1,-1,1])
    mask = tf.to_float(logit0 < logit1) # BxNx1
    selected_per_cloud = tf.reduce_sum(mask,axis=1,keep_dims=True)
    mask_count = tf.tile(selected_per_cloud, [1,1,3]) # Bx1x3

    point_cloud_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3]) # BxNx3
    masked_xyz = tf.tile(mask, [1,1,3])*point_cloud_xyz
    mask_xyz_mean = tf.reduce_sum(masked_xyz, axis=1, keep_dims=True) # Bx1x3
    mask = tf.squeeze(mask, axis=[2]) # BxN
    end_points['mask'] = mask
    # Clamp the divisor to 1 so an empty mask does not divide by zero.
    mask_xyz_mean = mask_xyz_mean/tf.maximum(mask_count,1) # Bx1x3

    # Translate to masked points' centroid
    centroid_tiled = tf.tile(mask_xyz_mean, [1,num_point,1])
    point_cloud_xyz_stage1 = point_cloud_xyz - centroid_tiled

    point_cloud_stage1 = point_cloud_xyz_stage1
    if not xyz_only:
        # Re-attach the non-XYZ channels unchanged.
        point_cloud_features = tf.slice(point_cloud, [0,0,3], [-1,-1,-1])
        point_cloud_stage1 = tf.concat(
            [point_cloud_xyz_stage1, point_cloud_features], axis=-1)
    num_channels = point_cloud_stage1.get_shape()[2].value

    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,
                                                mask, NUM_OBJECT_POINT)
    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])

    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points
def get_center_regression_net(object_point_cloud, one_hot_vec,
                              is_training, bn_decay, end_points):
    ''' Regression network for center delta. a.k.a. T-Net.

    Three 1x1 conv layers, a max-pool over all points, then three fully
    connected layers (the one-hot type vector is concatenated before the
    first of them).
    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in 3D mask coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
    Output:
        predicted_center: TF tensor in shape (B,3)
        end_points: the dict that was passed in, unchanged
    '''
    num_point = object_point_cloud.get_shape()[1].value
    net = tf.expand_dims(object_point_cloud, 2)

    # Per-point feature extraction.
    conv_layers = ((128, 'conv-reg1-stage1'),
                   (128, 'conv-reg2-stage1'),
                   (256, 'conv-reg3-stage1'))
    for width, layer_scope in conv_layers:
        net = tf_util.conv2d(net, width, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope=layer_scope, bn_decay=bn_decay)

    # Collapse the point axis into one global feature per cloud.
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='maxpool-stage1')
    net = tf.squeeze(net, axis=[1,2])
    net = tf.concat([net, one_hot_vec], axis=1)

    fc_layers = ((256, 'fc1-stage1'), (128, 'fc2-stage1'))
    for width, layer_scope in fc_layers:
        net = tf_util.fully_connected(net, width, scope=layer_scope, bn=True,
                                      is_training=is_training, bn_decay=bn_decay)

    # Linear output layer: no activation, no batch norm.
    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,
                                               scope='fc3-stage1')
    return predicted_center, end_points
def get_loss(mask_label, center_label, \
             heading_class_label, heading_residual_label, \
             size_class_label, size_residual_label, \
             end_points, \
             corner_loss_weight=10.0, \
             box_loss_weight=1.0):
    ''' Loss functions for 3D object detection.

    Combines a segmentation loss, two center regression losses, heading and
    size classification/residual losses and a corner loss into one scalar.
    Each individual loss is also written as a TF scalar summary.
    Input:
        mask_label: TF int32 tensor in shape (B,N)
        center_label: TF tensor in shape (B,3)
        heading_class_label: TF int32 tensor in shape (B,)
        heading_residual_label: TF tensor in shape (B,)
        size_class_label: TF tensor int32 in shape (B,)
        size_residual_label: TF tensor in shape (B,3)
        end_points: dict, outputs from our model. Keys read here:
            'mask_logits', 'center', 'stage1_center', 'heading_scores',
            'heading_residuals_normalized', 'heading_residuals',
            'size_scores', 'size_residuals_normalized', 'size_residuals'
        corner_loss_weight: float scalar
        box_loss_weight: float scalar
    Output:
        total_loss: TF scalar tensor
            the total_loss is also added to the losses collection
    '''
    # 3D Segmentation loss
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
        logits=end_points['mask_logits'], labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)

    # Center regression losses
    # (Huber loss on the Euclidean distance between label and prediction.)
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - \
        end_points['stage1_center'], axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)

    # Heading loss
    heading_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)

    # One-hot of the ground-truth heading bin; used to pick out the residual
    # predicted for that bin only.
    hcls_onehot = tf.one_hot(heading_class_label,
        depth=NUM_HEADING_BIN,
        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN
    # Normalize the label the same way the prediction is normalized
    # (divide by half a bin width).
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi/NUM_HEADING_BIN)
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
        heading_residual_normalized_loss)

    # Size loss
    size_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)

    # One-hot of the ground-truth size cluster, tiled over the 3 size dims so
    # it can select that cluster's residual and mean size.
    scls_onehot = tf.one_hot(size_class_label,
        depth=NUM_SIZE_CLUSTER,
        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims( \
        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3
    predicted_size_residual_normalized = tf.reduce_sum( \
        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3

    mean_size_arr_expand = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3
    mean_size_label = tf.reduce_sum( \
        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3
    # Normalize the residual label by the mean size of its cluster.
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm( \
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
        size_residual_normalized_loss)

    # Corner loss
    # We select the predicted corners corresponding to the
    # GT heading bin and size cluster.
    corners_3d = get_box3d_corners(end_points['center'],
        end_points['heading_residuals'],
        end_points['size_residuals']) # (B,NH,NS,8,3)
    # Outer product of the two one-hots: 1 only at (GT heading bin, GT size cluster).
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum( \
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,
        axis=[1,2]) # (B,8,3)

    # Rebuild the ground-truth heading angle and box size from their
    # (class, residual) encoding.
    heading_bin_centers = tf.constant( \
        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    heading_label = tf.expand_dims(heading_residual_label,1) + \
        tf.expand_dims(heading_bin_centers, 0) # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)
    mean_sizes = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum( \
        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)
    corners_3d_gt = get_box3d_corners_helper( \
        center_label, heading_label, size_label) # (B,8,3)
    # Also compare against the GT box with its heading turned by pi and keep
    # the smaller per-corner distance of the two.
    corners_3d_gt_flip = get_box3d_corners_helper( \
        center_label, heading_label+np.pi, size_label) # (B,8,3)
    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0)
    tf.summary.scalar('corners loss', corners_loss)

    # Weighted sum of all losses
    # (the two normalized residual losses carry a fixed extra factor of 20).
    total_loss = mask_loss + box_loss_weight * (center_loss + \
        heading_class_loss + size_class_loss + \
        heading_residual_normalized_loss*20 + \
        size_residual_normalized_loss*20 + \
        stage1_center_loss + \
        corner_loss_weight*corners_loss)
    tf.add_to_collection('losses', total_loss)
    return total_loss
# NOTE(review): the text from the `fov_inds = ...` statement below down to
# `return point_clouds, rot_angles, ids_3d` is not valid Python as it stands:
# the comparisons read `pts_2d[:,0]=xmin` / `len(pc_in_box_fov) num_point`,
# and the variables used further down (`point_cloud`, `pc_image_coord`,
# `rot_angles`, `point_clouds`, `ids_3d`, `num_point`) are never defined in
# this function. It looks as if comparison operators and the lines between
# two separate functions were lost -- presumably the tail belongs to the
# `preprocess_pointcloud` function that generate_detections_3d calls.
# TODO: restore this region from the original file before relying on it.
def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
clip_distance=40.0):
''' Filter lidar points, keep those in image FOV '''
#pts_2d = calib.project_rect_to_image(calib.project_ref_to_rect(pc_velo))
#pts_2d = calib.project_rect_to_image_torch(calib.project_ref_to_rect_torch(torch.from_numpy(pc_velo).cuda()))
# Project the points to image coordinates via torch on the GPU, then bring
# the result back as a numpy array.
pts_2d = calib.project_ref_to_image_torch(torch.from_numpy(pc_velo).cuda())
pts_2d = pts_2d.cpu().numpy()
# NOTE(review): garbled from here on -- see the note above this function.
fov_inds = (pts_2d[:,0]=xmin) & \
(pts_2d[:,1]=ymin)
# fov_inds = fov_inds & (pc_velo[:,2]=xmin) & \
(pc_image_coord[:,1]=ymin)
pc_in_box_fov = point_cloud[box_fov_inds,:]
# Center of the 2D box in image coordinates, lifted to 3D at a fixed depth
# to obtain the viewing direction of the frustum.
box_center = np.array([xmax+xmin, ymin+ymax])/2
uvdepth = np.zeros((1,3))
uvdepth[0,0:2] = box_center
uvdepth[0,2] = 20 # some random depth
box2d_center_rect = calib.project_image_to_rect(uvdepth)
frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],
box2d_center_rect[0,0])
rot_angles.append(frustum_angle)
# NOTE(review): the comparison operator in the next condition is missing.
if len(pc_in_box_fov) num_point:
# Sample num_point points without replacement.
pc_in_box_fov = np.expand_dims(pc_in_box_fov[np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point), replace=False)], 0)
else:
# Pad up to num_point points by re-sampling existing points with replacement.
pc_in_box_fov = np.expand_dims(np.vstack([pc_in_box_fov, pc_in_box_fov[np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point-pc_in_box_fov.shape[0]), replace=True)]]), 0)
# Rotate the sampled points around Y by the frustum angle.
pc_in_box_fov[0] = rotate_pc_along_y(pc_in_box_fov[0], frustum_angle)
# frustum = o3d.geometry.PointCloud()
# out = pc_in_box_fov[0, :, :3]
# # out = out[:, [2, 0, 1]]
# # out[:, 1] *= -1
# # out[:, 2] *= -1
# frustum.points = o3d.utility.Vector3dVector(out)
# o3d.io.write_point_cloud("pc_frustum_sample_rot_1.xyz", frustum)
# # rot_angles.append(0) #no frsturum rotation
# import pdb; pdb.set_trace()
point_clouds.append(pc_in_box_fov)
return point_clouds, rot_angles, ids_3d
# @profile
def generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False):
    """ Run the 3D detector on the frustum point cloud of every 2D detection.

    Input:
        detector: callable taking (frustum point clouds, rotation angles, peds);
            must expose a `num_point` attribute
        detections_2d: 2D detections, passed through to preprocess_pointcloud
        point_cloud: array whose first 3 columns are passed to
            get_lidar_in_image_fov
        calib: calibration object, passed through to the helpers
        img_shape: 3-tuple; the last two entries are used as (height, width)
        peds: forwarded to the detector unchanged
    Output:
        boxes_3d, ids_3d, rot_angles, scores_3d, depth_features
        boxes_3d[i] is set to None wherever ids_3d[i] == -1.
    """
    _channels, img_height, img_width = img_shape

    # Keep only the lidar points that project inside the full image.
    xyz = np.copy(point_cloud[:,:3])
    _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(xyz, calib, 0, 0, img_width, img_height)
    pc_image_coord = pc_image_coord[img_fov_inds,:]
    point_cloud = point_cloud[img_fov_inds,:]

    point_cloud_frustrums, rot_angles, ids_3d = preprocess_pointcloud(detections_2d, point_cloud, pc_image_coord, calib, num_point = detector.num_point)
    point_cloud_frustrums = np.vstack(point_cloud_frustrums)

    boxes_3d, scores_3d, depth_features = detector(point_cloud_frustrums, np.asarray(rot_angles), peds)

    # Blank out the boxes of detections flagged with id -1.
    for idx, det_id in enumerate(ids_3d):
        if det_id == -1:
            boxes_3d[idx] = None
    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features
def convert_depth_features(depth_features_orig, ids_3d, cuda = True):
    """ Convert per-detection depth features to float torch tensors.

    Input:
        depth_features_orig: sequence of array-likes (or None entries)
        ids_3d: sequence aligned with depth_features_orig; -1 marks an
            entry to drop
        cuda: if True the tensors are torch.cuda.FloatTensor, otherwise
            torch.FloatTensor
    Output:
        list with one entry per input feature: a tensor, or None when the
        feature is None or its id is -1
    """
    tensor_type = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    def _to_tensor(idx, feature):
        if feature is None or ids_3d[idx] == -1:
            return None
        return torch.Tensor(feature).type(tensor_type)

    return [_to_tensor(idx, feature)
            for idx, feature in enumerate(depth_features_orig)]
================================================
FILE: paper_experiments/utils/featurepointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.
Author: Charles R. Qi
Date: November 2017
"""
import numpy as np
import tensorflow as tf
def _variable_on_cpu(name, shape, initializer, use_fp16=False):
    """Create (or fetch) a variable placed on device "/cpu:0".

    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable
        use_fp16: if True the variable is float16, otherwise float32
    Returns:
        Variable Tensor
    """
    if use_fp16:
        var_dtype = tf.float16
    else:
        var_dtype = tf.float32
    with tf.device("/cpu:0"):
        var = tf.get_variable(name, shape, initializer=initializer, dtype=var_dtype)
    return var
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
    """Create an initialized variable, optionally with L2 weight decay.

    The variable uses a Xavier initializer when use_xavier is true and a
    truncated normal with the given stddev otherwise. When wd is given, the
    decay term l2_loss(var) * wd is added to the 'losses' collection.

    Args:
        name: name of the variable
        shape: list of ints
        stddev: standard deviation of a truncated Gaussian
        wd: add L2Loss weight decay multiplied by this float. If None, weight
            decay is not added for this Variable.
        use_xavier: bool, whether to use xavier initializer
    Returns:
        Variable Tensor
    """
    initializer = (tf.contrib.layers.xavier_initializer() if use_xavier
                   else tf.truncated_normal_initializer(stddev=stddev))
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None:
        return var
    decay_term = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', decay_term)
    return var
def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 1D convolution followed by bias, optional batch norm and activation.

    Args:
        inputs: 3-D tensor variable BxLxC
        num_output_channels: int
        kernel_size: int
        scope: string
        stride: int
        padding: 'SAME' or 'VALID'
        data_format: 'NHWC' or 'NCHW'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        assert data_format in ('NHWC', 'NCHW')
        # Channels are the last axis for NHWC and axis 1 for NCHW.
        channel_axis = -1 if data_format == 'NHWC' else 1
        num_in_channels = inputs.get_shape()[channel_axis].value

        filter_shape = [kernel_size, num_in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=filter_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        outputs = tf.nn.conv1d(inputs, kernel,
                               stride=stride,
                               padding=padding,
                               data_format=data_format)

        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

        if bn:
            outputs = batch_norm_for_conv1d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format=data_format)
        if activation_fn is None:
            return outputs
        return activation_fn(outputs)
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 2D convolution with non-linear operation.

    Applies the convolution, adds a bias, then optionally batch norm and
    the activation function.

    Args:
        inputs: 4-D tensor variable BxHxWxC (or BxCxHxW for 'NCHW')
        num_output_channels: int
        kernel_size: a list of 2 ints
        scope: string
        stride: a list of 2 ints (height stride, width stride)
        padding: 'SAME' or 'VALID'
        data_format: 'NHWC' or 'NCHW'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        kernel_h, kernel_w = kernel_size
        assert(data_format=='NHWC' or data_format=='NCHW')
        if data_format == 'NHWC':
            num_in_channels = inputs.get_shape()[-1].value
        elif data_format=='NCHW':
            num_in_channels = inputs.get_shape()[1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride
        # tf.nn.conv2d interprets `strides` in the same axis order as
        # `data_format`, so the spatial strides must sit on the H/W axes.
        # The previous fixed [1, stride_h, stride_w, 1] layout put them on
        # the channel/height axes for 'NCHW'.
        if data_format == 'NHWC':
            strides = [1, stride_h, stride_w, 1]
        else:
            strides = [1, 1, stride_h, stride_w]
        outputs = tf.nn.conv2d(inputs, kernel,
                               strides,
                               padding=padding,
                               data_format=data_format)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=None,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
    """ 2D convolution transpose with non-linear operation.

    Applies the transposed convolution, adds a bias, then optionally batch
    norm and the activation function. The output shape is computed from the
    static input shape, so batch size, height and width must be known when
    the graph is built (a None dimension fails in get_deconv_dim).

    Args:
        inputs: 4-D tensor variable BxHxWxC
        num_output_channels: int
        kernel_size: a list of 2 ints
        scope: string
        stride: a list of 2 ints
        padding: 'SAME' or 'VALID'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable

    Returns:
        Variable tensor

    Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        kernel_h, kernel_w = kernel_size
        num_in_channels = inputs.get_shape()[-1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_output_channels, num_in_channels] # reversed to conv2d
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride

        # from slim.convolution2d_transpose
        def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
            dim_size *= stride_size

            if padding == 'VALID' and dim_size is not None:
                dim_size += max(kernel_size - stride_size, 0)
            return dim_size

        # calculate output shape
        batch_size = inputs.get_shape()[0].value
        height = inputs.get_shape()[1].value
        width = inputs.get_shape()[2].value
        out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
        out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
        output_shape = [batch_size, out_height, out_width, num_output_channels]

        outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                                         [1, stride_h, stride_w, 1],
                                         padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            # batch_norm_for_conv2d requires data_format; omitting it raised
            # a TypeError whenever bn=True. This layer is always NHWC.
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format='NHWC')

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 3D convolution followed by bias, optional batch norm and activation.

    Args:
        inputs: 5-D tensor variable BxDxHxWxC
        num_output_channels: int
        kernel_size: a list of 3 ints
        scope: string
        stride: a list of 3 ints
        padding: 'SAME' or 'VALID'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        k_d, k_h, k_w = kernel_size
        in_channels = inputs.get_shape()[-1].value
        filter_shape = [k_d, k_h, k_w, in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=filter_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        s_d, s_h, s_w = stride
        strides = [1, s_d, s_h, s_w, 1]
        outputs = tf.nn.conv3d(inputs, kernel, strides, padding=padding)

        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            outputs = batch_norm_for_conv3d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn')
        if activation_fn is None:
            return outputs
        return activation_fn(outputs)
def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=None,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
    """ Fully connected layer: matmul, bias, optional batch norm, activation.

    Args:
        inputs: 2-D tensor BxN
        num_outputs: int
        scope: string, variable scope
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float, or None for no weight decay
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable
        is_training: passed to the batch norm layer

    Returns:
        Variable tensor of size B x num_outputs.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        fan_in = inputs.get_shape()[-1].value
        weights = _variable_with_weight_decay('weights',
                                              shape=[fan_in, num_outputs],
                                              use_xavier=use_xavier,
                                              stddev=stddev,
                                              wd=weight_decay)
        projected = tf.matmul(inputs, weights)
        biases = _variable_on_cpu('biases', [num_outputs],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(projected, biases)

        if bn:
            outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')
        if activation_fn is None:
            return outputs
        return activation_fn(outputs)
def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D max pooling over the two middle axes of a BxHxWxC tensor.

    Args:
        inputs: 4-D tensor BxHxWxC
        kernel_size: a list of 2 ints
        scope: string, variable scope; its name is also used as the op name
        stride: a list of 2 ints
        padding: 'SAME' or 'VALID'

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        k_h, k_w = kernel_size
        s_h, s_w = stride
        window = [1, k_h, k_w, 1]
        steps = [1, s_h, s_w, 1]
        return tf.nn.max_pool(inputs,
                              ksize=window,
                              strides=steps,
                              padding=padding,
                              name=sc.name)
def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D average pooling over the two middle axes of a BxHxWxC tensor.

    Args:
        inputs: 4-D tensor BxHxWxC
        kernel_size: a list of 2 ints
        scope: string, variable scope; its name is also used as the op name
        stride: a list of 2 ints
        padding: 'SAME' or 'VALID'

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        k_h, k_w = kernel_size
        s_h, s_w = stride
        window = [1, k_h, k_w, 1]
        steps = [1, s_h, s_w, 1]
        return tf.nn.avg_pool(inputs,
                              ksize=window,
                              strides=steps,
                              padding=padding,
                              name=sc.name)
def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D max pooling over the three middle axes of a BxDxHxWxC tensor.

    Args:
        inputs: 5-D tensor BxDxHxWxC
        kernel_size: a list of 3 ints
        scope: string, variable scope; its name is also used as the op name
        stride: a list of 3 ints
        padding: 'SAME' or 'VALID'

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        k_d, k_h, k_w = kernel_size
        s_d, s_h, s_w = stride
        window = [1, k_d, k_h, k_w, 1]
        steps = [1, s_d, s_h, s_w, 1]
        return tf.nn.max_pool3d(inputs,
                                ksize=window,
                                strides=steps,
                                padding=padding,
                                name=sc.name)
def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D average pooling over the three middle axes of a BxDxHxWxC tensor.

    Args:
        inputs: 5-D tensor BxDxHxWxC
        kernel_size: a list of 3 ints
        scope: string, variable scope; its name is also used as the op name
        stride: a list of 3 ints
        padding: 'SAME' or 'VALID'

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        k_d, k_h, k_w = kernel_size
        s_d, s_h, s_w = stride
        window = [1, k_d, k_h, k_w, 1]
        steps = [1, s_d, s_h, s_w, 1]
        return tf.nn.avg_pool3d(inputs,
                                ksize=window,
                                strides=steps,
                                padding=padding,
                                name=sc.name)
def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
    """ NOTE: this is older version of the util func. it is deprecated.
    Batch normalization on convolutional maps and beyond...
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

    Normalizes with the current batch statistics while training (and updates
    their exponential moving averages), and with the moving averages
    otherwise. The branch is selected in-graph with tf.cond on is_training.

    Args:
        inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
        is_training: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        moments_dims: a list of ints, indicating dimensions for moments calculation
        bn_decay: float or float tensor variable, controlling moving average weight
            (0.9 is used when None)
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        num_channels = inputs.get_shape()[-1].value
        # Learned per-channel offset (beta, starts at 0) and scale (gamma, starts at 1).
        beta = _variable_on_cpu(name='beta',shape=[num_channels],
                                initializer=tf.constant_initializer(0))
        gamma = _variable_on_cpu(name='gamma',shape=[num_channels],
                                 initializer=tf.constant_initializer(1.0))
        batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
        decay = bn_decay if bn_decay is not None else 0.9
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        # Operator that maintains moving averages of variables.
        # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist
        # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            ema_apply_op = tf.cond(is_training,
                                   lambda: ema.apply([batch_mean, batch_var]),
                                   lambda: tf.no_op())

        # Update moving average and return current batch's avg and var.
        def mean_var_with_update():
            # The control dependency forces the moving-average update to run
            # before the batch statistics are handed back.
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # ema.average returns the Variable holding the average of var.
        mean, var = tf.cond(is_training,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        # 1e-3 is the variance epsilon passed to tf.nn.batch_normalization.
        normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
    return normed
def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
    """ Batch normalization via tf.contrib.layers.batch_norm.

    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

    Args:
        inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
        is_training: passed through as the layer's is_training argument
        scope: string, variable scope
        moments_dims_unused: accepted for signature compatibility; not used
        bn_decay: float or float tensor variable, controlling moving average
            weight (0.9 is used when None)
        data_format: 'NHWC' or 'NCHW'
    Return:
        normed: batch-normalized maps
    """
    decay = 0.9 if bn_decay is None else bn_decay
    layer_args = dict(center=True,
                      scale=True,
                      is_training=is_training,
                      decay=decay,
                      # None: moving-average updates are applied in place
                      # rather than collected for a separate update op.
                      updates_collections=None,
                      scope=scope,
                      data_format=data_format)
    return tf.contrib.layers.batch_norm(inputs, **layer_args)
def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
    """ Batch normalization for fully connected activations.

    Args:
        inputs: Tensor, 2D BxC input
        is_training: forwarded to batch_norm_template
        bn_decay: float or float tensor variable, controlling moving average weight
        scope: string, variable scope
    Return:
        normed: batch-normalized maps
    """
    moments_dims = [0,]
    return batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay)
def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):
    """ Batch normalization for 1D convolution outputs.

    Args:
        inputs: Tensor, 3D BLC input maps
        is_training: forwarded to batch_norm_template
        bn_decay: float or float tensor variable, controlling moving average weight
        scope: string, variable scope
        data_format: 'NHWC' or 'NCHW'
    Return:
        normed: batch-normalized maps
    """
    moments_dims = [0,1]
    return batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay, data_format)
def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format='NHWC'):
    """ Batch normalization on 2D convolutional maps.

    Args:
        inputs: Tensor, 4D BHWC input maps
        is_training: forwarded to batch_norm_template
        bn_decay: float or float tensor variable, controlling moving average weight
        scope: string, variable scope
        data_format: 'NHWC' or 'NCHW'. Defaults to 'NHWC' (the same default
            batch_norm_template uses) so callers that do not pass it, such as
            conv2d_transpose, no longer fail with a TypeError.
    Return:
        normed: batch-normalized maps
    """
    return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)
def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
    """ Batch normalization for the output of a 3D convolution.

    Thin wrapper that forwards to batch_norm_template with the moments
    dimensions fixed to [0, 1, 2, 3].

    Args:
        inputs: Tensor, 5D BDHWC input maps
        is_training: boolean tf.Variable, true indicates training phase
        bn_decay: float or float tensor variable, controlling moving average weight
        scope: string, variable scope
    Return:
        normed: batch-normalized maps
    """
    moments_dims = [0,1,2,3]
    normed = batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay)
    return normed
def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
    """ Dropout layer that is only active while training.

    Args:
        inputs: tensor
        is_training: boolean tf.Variable
        scope: string
        keep_prob: float in [0,1]
        noise_shape: list of ints

    Returns:
        tensor variable
    """
    def _with_dropout():
        # Branch selected by tf.cond when is_training is true.
        return tf.nn.dropout(inputs, keep_prob, noise_shape)

    def _passthrough():
        # Branch selected by tf.cond otherwise: inputs are returned untouched.
        return inputs

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        return tf.cond(is_training, _with_dropout, _passthrough)
================================================
FILE: paper_experiments/utils/imm.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import utils.EKF as EKF
import pdb
import utils.kf_2d as kf_2d
import matplotlib.pyplot as plt
# Print NumPy arrays with 4 digits of precision and without scientific
# notation for small values (module-wide side effect at import time).
np.set_printoptions(precision=4, suppress=True)
class IMMFilter2D(EKF.EKF):
    """
    An IMM (interacting multiple model) filter for tracking bounding boxes in
    image space. It runs two Kalman filters from `kf_2d` side by side and
    maintains a probability for each of them.

    Filter 1 (`self.kf1`, `kf_2d.KalmanFilter2D`): constant velocity model.
    Filter 2 (`self.kf2`, `kf_2d.RandomWalkKalmanFilter2D`): random walk model.

    State layout used by every method of this class:

    * `mean` holds one row per model (row 0 = constant velocity,
      row 1 = random walk).
    * `covariance` has shape (2, n, n), indexed by model first.
    * model probabilities are a (2, 1) column vector.
    """

    def __init__(self, kf_vel_params=(1./20, 1./160, 1, 1, 2), kf_walk_params=(1./20, 1./160, 1, 1, 2), markov=(0.9,0.7)):
        """Build the two sub-filters and the mode transition matrix.

        Parameters
        ----------
        kf_vel_params : tuple
            Positional arguments forwarded to `kf_2d.KalmanFilter2D`.
        kf_walk_params : tuple
            Positional arguments forwarded to
            `kf_2d.RandomWalkKalmanFilter2D`.
        markov : (float, float)
            Probability of moving to model 0 when currently in model 0 and
            when currently in model 1, respectively.
        """
        # NOTE(review): kf_2d.KalmanFilter2D.__init__ takes six positional
        # arguments (the last one being gate_limit) while the default
        # kf_vel_params only carries five -- confirm the intended default.
        self.kf1 = kf_2d.KalmanFilter2D(*kf_vel_params)
        self.kf2 = kf_2d.RandomWalkKalmanFilter2D(*kf_walk_params)
        # Row i is the distribution over the next model given current model i.
        self.markov_transition = np.asarray([[markov[0], 1-markov[0]],
                                             [markov[1], 1-markov[1]]])

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box measurement, passed unchanged to both sub-filters.
        flow : ndarray or None
            Optical flow handed to the constant velocity filter only; the
            random walk filter is always initiated with `None`.

        Returns
        -------
        (ndarray, ndarray, ndarray)
            Stacked means (one row per model), stacked covariances with shape
            (2, n, n) and the initial model probabilities, a (2, 1) array
            filled with 0.5.
        """
        mean_pos1, cov1 = self.kf1.initiate(measurement, flow)
        # Random walk does not need the flow
        mean_pos2, cov2 = self.kf2.initiate(measurement, None)
        # dstack puts the model index last; move it to the front.
        covariance = np.dstack([cov1, cov2])
        covariance = np.transpose(covariance, axes=(2,0,1))
        mean = np.vstack([mean_pos1, mean_pos2])
        model_probs = np.ones((2,1))*0.5
        return mean, covariance, model_probs

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        The distance is evaluated independently with both sub-filters and the
        element-wise smaller value is returned.

        Parameters
        ----------
        mean : ndarray
            Stacked means, one row per model.
        covariance : ndarray
            Stacked covariances, shape (2, n, n).
        measurements : ndarray
            Matrix of N measurements, one per row.
        only_position : Optional[bool]
            Forwarded to the sub-filters; if True they only use the first two
            measurement components.

        Returns
        -------
        ndarray
            For every measurement, the smaller of the two per-model gating
            distances.
        """
        dist1 = self.kf1.gating_distance(mean[0, :], covariance[0, :, :], measurements, only_position)
        dist2 = self.kf2.gating_distance(mean[1, :], covariance[1, :, :], measurements, only_position)
        return np.where(dist1 < dist2, dist1, dist2)

    def update(self, mean, covariance, measurement, model_probabilities, marginalization=None, JPDA=False):
        """Run the correction step of both filters and re-weight the models.

        Parameters
        ----------
        mean : ndarray
            Stacked predicted means, one row per model.
        covariance : ndarray
            Stacked predicted covariances, shape (2, n, n).
        measurement : ndarray
            Measurement(s) used for the correction.
        model_probabilities : ndarray
            Predicted model probabilities, shape (2, 1).
        marginalization, JPDA :
            Forwarded unchanged to the `update` method of each sub-filter.

        Returns
        -------
        (ndarray, ndarray, ndarray)
            Corrected stacked means, corrected stacked covariances and the
            updated model probabilities.
        """
        # Measurement-space prediction of each model, used for the likelihoods.
        mean_1, cov_1 = self.kf1.project(mean[0], covariance[0])
        mean_2, cov_2 = self.kf2.project(mean[1], covariance[1])
        distance_1 = EKF.squared_mahalanobis_distance(mean_1, cov_1, measurement)
        distance_2 = EKF.squared_mahalanobis_distance(mean_2, cov_2, measurement)
        distance = np.vstack([distance_1, distance_2])
        # Shift so the smallest distance is zero; the common factor cancels
        # in the normalisation below and keeps exp() away from underflow.
        distance -= np.amin(distance)
        dets = np.vstack([np.sqrt(np.linalg.det(cov_1)), np.sqrt(np.linalg.det(cov_2))])
        if distance.ndim > 1:
            # np.vstack always yields a 2-D array, so this is the branch taken.
            likelihood = np.sum(np.exp(-distance/2)/dets, axis = -1, keepdims = True)
        else:
            likelihood = np.exp(-distance/2)/dets
        model_probs = (likelihood*model_probabilities)/\
                np.sum(likelihood*model_probabilities)
        out_mean_1, out_cov_1 = self.kf1.update(mean[0], covariance[0], measurement, marginalization, JPDA)
        out_mean_2, out_cov_2 = self.kf2.update(mean[1], covariance[1], measurement, marginalization, JPDA)
        out_mean = np.vstack([out_mean_1, out_mean_2])
        out_cov = np.dstack([out_cov_1, out_cov_2])
        out_cov = np.transpose(out_cov, axes=(2,0,1))
        return out_mean, out_cov, model_probs

    def predict(self, mean, covariance, model_probabilities):
        """Run the IMM mixing step followed by each filter's prediction.

        Parameters
        ----------
        mean : ndarray
            Stacked means at the previous time step, one row per model.
        covariance : ndarray
            Stacked covariances at the previous time step, shape (2, n, n).
        model_probabilities : ndarray
            Model probabilities at the previous time step, shape (2, 1).

        Returns
        -------
        (ndarray, ndarray, ndarray)
            Predicted stacked means, predicted stacked covariances and the
            model probabilities propagated through the Markov chain.
        """
        # Probability of being in each model after the mode transition.
        model_future_probabilities = np.dot(self.markov_transition.T, model_probabilities)
        # Entry [i, j]: probability that the previous model was i given that
        # the next model is j.
        model_transition_probabilities = self.markov_transition*(model_probabilities/model_future_probabilities.T)
        mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2 = self.mix_models(mean[0], covariance[0], mean[1], covariance[1], model_transition_probabilities)
        out_mean_1, out_cov_1 = self.kf1.predict(mixed_mean_1, mixed_cov_1)
        out_mean_2, out_cov_2 = self.kf2.predict(mixed_mean_2, mixed_cov_2)
        out_mean = np.vstack([out_mean_1, out_mean_2])
        out_cov = np.dstack([out_cov_1, out_cov_2])
        out_cov = np.transpose(out_cov, axes=(2,0,1))
        return out_mean, out_cov, model_future_probabilities

    def mix_models(self, mean_1, cov_1, mean_2, cov_2, model_transition_probabilities):
        """Blend the two model estimates into mixed initial conditions.

        Parameters
        ----------
        mean_1, cov_1 : ndarray
            Mean and covariance of model 0.
        mean_2, cov_2 : ndarray
            Mean and covariance of model 1.
        model_transition_probabilities : ndarray
            2x2 mixing weights; column j holds the weights used to build the
            mixed state of model j.

        Returns
        -------
        (ndarray, ndarray, ndarray, ndarray)
            Mixed mean and covariance for model 0, then for model 1.
        """
        mixed_mean_1 = model_transition_probabilities[0, 0]*mean_1 + model_transition_probabilities[1, 0]*mean_2
        mixed_mean_2 = model_transition_probabilities[0, 1]*mean_1 + model_transition_probabilities[1, 1]*mean_2
        mean_diff_12 = mean_1 - mixed_mean_2
        mean_diff_21 = mean_2 - mixed_mean_1
        mean_diff_11 = mean_1 - mixed_mean_1
        mean_diff_22 = mean_2 - mixed_mean_2
        # Each term is (source covariance + spread of the source mean around
        # the mixed mean), weighted by the matching mixing probability.
        mixed_cov_1 = model_transition_probabilities[0, 0]*(cov_1+np.outer(mean_diff_11, mean_diff_11)) + \
                model_transition_probabilities[1, 0]*(cov_2+np.outer(mean_diff_21, mean_diff_21))
        # The model-0 contribution must use cov_1 (it previously used cov_2
        # twice, dropping model 0's covariance from the mix).
        mixed_cov_2 = model_transition_probabilities[0, 1]*(cov_1+np.outer(mean_diff_12, mean_diff_12)) + \
                model_transition_probabilities[1, 1]*(cov_2+np.outer(mean_diff_22, mean_diff_22))
        return mixed_mean_1, mixed_cov_1, mixed_mean_2, mixed_cov_2

    @staticmethod
    def combine_states(mean, cov, model_probabilities):
        """Collapse the per-model estimates into one probability-weighted state.

        Parameters
        ----------
        mean : ndarray
            Stacked means, one row per model.
        cov : ndarray
            Stacked covariances, shape (2, n, n).
        model_probabilities : ndarray
            Model probabilities, shape (2, 1).

        Returns
        -------
        (ndarray, ndarray)
            Probability-weighted sum of the means and of the covariances.
        """
        mean = np.sum(model_probabilities*mean, axis = 0)
        covariance = np.sum(np.expand_dims(model_probabilities,2)*cov, axis = 0)
        return mean, covariance
def generate_particle_motion(motion_matrices, initial_state, process_noise, length = 100):
    """Simulate a trajectory that switches randomly between two motion modes.

    Parameters
    ----------
    motion_matrices : sequence of ndarray
        State transition matrix for mode 0 and mode 1.
    initial_state : ndarray
        First state of the trajectory.
    process_noise : sequence of float
        Scale of the Gaussian noise added in mode 0 and mode 1.
    length : int
        Number of simulated steps.

    Returns
    -------
    (ndarray, list)
        Array of `length + 1` states (the initial state first) and a list of
        `length + 1` mode indices; the initial mode appears twice at the
        start of that list.
    """
    # Row m: probability of switching to mode 0 / mode 1 when in mode m.
    transition = np.asarray([[0.9, 0.1],[.7, 0.3]])
    current_mode = 0 if np.random.random() < 0.5 else 1

    trajectory = [initial_state]
    modes = [current_mode]
    for _ in range(length):
        modes.append(current_mode)
        propagated = np.dot(motion_matrices[current_mode], trajectory[-1])
        noise = np.random.randn(*initial_state.shape)*process_noise[current_mode]
        trajectory.append(propagated+noise)
        # Draw the mode that drives the next step.
        current_mode = 0 if np.random.rand() < transition[current_mode][0] else 1
    return np.array(trajectory), modes
def generate_observations(input_state_list, observation_matrix, observation_noise):
    """Project every state through the observation matrix and add noise.

    Parameters
    ----------
    input_state_list : sequence of ndarray
        States to observe.
    observation_matrix : ndarray
        Linear observation model applied to each state.
    observation_noise : float
        Scale of the Gaussian noise added to each observation.

    Returns
    -------
    ndarray
        One noisy observation per input state.
    """
    observation_shape = np.dot(observation_matrix, input_state_list[0]).shape
    observations = []
    for state in input_state_list:
        clean = np.dot(observation_matrix, state)
        noise = np.random.randn(*observation_shape)*observation_noise
        observations.append(clean+noise)
    return np.array(observations)
if __name__=='__main__':
    # Demo: simulate a mode-switching trajectory, track it with the IMM filter
    # and with a single constant velocity filter, then plot and compare both.
    # NOTE(review): IMMFilter2D() and kf_2d.KalmanFilter2D() are built with
    # their defaults / no arguments, but KalmanFilter2D.__init__ in kf_2d.py
    # requires six positional arguments -- supply real parameters before
    # running this demo.
    imm_filter = IMMFilter2D()
    motion_matrix = np.eye(8)
    motion_matrix[0,4] = 1
    motion_matrix[1,5] = 1
    initial_state = np.array([0,0,1,1,1,1,0,0])
    states, modes = generate_particle_motion([motion_matrix, np.eye(8)], initial_state, [0.1, 2], 50)
    plt.subplot(211)
    plt.plot(states[:,0], states[:,1], linestyle = '--', marker='.', label= 'True state')
    observation_matrix = np.eye(4,8)
    obs = generate_observations(states, observation_matrix, 0.5)
    rnd_filter = kf_2d.KalmanFilter2D()
    # No optical flow is available in this synthetic demo, so pass None for
    # the required `flow` argument of initiate().
    mean, covariance, probs = imm_filter.initiate(obs[0], None)
    mean_rand, cov_rand = rnd_filter.initiate(obs[0], None)
    mean_list, covariance_list, probs_list = [], [], []
    mean_list_rand, covariance_list_rand = [], []
    combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)
    mean_list.append(combined_mean)
    covariance_list.append(combined_cov)
    mean_list_rand.append(mean_rand)
    covariance_list_rand.append(cov_rand)
    probs_list.append(probs)
    for idx, i in enumerate(obs[1:]):
        # Reference filter: single constant velocity model.
        mean_rand_new, cov_rand_new = rnd_filter.predict(mean_rand, cov_rand)
        mean_rand, cov_rand = rnd_filter.update(mean_rand_new, cov_rand_new, i)
        mean_list_rand.append(mean_rand)
        covariance_list_rand.append(cov_rand)
        # IMM filter: predict, correct, then collapse to a single estimate.
        mean_new, covariance_new, probs_new = imm_filter.predict(mean, covariance, probs)
        mean, covariance, probs = imm_filter.update(mean_new, covariance_new, i, probs_new)
        combined_mean, combined_cov = imm_filter.combine_states(mean, covariance, probs)
        # (leftover pdb.set_trace() breakpoints removed so the loop runs through)
        mean_list.append(combined_mean)
        covariance_list.append(combined_cov)
        probs_list.append(probs)
    mean_list = np.array(mean_list)
    mean_list_rand = np.array(mean_list_rand)
    plt.plot(mean_list[:, 0], mean_list[:, 1], marker='+', c='k', label = 'IMMestimate', alpha = 0.6)
    plt.plot(mean_list_rand[:, 0], mean_list_rand[:, 1], marker=',', c='orange', label = 'CV estimate', alpha = 0.6)
    # Mean squared position error of each filter against the true states.
    MSE_IMM = np.mean((mean_list[:,:2]-states[:,:2])**2)
    MSE = np.mean((mean_list_rand[:,:2]-states[:,:2])**2)
    print("MSE: %f for 2D filter"%MSE)
    print("MSE: %f for IMM filter"%MSE_IMM)
    plt.legend()
    plt.subplot(212)
    plt.plot(modes, label='True modes')
    plt.plot([i[1] for i in probs_list], label='predicted modes')
    plt.legend()
    plt.show()
================================================
FILE: paper_experiments/utils/iou_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
import pdb
def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate.

    """
    box_min = bbox[:2]
    box_max = bbox[:2] + bbox[2:]
    cand_min = candidates[:, :2]
    cand_max = candidates[:, :2] + candidates[:, 2:]

    # Corners of the overlap rectangle, one row per candidate.
    overlap_min = np.column_stack((np.maximum(box_min[0], cand_min[:, 0]),
                                   np.maximum(box_min[1], cand_min[:, 1])))
    overlap_max = np.column_stack((np.minimum(box_max[0], cand_max[:, 0]),
                                   np.minimum(box_max[1], cand_max[:, 1])))
    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap_wh = np.maximum(0., overlap_max - overlap_min)

    area_intersection = overlap_wh.prod(axis=1)
    area_union = bbox[2:].prod() + candidates[:, 2:].prod(axis=1) - area_intersection
    return area_intersection / area_union
def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None, use3d=False, kf=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.
    use3d : bool
        If True, boxes are taken from `detection.box_3d` and
        `track.to_tlwh3d()` and reduced to four components before the IoU is
        computed; otherwise `detection.tlwh` and `track.to_tlwh(kf)` are used.
    kf :
        Passed to `track.to_tlwh` in the 2D case.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    if 0 in cost_matrix.shape:
        # Nothing to compare on at least one side.
        return cost_matrix

    if use3d:
        # Convert 3d detections to tlwh format
        # @TODO: Should use a Detection3D class to do this
        candidates = np.array([detections[i].box_3d for i in detection_indices])
        candidates[:,:2] -= candidates[:,3:5] / 2
        candidates = candidates[:, [0,2,3,5]]
    else:
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])

    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        if use3d:
            # Same reduction as applied to the detections above.
            bbox = track.to_tlwh3d()
            bbox[:2] -= bbox[3:5] / 2
            bbox = bbox[[0,2,3,5]]
        else:
            bbox = track.to_tlwh(kf)
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix
================================================
FILE: paper_experiments/utils/kf_2d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import utils.EKF as EKF
import pdb
# Print NumPy arrays with 4 digits of precision and without scientific
# notation for small values (module-wide side effect at import time).
np.set_printoptions(precision=4, suppress=True)
class KalmanFilter2D(EKF.EKF):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, w, h, vx, vy, vw, vh

    contains the bounding box position (x, y), width w, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, w, h) is taken as direct observation of the state space (linear
    observation model).
    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, gate_limit):
        """Set up the model matrices and noise weights.

        Parameters
        ----------
        pos_weight : float
            Weight of the box size in every position-type standard deviation.
        velocity_weight : float
            Weight of the box size in every velocity-type standard deviation.
        std_process : float
            Global multiplier of the process noise.
        std_measurement : float
            Global multiplier of the measurement noise.
        initial_uncertainty : float
            Extra multiplier applied to the covariance of a new track.
        gate_limit : float
            Upper bound for the largest projected covariance entry used in
            `gating_distance`.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        # Reference x value used to scale the initial uncertainty in initiate().
        self.img_center = 1242

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt

        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        self.LIMIT = gate_limit

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box measurement (x, y, w, h).
        flow : ndarray or None
            Optional optical flow indexed as [row, column, channel]. When
            given, the mean flow inside the box initialises (vx, vy).

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Velocities are initialized to 0
            unless a flow estimate is available.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        if flow is not None:
            # NOTE(review): the crop starts at (x, y), i.e. treats it as a box
            # corner rather than the centre -- confirm the convention.
            flow_patch = flow[int(mean_pos[1]):int(mean_pos[1]+mean_pos[3]),
                              int(mean_pos[0]):int(mean_pos[0]+mean_pos[2]), :]
            # An empty crop (box outside the flow image or of zero size) would
            # average to NaN and poison the whole state; keep zero velocity.
            if flow_patch.size > 0:
                mean_vel[:2] = np.mean(np.reshape(flow_patch, (-1, 2)), axis=0)
        mean = np.r_[mean_pos, mean_vel]

        # Initialize covariance based on w, h and configured std. The
        # uncertainty grows with the distance of x from img_center.
        x_offset = abs(mean_pos[0]/self.img_center - 1)
        pos_scale = (1 + x_offset) * self._std_weight_pos
        vel_scale = (1 + 1.5*x_offset) * self._std_weight_vel
        std = [
            pos_scale * measurement[2],
            pos_scale * measurement[3],
            pos_scale * measurement[2],
            pos_scale * measurement[3],
            vel_scale * measurement[2],
            vel_scale * measurement[3],
            vel_scale * measurement[2],
            vel_scale * measurement[3]]
        covariance = np.diag(np.square(std))*(self._initial_uncertainty*self._std_weight_process)**2
        return mean, covariance

    def predict_mean(self, mean):
        """Propagate the state mean one step with the constant velocity model."""
        return np.dot(self._motion_mat, mean)

    def predict_covariance(self, mean, covariance, last_detection, next_to_last_detection):
        """Propagate the covariance one step and add the process noise.

        `last_detection` and `next_to_last_detection` are only forwarded to
        `get_process_noise`.
        """
        process_noise = self.get_process_noise(mean, last_detection, next_to_last_detection)
        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T))
                + process_noise)

    def get_process_noise(self, mean, last_detection, next_to_last_detection):
        """Return the process noise covariance for the given state.

        The standard deviations are proportional to the current width
        (`mean[2]`) and height (`mean[3]`).
        """
        depth_scale = 1
        if last_detection.box_3d is not None:
            dist = last_detection.get_3d_distance()
            depth_scale = max(1,1+(16-dist)/10)
            if next_to_last_detection is not None and next_to_last_detection.box_3d is not None:
                b1 = last_detection.box_3d
                b2 = next_to_last_detection.box_3d
                # 0.5 instead of (1/2): the latter is 0 under Python 2.
                vel = ((b1[0]-b2[0])**2 + (b1[2]-b2[2])**2)**0.5
                if vel > 2: # Fast moving (car) nearby, increase uncertainty
                    depth_scale *= 2
        # The depth / speed heuristic above is currently switched off: the
        # scale is forced back to 1 before it is used.
        depth_scale = 1
        # Motion uncertainty scaled by estimated width and height
        std_pos = [
            depth_scale * self._std_weight_pos * mean[2],
            depth_scale * self._std_weight_pos * mean[3],
            depth_scale * self._std_weight_pos * mean[2],
            depth_scale * self._std_weight_pos * mean[3]]
        std_vel = [
            depth_scale * self._std_weight_vel * mean[2],
            depth_scale * self._std_weight_vel * mean[3],
            depth_scale * self._std_weight_vel * mean[2],
            depth_scale * self._std_weight_vel * mean[3]]

        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))*self._std_weight_process**2
        return motion_cov

    def project_mean(self, mean):
        """Map the state mean to measurement space (first four components)."""
        return np.dot(self._observation_mat, mean)

    def get_measurement_noise(self, measurement):
        """Return the sensor noise covariance.

        The standard deviations are proportional to `measurement[2]` and
        `measurement[3]` (width and height).
        """
        std = [
            self._std_weight_pos*measurement[2],
            self._std_weight_pos*measurement[3],
            self._std_weight_pos*measurement[2],
            self._std_weight_pos*measurement[3]]
        innovation_cov = np.diag(np.square(std))*self._std_weight_measurement**2
        return innovation_cov

    def project_cov(self, mean, covariance):
        """Return the innovation covariance (projected covariance plus sensor noise).

        The sensor noise is evaluated at the state mean.
        """
        measurement_noise = self.get_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                                    self._observation_mat.T))
                                 + measurement_noise)
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, use_3d=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, w, h).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the first
            two components (x, y) only.
        use_3d : Optional[bool]
            Accepted but not used by this implementation.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        projected_mean, projected_covariance = self.project(mean, covariance)
        if only_position:
            projected_mean, projected_covariance = projected_mean[:2], projected_covariance[:2, :2]
            measurements = measurements[:, :2]
        # Cap the projected covariance: rescale it so that its largest entry
        # does not exceed the configured gate limit.
        max_val = np.amax(projected_covariance)
        if max_val > self.LIMIT:
            projected_covariance *= self.LIMIT / max_val
        return EKF.squared_mahalanobis_distance(projected_mean, projected_covariance, measurements)
class RandomWalkKalmanFilter2D(KalmanFilter2D):
    """
    A random walk Kalman filter for tracking bounding boxes in image space.

    The state keeps the same 8-dimensional layout as `KalmanFilter2D`

        x, y, w, h, vx, vy, vw, vh

    but the motion matrix carries the first four components over unchanged
    and zeroes the velocity block, so the predicted box stays where it is and
    only its uncertainty grows.

    The bounding box (x, y, w, h) is taken as direct observation of the state
    space (linear observation model).
    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement, initial_uncertainty, img_center=1242, gate_limit=np.inf):
        """Set up the model matrices and noise weights.

        Parameters
        ----------
        pos_weight, velocity_weight, std_process, std_measurement,
        initial_uncertainty : float
            Same meaning as in `KalmanFilter2D.__init__`.
        img_center : float
            Reference x value used to scale the initial uncertainty.
        gate_limit : float
            Upper bound for the largest projected covariance entry used by
            the inherited `gating_distance`. The default (infinity) disables
            the cap.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = img_center

        # Create Kalman filter model matrices.
        # Motion model is a random walk: positions are kept, velocities are
        # reset to zero at every step.
        self._motion_mat = np.eye(2*ndim, 2*ndim)
        self._motion_mat[ndim:, ndim:] = 0
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2*ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        # The inherited gating_distance() reads self.LIMIT; without this
        # attribute it raises AttributeError because the parent __init__ is
        # not called here.
        self.LIMIT = gate_limit
================================================
FILE: paper_experiments/utils/kf_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import EKF
import pdb
class KalmanFilter3D(EKF.EKF):
    """
    A simple 3D Kalman filter for tracking bounding cuboids in 3d.

    The 12-dimensional state space

        x, y, l, h, w, theta, Vx, Vy, Vl, Vh, Vw, Vtheta

    contains the bounding box center position (x, y), length l, height h,
    width w, heading theta, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    is taken as direct observation of the state space (linear observation model).
    """

    def __init__(self):
        ndim, dt = 6, 1.
        self.ndim = ndim

        # Constant velocity transition: identity plus a dt coupling from each
        # velocity component to its position component.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        position_rows = np.arange(ndim)
        self._motion_mat[position_rows, ndim + position_rows] = dt

        # Direct observation of the first ndim state components.
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Fixed noise levels (not scaled by the state).
        self._std_motion_pos = 0.8
        self._std_motion_vel = 0.1
        self._std_motion_theta= 0.017*1 # ~1 degrees
        self._std_motion_omega = 0.017*0.1 # ~0.1 degrees
        self._std_sensor_pos = 0.8
        self._std_sensor_vel = 0.1
        self._std_sensor_theta= 0.017*5 # ~5 degrees

        # Order: x, y, l, h, w, theta, followed by their velocities.
        motion_std = np.r_[[self._std_motion_pos]*5 + [self._std_motion_theta],
                           [self._std_motion_vel]*5 + [self._std_motion_omega]]
        self._motion_cov = np.diag(np.square(motion_std))

        # Order: x, y, l, h, w, theta.
        sensor_std = [self._std_sensor_pos]*5 + [self._std_sensor_theta]
        self._innovation_cov = np.diag(np.square(sensor_std))

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, l, h, w, theta)

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (12 dimensional) and covariance matrix (12x12
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean = np.r_[measurement, np.zeros_like(measurement)]

        # Inflate the motion noise: factor 2 on the observed components and
        # factor 10 on the velocities (applied to the standard deviations).
        initial_std = [2]*6 + [10]*6
        covariance = self._motion_cov * np.diag(np.square(initial_std))
        return mean, covariance

    def motion_update(self, mean, covariance):
        # Returns the propagated mean together with the (constant) motion
        # matrix, which doubles as the motion Jacobian.
        predicted = np.dot(self._motion_mat, mean)
        return predicted, self._motion_mat

    def get_motion_cov(self, mean, covariance):
        # Motion noise covariance; independent of the state.
        return self._motion_cov

    def sensor_update(self, mean, covariance):
        # Returns the predicted measurement together with the (constant)
        # observation matrix, which doubles as the sensor Jacobian.
        projected = np.dot(self._update_mat, mean)
        return projected, self._update_mat

    def get_innovation_cov(self, mean, covariance):
        # Sensor noise covariance; independent of the state.
        return self._innovation_cov

    def adjust_angle(self, measured, target):
        # Shift `measured` by whole turns so that it lands within half a turn
        # of `target`. Array inputs are modified in place.
        full_turn = 2*np.pi
        measured += full_turn*np.round((target - measured)/full_turn)
        return measured

    def update(self, mean, covariance, meas_in, marginalization=None, JPDA=False):
        # Work on a copy so the caller's measurement is left untouched, unwrap
        # the heading (component 5) towards the current estimate, then defer
        # to the generic EKF correction.
        measurement = np.array(meas_in)
        heading = 5 if measurement.ndim == 1 else (slice(None), 5)
        measurement[heading] = self.adjust_angle(measurement[heading], mean[5])
        return EKF.EKF.update(self, mean, covariance, measurement, marginalization, JPDA)

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 6 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (12 dimensional).
        covariance : ndarray
            Covariance of the state distribution (12x12 dimensional).
        measurements : ndarray
            A matrix of N measurements, one per row, each in format
            (x, y, l, h, w, theta).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the first
            two components (x, y) only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        projected_mean, projected_cov = self.project(mean, covariance)
        if only_position:
            projected_mean = projected_mean[:2]
            projected_cov = projected_cov[:2, :2]
            measurements = measurements[:, :2]
        return EKF.squared_mahalanobis_distance(projected_mean, projected_cov, measurements)
================================================
FILE: paper_experiments/utils/linear_assignment.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment
import EKF
import pdb
from mbest_ilp import new_m_best_sol
from multiprocessing import Pool
from functools import partial
#from mbest_ilp import m_best_sol as new_m_best_sol
# Large sentinel cost value.
# NOTE(review): presumably marks infeasible associations in cost matrices --
# confirm against its users elsewhere in this module.
INFTY_COST = 1e+5
def min_marg_matching(marginalizations, track_indices=None, max_distance=1):
    """Match tracks to detections from a matrix of marginal probabilities.

    Parameters
    ----------
    marginalizations : ndarray
        One row per track. Column 0 is the dummy ("no detection") column, the
        remaining columns correspond to the real detections.
    track_indices : Optional[List[int]]
        Track index reported for each row. Defaults to `0..num_tracks-1`.
    max_distance : float
        Assignments whose cost (`1 - marginal`) exceeds this value are
        rejected.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Matched (track, detection) pairs, unmatched track indices and
        unmatched detection indices. Detection indices are zero based and do
        not count the dummy column.
    """
    cost_matrix = 1 - marginalizations
    num_tracks, num_detections = cost_matrix.shape
    if track_indices is None:
        track_indices = np.arange(num_tracks)
    # Real detections only: the dummy column is not a detection.
    detection_indices = np.arange(num_detections-1)

    if num_tracks == 0 or num_detections == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    # Replicate the dummy column so that every track can be assigned to
    # "no detection" at the same time: num_tracks dummy columns in total,
    # followed by the real detections.
    dummy_column = cost_matrix[:, 0, np.newaxis]
    expanded_cost_matrix = np.hstack(
        (np.tile(dummy_column, (1, num_tracks-1)), cost_matrix))
    indices = linear_assignment(expanded_cost_matrix)
    assigned_rows = indices[:, 0]
    assigned_cols = indices[:, 1]

    # gather unmatched detections (new track)
    unmatched_detections = [
        detection_idx for col, detection_idx in enumerate(detection_indices)
        if col+num_tracks not in assigned_cols]
    # gather unmatched tracks (no detection)
    unmatched_tracks = [
        track_idx for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    # thresholding and matches
    matches = []
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = col - num_tracks
        if detection_idx < 0:
            # Assigned to one of the dummy columns.
            unmatched_tracks.append(track_idx)
            continue
        if expanded_cost_matrix[row, col] > max_distance:
            # apply thresholding
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
            continue
        # associate matches
        matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections
def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None, compare_2d = False, detections_3d=None):
    """Solve linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d, detections_3d)`. It should return the
        NxM dimensional cost matrix, where element (i, j) is the association
        cost between the i-th track in the given track indices and the j-th
        detection in the given detection_indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above). Defaults to all tracks.
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above). Defaults to all
        detections.
    compare_2d, detections_3d :
        Forwarded unchanged to `distance_metric`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)
    # Clamp every gated-out entry to just above the threshold so that all
    # rejected pairs look equally bad to the solver.
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    indices = linear_assignment(cost_matrix)
    assigned_rows = indices[:, 0]
    assigned_cols = indices[:, 1]

    # gather unmatched detections (new track)
    unmatched_detections = [
        detection_idx for col, detection_idx in enumerate(detection_indices)
        if col not in assigned_cols]
    # gather unmatched tracks (no detection)
    unmatched_tracks = [
        track_idx for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    # thresholding and matches
    matches = []
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            # apply thresholding
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
            continue
        # associate matches
        matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections
# @profile
def JPDA(
        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,
        detection_indices=None, m=1, compare_2d = False, windowing = False):
    """Compute marginal track-to-detection association weights.

    Parameters
    ----------
    distance_metric : Callable
        Called as ``distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d)``. It must return a pair
        ``(cost_matrix, gate_mask)``; `cost_matrix` is indexed with three
        axes here (track, detection, cost channel) and `gate_mask` selects
        the entries to invalidate.
    dummy_node_cost_app, dummy_node_cost_iou : float
        Costs of the "no detection" hypothesis, forwarded to
        `get_JPDA_output`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        Maps rows of the cost matrix to entries of `tracks`. Defaults to all
        tracks.
    detection_indices : Optional[List[int]]
        Maps columns of the cost matrix to entries of `detections`. Defaults
        to all detections.
    m : int
        Number of best solutions requested, forwarded to `get_JPDA_output`.
    compare_2d : Optional[bool]
        Forwarded unchanged to `distance_metric`.
    windowing : Optional[bool]
        Accepted but not used by this function.

    Returns
    -------
    ndarray
        A matrix with one row per track and ``num_detections + 1`` columns;
        column 0 is the "no detection" hypothesis. When there are no track
        indices or no detection indices, an empty array of shape
        ``(0, len(detections) + 1)`` is returned instead.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    nothing_to_match = len(detection_indices) == 0 or len(track_indices) == 0
    if nothing_to_match:
        return np.zeros((0, len(detections) + 1))

    cost_matrix, gate_mask = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d)
    num_tracks, num_detections = cost_matrix.shape[0], cost_matrix.shape[1]
    cost_matrix[gate_mask] = INFTY_COST

    # Anything at or above this value counts as gated out.
    cutoff = INFTY_COST - 0.0001
    clusters = find_clusters(cost_matrix[:, :, 0], cutoff)
    per_cluster = [
        get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app,
                        dummy_node_cost_iou, cutoff, m)
        for cluster in clusters
    ]

    if len(per_cluster) == 0:
        # No cluster at all: every track falls back to the dummy column.
        fallback = np.zeros((num_tracks, num_detections + 1))
        fallback[:, 0] = 1
        return fallback

    # Flatten the per-cluster hypothesis lists into one stack.
    hypotheses = []
    hypothesis_costs = []
    for cluster_assignments, cluster_costs in per_cluster:
        hypotheses.extend(cluster_assignments)
        hypothesis_costs.extend(cluster_costs)
    assignments = np.vstack(hypotheses)
    assignment_cost = np.array(hypothesis_costs)

    # Weight each hypothesis by exp(-cost) and sum over hypotheses.
    weights = np.exp(-np.expand_dims(assignment_cost, 1))
    marginalised_cost = np.sum(assignments * weights, axis=0)
    return np.reshape(marginalised_cost, (num_tracks, num_detections + 1))
def calculate_entropy(matrix, idx, idy):
    """Mean row-wise entropy of `matrix`, ignoring the entries at (idx, idy).

    Each row is normalised by the sum of its non-ignored entries, the
    entropy ``-sum(p * log(p))`` is taken over the non-ignored entries, and
    the result is averaged over rows.

    Parameters
    ----------
    matrix : ndarray
        2-D array of costs.
    idx, idy : array_like of int
        Row and column indices of the entries to exclude.

    Returns
    -------
    float
        The mean of the per-row entropies.

    """
    mask = np.ones(matrix.shape)
    mask[idx, idy] = 0
    probabilities = matrix / np.sum(mask * matrix, axis=1, keepdims=True)
    # Use the convention 0 * log(0) = 0. Taking log(0) directly yields
    # 0 * -inf = NaN, which would turn the whole mean into NaN.
    safe_for_log = np.where(probabilities > 0, probabilities, 1.0)
    per_entry = (-probabilities * np.log(safe_for_log)) * mask
    return np.mean(np.sum(per_entry, axis=1))
def get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):
    """Build the association hypotheses for a single cluster.

    Parameters
    ----------
    cluster : (ndarray, ndarray)
        Track row indices and 1-based detection indices of the cluster
        (index 0 is reserved for the "no detection" column).
    cost_matrix : ndarray
        Cost tensor indexed as (track, detection, channel). Channel 0 is
        treated as the "appearance" cost and channel 1 as the "iou" cost by
        this function.
    dummy_node_cost_app, dummy_node_cost_iou : float
        Cost of the "no detection" hypothesis for the respective channel.
    cutoff : float
        Entries of channel 0 above this value are considered gated out.
    m : int
        Number of solutions requested from `new_m_best_sol`.

    Returns
    -------
    (List[ndarray], ndarray)
        A list of flattened ``(1, num_tracks * (num_detections + 1))``
        assignment rows and the array of their costs.

    """
    track_ids, detection_ids = cluster[0], cluster[1]
    n_tracks, n_dets = len(track_ids), len(detection_ids)
    full_shape = (cost_matrix.shape[0], cost_matrix.shape[1] + 1)

    if n_dets == 0:
        # No detection in reach: all mass goes to the dummy column.
        only_dummy = np.zeros(full_shape)
        only_dummy[track_ids, 0] = 1
        return [only_dummy.reshape(1, -1)], np.array([0])

    # Pull the cluster's sub-block out of both cost channels.
    gather_rows = np.repeat(track_ids, n_dets)
    gather_cols = np.tile(detection_ids - 1, n_tracks)
    new_cost_matrix_appearance = cost_matrix[gather_rows, gather_cols, 0].reshape(n_tracks, n_dets)
    new_cost_matrix_iou = cost_matrix[gather_rows, gather_cols, 1].reshape(n_tracks, n_dets)

    idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)
    appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)
    iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)

    # Use whichever channel has the lower entropy, mapped to a likelihood.
    if appearance_entropy < iou_entropy:
        likelihood = 2.0 / (new_cost_matrix_appearance + 1) - 1
        dummy_node_cost = -np.log(2 / (dummy_node_cost_app + 1) - 1)
    else:
        # Nudge exact ones so that 1 - cost never becomes zero under the log.
        new_cost_matrix_iou[new_cost_matrix_iou == 1] -= 1e-3
        likelihood = 1 - new_cost_matrix_iou
        dummy_node_cost = -np.log(1 - dummy_node_cost_iou)
    new_cost_matrix = -np.log(likelihood)
    new_cost_matrix[idx_x, idx_y] = cutoff

    # Scatter indices into the full (track, 1 + detection) grid; column 0 is
    # the dummy node.
    scatter_rows = np.repeat(track_ids, n_dets + 1)
    scatter_cols = np.tile(
        np.concatenate([np.zeros(1, dtype=np.int32), detection_ids]), n_tracks)

    if n_tracks == 1:
        # A single track needs no combinatorial search: normalise directly.
        dummy_column = np.ones((new_cost_matrix.shape[0], 1)) * dummy_node_cost
        new_cost_matrix = np.concatenate([dummy_column, new_cost_matrix], axis=1)
        total_cost = np.sum(np.exp(-new_cost_matrix))
        new_assignment = np.zeros(full_shape)
        new_assignment[scatter_rows, scatter_cols] = np.exp(-new_cost_matrix) / total_cost
        return [new_assignment.reshape(1, -1)], np.array([0])

    if new_cost_matrix.ndim <= 1:
        new_cost_matrix = np.expand_dims(new_cost_matrix, 1)

    assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)
    # Shift the costs so the best solution has cost 0 before exponentiating.
    assignment_cost -= np.amin(assignment_cost)
    total_cost = np.sum(np.exp(-assignment_cost))

    new_assignments = []
    for assignment in assignments:
        scattered = np.zeros(full_shape)
        scattered[scatter_rows, scatter_cols] = assignment / total_cost
        new_assignments.append(scattered.reshape(1, -1))
    return new_assignments, assignment_cost
def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):
    """Match tracks to detections level by level, most recently updated first.

    Parameters
    ----------
    distance_metric : Callable
        Cost function forwarded to `min_cost_matching`.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    cascade_depth : int
        Number of cascade levels. Level ``l`` (starting at 0) handles the
        tracks whose `time_since_update` equals ``l + 1``.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        Track indices to consider. Defaults to all tracks.
    detection_indices : Optional[List[int]]
        Detection indices to consider. Defaults to all detections.
    compare_2d : Optional[bool]
        Forwarded unchanged to `min_cost_matching`.
    detections_3d : Optional
        Forwarded unchanged to `min_cost_matching`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    matches = []
    unmatched_detections = detection_indices
    for age in range(1, cascade_depth + 1):
        if len(unmatched_detections) == 0:
            # Every detection has been claimed already.
            break

        same_age_tracks = [
            k for k in track_indices if tracks[k].time_since_update == age]
        if len(same_age_tracks) == 0:
            # No track of this age; move on to the next level.
            continue

        level_matches, _, unmatched_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections,
            same_age_tracks, unmatched_detections, compare_2d,
            detections_3d=detections_3d)
        matches.extend(level_matches)

    unmatched_tracks = list(set(track_indices) - {k for k, _ in matches})
    return matches, unmatched_tracks, unmatched_detections
# @profile
def gate_cost_matrix(
        kf, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):
    """Compute which track/detection pairs are infeasible according to the
    state distributions obtained by Kalman filtering.

    Parameters
    ----------
    kf : The Kalman filter.
        Must provide ``gating_distance(mean, covariance, measurements,
        only_position, use3d)``.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Maps rows of the returned mask to entries of `tracks`.
    detection_indices : List[int]
        Maps columns of the returned mask to entries of `detections`.
    gated_cost : Optional[float]
        Accepted but not used by this function.
    only_position : Optional[bool]
        If True, the gate uses 3 degrees of freedom when `use3d` is set and
        2 otherwise; if False, it uses the measurement dimension.
    use3d : Optional[bool]
        If True, measurements are taken from each detection's `box_3d`;
        otherwise from `to_xywh()`.
    windowing : Optional[bool]
        Accepted but not used by this function.

    Returns
    -------
    ndarray or None
        A boolean mask with one row per track index and one column per
        detection index, True where the pair is gated out. None when either
        index list is empty.

    """
    if len(track_indices) == 0 or len(detection_indices) == 0:
        return None

    # Gather measurements in `detection_indices` order for both branches so
    # the mask columns line up with the caller's cost matrix columns.
    if use3d:
        measurements = np.array(
            [detections[i].box_3d for i in detection_indices])
    else:
        measurements = np.asarray(
            [detections[i].to_xywh() for i in detection_indices])

    if only_position:
        gating_dim = 3 if use3d else 2
    else:
        gating_dim = measurements.shape[1]
    gating_threshold = EKF.chi2inv95[gating_dim]

    gate_mask = []
    for track_idx in track_indices:
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, use3d)
        gated_set = gating_distance > gating_threshold
        if np.all(gated_set):
            # Nothing passed the gate: retry once with a 3x looser threshold
            # so the track is not cut off from every detection.
            gated_set = gating_distance > gating_threshold * 3
        gate_mask.append(gated_set)
    return np.vstack(gate_mask)
def find_clusters(cost_matrix, cutoff):
    """Group tracks and detections into connected clusters.

    A track and a detection are connected when their cost is below `cutoff`.
    Each cluster is a connected component of that graph; a track without any
    connected detection forms a cluster of its own with no detections.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D array with one row per track and one column per detection.
    cutoff : float
        Costs strictly below this value count as a connection.

    Returns
    -------
    List[(ndarray, ndarray)]
        One pair per cluster: int32 track row indices and int32 detection
        indices. Detection indices are 1-based (column + 1).

    """
    num_tracks, _ = cost_matrix.shape
    every_track = set(range(num_tracks))
    assigned_tracks = set()
    clusters = []
    assigned_count = 0

    while assigned_count < num_tracks:
        cluster_tracks = set()
        cluster_detections = set()
        # Seed the search with any track that has no cluster yet.
        frontier = {next(iter(every_track - assigned_tracks))}

        while frontier:
            current = frontier.pop()
            reachable = np.where(cost_matrix[current] < cutoff)[0]
            cluster_detections.update(reachable + 1)
            cluster_tracks.add(current)
            for detection in cluster_detections:
                neighbours = np.where(cost_matrix[:, detection - 1] < cutoff)[0]
                for track in neighbours:
                    if track not in cluster_tracks and track not in frontier:
                        frontier.add(track)

        assigned_count += len(cluster_tracks)
        assigned_tracks.update(cluster_tracks)
        clusters.append((
            np.array(list(cluster_tracks), dtype = np.int32),
            np.array(list(cluster_detections), dtype = np.int32)))
    return clusters
================================================
FILE: paper_experiments/utils/logger.py
================================================
import io, os
import numpy as np
from PIL import Image
import tensorflow as tf
#courtesy https://becominghuman.ai/logging-in-tensorboard-with-pytorch-or-any-other-library-c549163dee9e
class Logger:
    """Writes scalar and image summaries to a TensorBoard event file."""

    def __init__(self, logdir):
        """Create `logdir` if it is missing and open a summary writer on it."""
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        self.writer = tf.summary.FileWriter(logdir)

    def close(self):
        """Close the underlying summary writer."""
        self.writer.close()

    def _emit(self, summary, global_step):
        """Write one summary and flush so it reaches disk right away."""
        self.writer.add_summary(summary, global_step=global_step)
        self.writer.flush()

    def log_scalar(self, tag, value, global_step):
        """Log the scalar `value` under `tag` at step `global_step`."""
        summary = tf.Summary()
        summary.value.add(tag=tag, simple_value=value)
        self._emit(summary, global_step)

    def log_image(self, tag, img, global_step):
        """Log the image array `img` under `tag`, encoded as PNG."""
        buffer = io.BytesIO()
        Image.fromarray(img).save(buffer, format='png')
        height, width = img.shape[0], img.shape[1]
        img_summary = tf.Summary.Image(encoded_image_string=buffer.getvalue(),
                                       height=height,
                                       width=width)
        summary = tf.Summary()
        summary.value.add(tag=tag, image=img_summary)
        self._emit(summary, global_step)
================================================
FILE: paper_experiments/utils/mbest_ilp.py
================================================
from gurobipy import Model, quicksum, LinExpr, GRB
import numpy as np
import copy
import time
from sklearn.utils.linear_assignment_ import linear_assignment
import pickle
import itertools
import pdb
from copy import deepcopy
import math
"""
Fn: ilp_assignment
------------------
Solves ILP problem using gurobi
"""
def ilp_assignment(model):
    """Optimise `model` in place.

    Returns -1 when the model's status code equals 3 after optimisation,
    and None otherwise.
    """
    model.optimize()
    failed = model.status == 3
    return -1 if failed else None
"""
Fn: initialize_model
--------------------
Initializes gurobi ILP model by setting the base objective
"""
# @profile
def initialize_model(cost_matrix, cutoff, model = None):
    """Set up the assignment ILP for `cost_matrix`.

    A dummy detection column with cost `cutoff` is inserted at index 0. Each
    track (row) must pick exactly one column; each real detection (columns
    1..N-1) may be picked at most once, while the dummy column is unbounded.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D track-by-detection cost matrix.
    cutoff : float
        Cost of assigning a track to the dummy detection.
    model : Optional[Model]
        Existing model to reuse; its variables and constraints are removed
        first. A new model is created when omitted.

    Returns
    -------
    (Model, int, int, list)
        The model, the number of rows M, the number of columns N (including
        the dummy) and the list of binary decision variables.

    """
    # Prepend the dummy detection column.
    dummy_column = np.ones(cost_matrix.shape[0]) * cutoff
    cost_matrix = np.insert(cost_matrix, 0, dummy_column, axis=1)
    M, N = cost_matrix.shape

    if model is not None:
        model.remove(model.getVars())
        model.remove(model.getConstrs())
    else:
        model = Model()
    model.setParam('OutputFlag', False)

    y = model.addVars(M, N, vtype=GRB.BINARY, name = 'y')
    row_costs = (
        quicksum([y[i, j] * cost_matrix[i][j] for j in range(N)])
        for i in range(M))
    model.setObjective(quicksum(row_costs), GRB.MINIMIZE)

    # Every track takes exactly one column (possibly the dummy).
    model.addConstrs(
        (quicksum(y[i, j] for j in range(N)) == 1 for i in range(M)),
        name='constraint for track')
    # Every real detection is used by at most one track.
    model.addConstrs(
        (quicksum(y[i, j] for i in range(M)) <= 1 for j in range(1, N)),
        name='constraint for detection')

    return model, M, N, list(y.values())
"""
Fn: m_best_sol
--------------
Finds m_best solutions for object/track association given the
input cost matrix. Solves constrained ILP problems using gurobi solver.
"""
def cache(func):
    """Decorator that memoises solution lists by augmented matrix shape.

    The wrapped function is called as ``func(cost_matrix, cutoff, ...)`` and
    must return ``(solution_list, solution_vals)``. Only `solution_list` is
    stored, keyed by the shape of the cost matrix after a dummy column has
    been prepended. On a cache hit the values are recomputed for the
    current costs instead of being reused.
    """
    solutions_by_shape = {}

    def cached_function(*args):
        cost_matrix, cutoff = args[0], args[1]
        dummy_column = np.ones((cost_matrix.shape[0], 1)) * cutoff
        augmented = np.hstack((dummy_column, cost_matrix))
        key = (augmented.shape[0], augmented.shape[1])

        if key not in solutions_by_shape:
            solution_list, solution_vals = func(*args)
            solutions_by_shape[key] = solution_list
            return solution_list, solution_vals

        solution_list = solutions_by_shape[key]
        # Same enumeration, new costs: re-evaluate each solution's value.
        solution_vals = np.sum(solution_list * augmented.reshape(1, -1), axis = 1)
        return solution_list, solution_vals

    return cached_function
# @profile
def num_solutions(cost_matrix):
    """Count the feasible assignments for `cost_matrix`, up to a cap.

    An assignment matches `i` tracks to `i` distinct detections and sends
    the remaining tracks to the dummy detection. Counting stops as soon as
    the running total exceeds 2000, so larger results are only a lower
    bound.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D track-by-detection cost matrix (without the dummy column).

    Returns
    -------
    int
        The number of assignments, or a partial total above 2000.

    """
    n_tracks, n_detections = cost_matrix.shape
    n_columns = n_detections + 1  # account for the dummy detection
    total = 0
    for matched in range(min(n_tracks + 1, n_columns)):
        track_arrangements = np.prod(range(n_tracks - matched + 1, n_tracks + 1))
        detection_arrangements = np.prod(range(n_columns - matched, n_columns))
        total += track_arrangements * detection_arrangements // math.factorial(matched)
        if total > 2000:
            break
    return int(total)
@cache
def enumerate_solutions(cost_matrix, cutoff, num_solutions):
    """Enumerate every feasible assignment together with its total cost.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D track-by-detection cost matrix (without the dummy column).
    cutoff : float
        Cost of assigning a track to the dummy detection (column 0).
    num_solutions : int
        Number of assignments to allocate room for.

    Returns
    -------
    (ndarray, ndarray)
        `solution_list` of shape ``(num_solutions, M * N)`` holding the
        flattened 0/1 assignment matrices, and `solution_vals` with the
        cost of each one.

    """
    dummy_column = np.ones((cost_matrix.shape[0], 1)) * cutoff
    cost_matrix = np.hstack((dummy_column, cost_matrix))
    M, N = cost_matrix.shape

    # Start with every track on the dummy detection, then overwrite.
    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)
    solution_list[:, :, 0] = 1

    count = 0
    for matched in range(min(M + 1, N)):
        pairings = itertools.product(
            itertools.combinations(range(M), matched),
            itertools.permutations(range(1, N), matched))
        for chosen, perm in pairings:
            if matched > 0:
                rows = list(chosen)
                solution_list[count, rows, list(perm)] = 1
                solution_list[count, rows, 0] = 0
            count += 1

    weighted = solution_list * np.expand_dims(cost_matrix, 0)
    solution_vals = weighted.sum(axis = 1).sum(axis = 1)
    solution_list = np.reshape(solution_list, (num_solutions, -1))
    return solution_list, solution_vals
def new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):
    """Return the best assignments for `cost_matrix` and their costs.

    When `num_solutions` reports at most 2000 assignments, the result of
    `enumerate_solutions` is returned and `m_sol` is not used. Otherwise the
    problem is solved as an ILP and up to `m_sol` solutions are produced by
    repeatedly re-solving with added constraints.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D track-by-detection cost matrix (without the dummy column).
    m_sol : int
        Number of solutions to compute on the ILP path.
    cutoff : float
        Cost of assigning a track to the dummy detection.
    model : Optional[Model]
        Existing model forwarded to `initialize_model`.

    Returns
    -------
    (ndarray, ndarray)
        On the ILP path: `X` of shape ``(m_sol, M * N)`` with one flattened
        assignment per row, and `xv` with the objective values.

    """
    sols = num_solutions(cost_matrix)
    if sols <= 2000:
        # Small enough to enumerate every assignment exhaustively.
        return enumerate_solutions(cost_matrix, cutoff, sols)
    model, M, N, y = initialize_model(cost_matrix, cutoff, model)
    X = np.zeros((m_sol, M*N))
    xv = []
    # Best solution: the unconstrained ILP.
    if (ilp_assignment(model) == -1):
        xv.append(0)
    else:
        x = model.getAttr("X", y)
        X[0] = x
        xv.append(model.objVal)
    if m_sol > 1:
        # Second best: forbid reproducing the previous solution exactly.
        # NOTE(review): `x` is only bound when the first solve succeeded;
        # if it returned -1 this line raises NameError.
        model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')
        if (ilp_assignment(model) == -1):
            xv.append(0)
        else:
            x = model.getAttr("X", y)
            X[1] = x
            xv.append(model.objVal)
    if m_sol > 2:
        model.remove(model.getConstrByName('constraint_0'))
        second_best_solutions = []
        second_best_solution_vals = []
        partitions = []
        # First variable on which the two best solutions disagree.
        j = np.argmax(np.logical_xor(X[0], X[1]))
        partitions.append([j])
        partitions.append([j])
        # Candidate 0: agree with X[0] on variable j, but differ from X[0].
        model.addConstr(y[j]==X[0][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solutions.append(model.getAttr("X", y))
        second_best_solution_vals.append(model.objVal)
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        # Candidate 1: agree with X[1] on variable j, but differ from X[1].
        model.addConstr(y[j]==X[1][j], name = 'partition_constraint')
        model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')
        ilp_assignment(model)
        second_best_solution_vals.append(model.objVal)
        second_best_solutions.append(model.getAttr("X", y))
        model.remove(model.getConstrByName('non_equality_constraint'))
        model.remove(model.getConstrByName('partition_constraint'))
        for m in range(2, m_sol):
            # The cheapest stored candidate becomes the m-th solution.
            l_k = np.argmin(second_best_solution_vals)
            X[m] = second_best_solutions[l_k]
            xv.append(second_best_solution_vals[l_k])
            if m==m_sol-1:
                break
            # NOTE(review): `l_k` indexes the candidate lists, yet it is
            # also used to index rows of `X` below - confirm this is intended.
            j = np.argmax(np.logical_xor(X[m], X[l_k]))
            parent_partition = partitions[l_k]
            constrs = []
            # Re-apply the constraints recorded for the parent partition.
            for idx in parent_partition:
                constrs.append(model.addConstr(y[idx]==X[l_k, idx]))
            # New candidate: agree with X[m] on variable j, differ from X[m].
            model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')
            model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')
            if(ilp_assignment(model) == -1):
                # Placeholder with infinite cost so argmin never picks it.
                second_best_solutions.append(np.ones((M,N)))
                second_best_solution_vals.append(np.inf)
            else:
                second_best_solutions.append(model.getAttr("X", y))
                second_best_solution_vals.append(model.objVal)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Replace candidate l_k: agree with X[l_k] on j, differ from it.
            model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')
            model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')
            if(ilp_assignment(model) == -1):
                second_best_solution_vals[l_k] = np.inf
                second_best_solutions[l_k] = np.ones((M,N))
            else:
                second_best_solution_vals[l_k] = model.objVal
                second_best_solutions[l_k] = model.getAttr("X", y)
            model.remove(model.getConstrByName('partition_constraint_new'))
            model.remove(model.getConstrByName('non_equality_constraint'))
            # Both candidates now carry the split on variable j.
            partitions[l_k].append(j)
            partitions.append(copy.deepcopy(partitions[l_k]))
            # Drop the parent-partition constraints before the next round.
            for constr in constrs:
                model.remove(constr)
    # X = np.asarray(X)
    xv = np.asarray(xv)
    return X, xv
def linear_assignment_wrapper(a):
    """Run `linear_assignment` on the cost matrix `a` and return its result."""
    assignment = linear_assignment(a)
    return assignment
if __name__=='__main__':
    # Manual smoke test: solve a fixed 4x4 problem and print each solution
    # as a 4x5 grid (column 0 is the dummy detection) with its cost.
    np.random.seed(14295)
    a = np.array([
        [0.1, 0.6, 0.2, 0.3],
        [0.4, 0.1, 0.9, 0.4],
        [0.3, 0.5, 0.1, 0.7],
        [0.8, 0.2, 0.2, 0.1],
    ])
    num_solutions(a)
    sols, vals = new_m_best_sol(a, 100, 10)
    for sol, val in zip(sols, vals):
        print(np.reshape(sol, (4,5)), val)
================================================
FILE: paper_experiments/utils/nn_matching.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
def _pdist(a, b):
"""Compute pair-wise squared distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
a, b = np.asarray(a), np.asarray(b)
if len(a) == 0 or len(b) == 0:
return np.zeros((len(a), len(b)))
a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
r2 = np.clip(r2, 0., float(np.inf))
return r2
def _cosine_distance(a, b, data_is_normalized=False):
"""Compute pair-wise cosine distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
data_is_normalized : Optional[bool]
If True, assumes rows in a and b are unit length vectors.
Otherwise, a and b are explicitly normalized to lenght 1.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
if not data_is_normalized:
a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
return 1. - np.dot(a, b.T)
def _cosine_distance_torch(a, b, data_is_normalized=False):
'''
_cosine_distance but torched
'''
if not data_is_normalized:
a = a / torch.norm(a, dim=1, keepdim=True)
b = b / torch.norm(b, dim=1, keepdim=True)
return 1. - torch.matmul(a, torch.transpose(b,0,1))
def _nn_euclidean_distance(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        `_pdist` value to any sample in `x`, clamped at zero.

    """
    nearest = _pdist(x, y).min(axis=0)
    return np.maximum(0.0, nearest)
def _nn_euclidean_distance_torch(x, y):
""" Helper function for nearest neighbor distance metric (Euclidean).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest Euclidean distance to a sample in `x`.
"""
x = x/((x*x).sum(1, keepdim = True)).sqrt()
y = y/((y*y).sum(1, keepdim = True)).sqrt()
sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2)
sim = sim.exp()
sim = (sim - 1)/(sim + 1)
sim = torch.min(sim, 0)[0]
return sim
def _nn_cosine_distance(x, y):
    """ Helper function for nearest neighbor distance metric (cosine).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        cosine distance to any sample in `x`.

    """
    pairwise = _cosine_distance(x, y)
    nearest = pairwise.min(axis=0)
    return nearest
def _nn_cosine_distance_torch(x,y):
    '''
    Torch counterpart of _nn_cosine_distance: for each row of `y`, the
    smallest cosine distance to any row of `x`.
    '''
    pairwise = _cosine_distance_torch(x, y)
    nearest, _ = torch.min(pairwise, 0)
    return nearest
class NearestNeighborDistanceMetric(object):
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any stored sample of that target.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    budget : Optional[int]
        If not None, keep at most this many samples per target, dropping
        the oldest ones first.

    Attributes
    ----------
    samples : Dict[int -> List[ndarray]]
        Maps target identities to their stored samples.
    samples_2d : Dict[int -> List[ndarray]]
        Same as `samples`, for the 2D features.

    """

    def __init__(self, metric, budget=None):
        if metric == "cosine":
            self._metric = _nn_cosine_distance
            self._metric_torch = _nn_cosine_distance_torch
        elif metric == "euclidean":
            self._metric = _nn_euclidean_distance
            self._metric_torch = _nn_euclidean_distance_torch
        else:
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        self.budget = budget
        self.samples = {}
        self.samples_2d = {}

    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):
        """Update the distance metric with new data.

        Parameters
        ----------
        features : Sequence
            One feature per entry of `targets`; a None entry registers the
            target without adding a sample.
        features_2d : Sequence
            One 2D feature per entry of `targets_2d`.
        targets : Sequence[int]
            Target identities associated with `features`.
        targets_2d : Sequence[int]
            Target identities associated with `features_2d`.
        active_targets : List[int]
            A list of targets that are currently present in the scene.

        """
        for feat, tgt in zip(features, targets):
            gallery = self.samples.setdefault(tgt, [])
            if feat is not None:
                gallery.append(feat)
            if self.budget is not None:
                self.samples[tgt] = gallery[-self.budget:]

        # Keep only active targets that were part of this update ...
        kept = {}
        for tgt in active_targets:
            if tgt in targets:
                kept[tgt] = self.samples[tgt]
        self.samples = kept
        # ... and give every other active target an empty gallery.
        for tgt in active_targets:
            self.samples.setdefault(tgt, [])

        for feat_2d, tgt in zip(features_2d, targets_2d):
            gallery_2d = self.samples_2d.setdefault(tgt, [])
            gallery_2d.append(feat_2d)
            if self.budget is not None:
                self.samples_2d[tgt] = gallery_2d[-self.budget:]
        self.samples_2d = {tgt: self.samples_2d[tgt] for tgt in active_targets}

    def distance(self, features, targets, compare_2d=False):
        """Compute distance between features and targets.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            A list of targets to match the given `features` against.
        compare_2d : Optional[bool]
            If True, compare against `samples_2d` instead of `samples`.

        Returns
        -------
        ndarray
            A cost matrix of shape len(targets), len(features), where
            element (i, j) is the metric value between the stored samples
            of `targets[i]` and `features[j]`.

        """
        store = self.samples_2d if compare_2d else self.samples
        cost_matrix = np.zeros((len(targets), len(features)))
        for row, target in enumerate(targets):
            cost_matrix[row, :] = self._metric(store[target], features)
        return cost_matrix

    def distance_torch(self, features, targets, compare_2d=False):
        '''
        Same as distance, but evaluated with torch on the CUDA device; the
        result is returned as a numpy array.
        '''
        store = self.samples_2d if compare_2d else self.samples
        features = torch.from_numpy(features).cuda()
        cost_matrix = torch.zeros(len(targets), len(features)).cuda()
        for row, target in enumerate(targets):
            gallery = torch.from_numpy(np.array(store[target])).cuda()
            cost_matrix[row, :] = self._metric_torch(gallery, features)
        return cost_matrix.cpu().numpy()

    def check_samples(self, targets):
        """Return True if any of `targets` currently has no stored sample."""
        return any(len(self.samples[target]) == 0 for target in targets)
================================================
FILE: paper_experiments/utils/pointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.
Author: Charles R. Qi
Date: November 2016
"""
import numpy as np
import tensorflow as tf
def _variable_on_cpu(name, shape, initializer, use_fp16=False):
    """Helper to create a Variable placed on the CPU device.

    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable
        use_fp16: bool, create the variable as float16 instead of float32
    Returns:
        Variable Tensor
    """
    dtype = tf.float16 if use_fp16 else tf.float32
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
    """Helper to create an initialized Variable with weight decay.

    The Variable uses a Xavier initializer when `use_xavier` is set and a
    truncated normal initializer otherwise. A weight decay term is added to
    the 'losses' collection only if `wd` is not None.

    Args:
        name: name of the variable
        shape: list of ints
        stddev: standard deviation of the truncated Gaussian
        wd: add L2Loss weight decay multiplied by this float. If None, weight
            decay is not added for this Variable.
        use_xavier: bool, whether to use xavier initializer
    Returns:
        Variable Tensor
    """
    initializer = (
        tf.contrib.layers.xavier_initializer() if use_xavier
        else tf.truncated_normal_initializer(stddev=stddev))
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None:
        return var
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
    return var
def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 1D convolution followed by optional batch norm and activation.

    Args:
        inputs: 3-D tensor variable BxLxC
        num_output_channels: int
        kernel_size: int
        scope: string, variable scope (variables are reused if they exist)
        stride: int
        padding: 'SAME' or 'VALID'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function, or None to skip the activation
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable

    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        in_channels = inputs.get_shape()[-1].value
        kernel = _variable_with_weight_decay(
            'weights',
            shape=[kernel_size, in_channels, num_output_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        convolved = tf.nn.conv1d(inputs, kernel,
                                 stride=stride,
                                 padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(convolved, biases)

        if bn:
            outputs = batch_norm_for_conv1d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 2D convolution followed by bias, optional batch norm and activation.

    Args:
      inputs: 4-D tensor variable BxHxWxC
      num_output_channels: int, number of filters
      kernel_size: a list of 2 ints (height, width)
      scope: string, variable scope holding 'weights' and 'biases'
      stride: a list of 2 ints (height, width)
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float, forwarded as `wd` to _variable_with_weight_decay
      activation_fn: callable applied last, or None to skip it
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable (only used when bn is true)

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        k_h, k_w = kernel_size
        in_channels = inputs.get_shape()[-1].value
        filters = _variable_with_weight_decay(
            'weights',
            shape=[k_h, k_w, in_channels, num_output_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        s_h, s_w = stride
        net = tf.nn.conv2d(inputs, filters,
                           [1, s_h, s_w, 1],
                           padding=padding)
        bias = _variable_on_cpu('biases', [num_output_channels],
                                tf.constant_initializer(0.0))
        net = tf.nn.bias_add(net, bias)

        if bn:
            net = batch_norm_for_conv2d(net, is_training,
                                        bn_decay=bn_decay, scope='bn')

        if activation_fn is None:
            return net
        return activation_fn(net)
def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=0.0,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
    """ 2D convolution transpose with non-linear operation.

    Args:
      inputs: 4-D tensor variable BxHxWxC
      num_output_channels: int
      kernel_size: a list of 2 ints
      scope: string
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function, or None to skip the activation
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor

    Note (from the original author):
      conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
      -- presumably this refers to the tensor shape; TODO confirm.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        kernel_h, kernel_w = kernel_size
        num_in_channels = inputs.get_shape()[-1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_output_channels, num_in_channels]  # reversed to conv2d
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride

        # from slim.convolution2d_transpose
        def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
            # dim_size is either a Python int or a scalar int tensor here;
            # both support * and +.
            dim_size = dim_size * stride_size
            if padding == 'VALID':
                dim_size += max(kernel_size - stride_size, 0)
            return dim_size

        # Calculate the output shape. Statically unknown dimensions (None)
        # used to hit `None * stride` and raise TypeError; fall back to the
        # runtime shape for exactly those dimensions instead.
        static_dims = [inputs.get_shape()[axis].value for axis in range(3)]
        has_dynamic_dim = any(dim is None for dim in static_dims)
        if has_dynamic_dim:
            runtime_shape = tf.shape(inputs)
            static_dims = [runtime_shape[axis] if dim is None else dim
                           for axis, dim in enumerate(static_dims)]
        batch_size, height, width = static_dims

        out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
        out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
        output_shape = [batch_size, out_height, out_width, num_output_channels]
        if has_dynamic_dim:
            # Mixed ints/tensors must be packed into a single 1-D tensor.
            output_shape = tf.stack(output_shape)

        outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                                         [1, stride_h, stride_w, 1],
                                         padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn')

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 3D convolution followed by bias, optional batch norm and activation.

    Args:
      inputs: 5-D tensor variable BxDxHxWxC
      num_output_channels: int, number of filters
      kernel_size: a list of 3 ints (depth, height, width)
      scope: string, variable scope holding 'weights' and 'biases'
      stride: a list of 3 ints (depth, height, width)
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float, forwarded as `wd` to _variable_with_weight_decay
      activation_fn: callable applied last, or None to skip it
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable (only used when bn is true)

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        k_d, k_h, k_w = kernel_size
        in_channels = inputs.get_shape()[-1].value
        filters = _variable_with_weight_decay(
            'weights',
            shape=[k_d, k_h, k_w, in_channels, num_output_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        s_d, s_h, s_w = stride
        net = tf.nn.conv3d(inputs, filters,
                           [1, s_d, s_h, s_w, 1],
                           padding=padding)
        bias = _variable_on_cpu('biases', [num_output_channels],
                                tf.constant_initializer(0.0))
        net = tf.nn.bias_add(net, bias)

        if bn:
            net = batch_norm_for_conv3d(net, is_training,
                                        bn_decay=bn_decay, scope='bn')

        if activation_fn is None:
            return net
        return activation_fn(net)
def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=0.0,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
    """ Dense layer: matmul, bias, optional batch norm, optional activation.

    Args:
      inputs: 2-D tensor BxN
      num_outputs: int, number of output units
      scope: string, variable scope holding 'weights' and 'biases'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float, forwarded as `wd` to _variable_with_weight_decay
      activation_fn: callable applied last, or None to skip it
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable (only used when bn is true)

    Returns:
      Variable tensor of size B x num_outputs.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        in_units = inputs.get_shape()[-1].value
        weight_matrix = _variable_with_weight_decay(
            'weights',
            shape=[in_units, num_outputs],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        net = tf.matmul(inputs, weight_matrix)
        bias = _variable_on_cpu('biases', [num_outputs],
                                tf.constant_initializer(0.0))
        net = tf.nn.bias_add(net, bias)

        if bn:
            net = batch_norm_for_fc(net, is_training, bn_decay, 'bn')

        if activation_fn is None:
            return net
        return activation_fn(net)
def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D max pooling over the H and W axes.

    Args:
      inputs: 4-D tensor BxHxWxC
      kernel_size: a list of 2 ints (height, width)
      scope: string, variable scope whose name is given to the pooling op
      stride: a list of 2 ints (height, width)
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        k_h, k_w = kernel_size
        s_h, s_w = stride
        return tf.nn.max_pool(inputs,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=pool_scope.name)
def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D average pooling over the H and W axes.

    Args:
      inputs: 4-D tensor BxHxWxC
      kernel_size: a list of 2 ints (height, width)
      scope: string, variable scope whose name is given to the pooling op
      stride: a list of 2 ints (height, width)
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        k_h, k_w = kernel_size
        s_h, s_w = stride
        return tf.nn.avg_pool(inputs,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=pool_scope.name)
def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D max pooling over the D, H and W axes.

    Args:
      inputs: 5-D tensor BxDxHxWxC
      kernel_size: a list of 3 ints (depth, height, width)
      scope: string, variable scope whose name is given to the pooling op
      stride: a list of 3 ints (depth, height, width)
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        k_d, k_h, k_w = kernel_size
        s_d, s_h, s_w = stride
        return tf.nn.max_pool3d(inputs,
                                ksize=[1, k_d, k_h, k_w, 1],
                                strides=[1, s_d, s_h, s_w, 1],
                                padding=padding,
                                name=pool_scope.name)
def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D average pooling over the D, H and W axes.

    Args:
      inputs: 5-D tensor BxDxHxWxC
      kernel_size: a list of 3 ints (depth, height, width)
      scope: string, variable scope whose name is given to the pooling op
      stride: a list of 3 ints (depth, height, width)
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        k_d, k_h, k_w = kernel_size
        s_d, s_h, s_w = stride
        return tf.nn.avg_pool3d(inputs,
                                ksize=[1, k_d, k_h, k_w, 1],
                                strides=[1, s_d, s_h, s_w, 1],
                                padding=padding,
                                name=pool_scope.name)
def batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay):
    """ Batch normalization on convolutional maps and beyond...
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

    While training, the current batch statistics are used and an exponential
    moving average of them is updated; otherwise the moving averages are used.

    Args:
      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:   boolean tf.Variable, true indicates training phase
      scope:         string, variable scope
      moments_dims:  a list of ints, indicating dimensions for moments calculation
      bn_decay:      float or float tensor variable, controlling moving average
                     weight; 0.9 is used when None
    Return:
      normed:        batch-normalized maps
    """
    # NOTE(review): beta/gamma are built with tf.Variable rather than
    # tf.get_variable, so presumably they are not shared when the scope is
    # re-entered with AUTO_REUSE -- confirm before relying on reuse here.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        channels = inputs.get_shape()[-1].value
        beta = tf.Variable(tf.constant(0.0, shape=[channels]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[channels]),
                            name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')

        if bn_decay is None:
            decay = 0.9
        else:
            decay = bn_decay
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def update_moving_averages():
            return ema.apply([batch_mean, batch_var])

        def skip_update():
            return tf.no_op()

        # Operator that maintains moving averages of variables.
        ema_apply_op = tf.cond(is_training, update_moving_averages, skip_update)

        def batch_stats_after_update():
            # Return the current batch statistics, but only once the moving
            # averages have been refreshed.
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def moving_stats():
            # ema.average returns the Variable holding the average of var.
            return ema.average(batch_mean), ema.average(batch_var)

        mean, var = tf.cond(is_training, batch_stats_after_update, moving_stats)
        return tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
    """ Batch normalization on FC data (moments taken over axis 0).

    Args:
      inputs:      Tensor, 2D BxC input
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
    Return:
      normed:      batch-normalized maps
    """
    batch_axes = [0]
    return batch_norm_template(inputs, is_training, scope, batch_axes, bn_decay)
def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope):
    """ Batch normalization on 1D convolutional maps (moments over axes 0, 1).

    Args:
      inputs:      Tensor, 3D BLC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
    Return:
      normed:      batch-normalized maps
    """
    reduce_axes = [0, 1]
    return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)
def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope):
    """ Batch normalization on 2D convolutional maps (moments over axes 0, 1, 2).

    Args:
      inputs:      Tensor, 4D BHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
    Return:
      normed:      batch-normalized maps
    """
    reduce_axes = [0, 1, 2]
    return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)
def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
    """ Batch normalization on 3D convolutional maps (moments over axes 0-3).

    Args:
      inputs:      Tensor, 5D BDHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay:    float or float tensor variable, controlling moving average weight
      scope:       string, variable scope
    Return:
      normed:      batch-normalized maps
    """
    reduce_axes = [0, 1, 2, 3]
    return batch_norm_template(inputs, is_training, scope, reduce_axes, bn_decay)
def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
    """ Dropout layer that is active only while `is_training` is true.

    Args:
      inputs: tensor
      is_training: boolean tf.Variable
      scope: string
      keep_prob: float in [0,1]
      noise_shape: list of ints

    Returns:
      tensor variable
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        def apply_dropout():
            return tf.nn.dropout(inputs, keep_prob, noise_shape)

        def passthrough():
            return inputs

        return tf.cond(is_training, apply_dropout, passthrough)
================================================
FILE: paper_experiments/utils/pointnet_transform_nets.py
================================================
import tensorflow as tf
import numpy as np
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import pointnet_tf_util
def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """ Input (XYZ) Transform Net, input is BxNx3 gray image.

    Three batch-normalised convolutions, a max over axis 1 and two fully
    connected layers feed a final linear layer whose weights start at zero
    and whose bias is offset by the flattened 3x3 identity.

    Return:
        Transformation matrix of size 3xK (K must be 3) """
    batch_size = point_cloud.get_shape()[0].value

    net = tf.expand_dims(point_cloud, -1)
    # (output channels, kernel size, scope) for the three conv layers.
    conv_specs = ((64, [1,3], 'tconv1'),
                  (128, [1,1], 'tconv2'),
                  (1024, [1,1], 'tconv3'))
    for channels, ksize, layer_scope in conv_specs:
        net = pointnet_tf_util.conv2d(net, channels, ksize,
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope=layer_scope, bn_decay=bn_decay)
    net = tf.reduce_max(net, axis = 1)
    net = tf.reshape(net, [batch_size, -1])

    for units, layer_scope in ((512, 'tfc1'), (256, 'tfc2')):
        net = pointnet_tf_util.fully_connected(net, units, bn=True,
                                               is_training=is_training,
                                               scope=layer_scope,
                                               bn_decay=bn_decay)

    with tf.variable_scope('transform_XYZ'):
        assert(K==3)
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        identity_offset = tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        biases = biases + identity_offset
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [-1, 3, K])
def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """ Feature Transform Net, input is BxNx1xK.

    Three batch-normalised 1x1 convolutions, a max over axis 1 and two fully
    connected layers feed a final linear layer whose weights start at zero
    and whose bias is offset by the flattened KxK identity.

    Return:
        Transformation matrix of size KxK """
    batch_size = inputs.get_shape()[0].value
    num_point = inputs.get_shape()[1].value  # read but not used below

    net = inputs
    for channels, layer_scope in ((64, 'tconv1'), (128, 'tconv2'), (1024, 'tconv3')):
        net = pointnet_tf_util.conv2d(net, channels, [1,1],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope=layer_scope, bn_decay=bn_decay)
    net = tf.reduce_max(net, axis = 1)
    net = tf.reshape(net, [batch_size, -1])

    for units, layer_scope in ((512, 'tfc1'), (256, 'tfc2')):
        net = pointnet_tf_util.fully_connected(net, units, bn=True,
                                               is_training=is_training,
                                               scope=layer_scope,
                                               bn_decay=bn_decay)

    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [K*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        identity_offset = tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        biases = biases + identity_offset
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [batch_size, K, K])
================================================
FILE: paper_experiments/utils/read_detections.py
================================================
import numpy as np
import pdb
from deep_sort_utils import non_max_suppression as deepsort_nms
def _rows_to_detections(rows):
    """Split raw detection rows into (detections, boxes, conf).

    `rows` is a 2-D array whose columns 2..6 are x1, y1, w, h, confidence.
    Returns the [x1, y1, x2, y2, conf, -1, -1] detection rows, the
    [x1, y1, w, h] boxes and the (N, 1) confidence column.
    """
    x1 = np.expand_dims(rows[:, 2].astype(np.float32), 1)
    y1 = np.expand_dims(rows[:, 3].astype(np.float32), 1)
    w = np.expand_dims(rows[:, 4].astype(np.float32), 1)
    h = np.expand_dims(rows[:, 5].astype(np.float32), 1)
    conf = np.expand_dims(rows[:, 6].astype(np.float32), 1)
    # Class confidence / class prediction are not available; filled with -1.
    cls_conf = -np.ones(conf.shape)
    cls_pred = -np.ones(conf.shape)
    detections = np.hstack([x1, y1, x1 + w, y1 + h, conf, cls_conf, cls_pred])
    boxes = np.hstack([x1, y1, w, h])
    return detections, boxes, conf


def read_ground_truth_2d_detections(detection_path_2d, frame_idx, detection_matrix = None, threshold = -np.inf, nms_threshold = 0.75):
    """Read 2-D detections and apply non-max suppression.

    Each row of the detection matrix is read as
    (frame, object id, x1, y1, w, h, confidence, ...).

    Args:
        detection_path_2d: path of a comma-separated detection file; only
            read when `detection_matrix` is None.
        frame_idx: frame number to extract, or None to process every frame.
        detection_matrix: optional pre-loaded detection array.
        threshold: confidence threshold; only applied when `frame_idx` is
            given.
        nms_threshold: overlap threshold forwarded to the NMS routine.

    Returns:
        With `frame_idx` given: (detections, object_ids, detection_matrix),
        where `detections` holds [x1, y1, x2, y2, conf, -1, -1] rows that
        survived NMS (or [] if none) and `object_ids` holds the ids of all
        rows that passed the confidence threshold.
        With `frame_idx` None: (detections, object_ids, frame_indices) for
        the rows that survived per-frame NMS, grouped by ascending frame.
        For an empty detection matrix: four empty lists.
    """
    if detection_matrix is None:
        detection_matrix = np.loadtxt(detection_path_2d, delimiter=',')
    if len(detection_matrix) == 0:
        # NOTE(review): this path returns four values while every other path
        # returns three; kept as-is because callers are not visible here.
        return [], [], [], []
    if len(detection_matrix.shape) == 1:
        # A single detection is loaded as a 1-D row.
        detection_matrix = np.expand_dims(detection_matrix, axis=0)
    frame_indices = detection_matrix[:, 0].astype(np.int32)

    if frame_idx is not None:
        rows = detection_matrix[frame_indices == frame_idx]
        rows = rows[rows[:, 6].astype(np.float32) > threshold]
        # NOTE(review): object_ids is not filtered by NMS, so it can be longer
        # than `detections`; original behaviour, confirm it is intended.
        object_ids = np.expand_dims(rows[:, 1].astype(np.float32), 1)
        detections, boxes, conf = _rows_to_detections(rows)
        # conf[:, 0] stays 1-D even for one detection (np.squeeze would give 0-d).
        keep = sorted(set(deepsort_nms(boxes, nms_threshold, conf[:, 0])))
        detections = detections[keep] if keep else []
        return detections, object_ids, detection_matrix

    all_object_ids = np.expand_dims(detection_matrix[:, 1].astype(np.float32), 1)
    kept_detections = []
    kept_ids = []
    kept_frames = []
    for frame in np.unique(frame_indices):
        row_idx = np.flatnonzero(frame_indices == frame)
        frame_detections, boxes, conf = _rows_to_detections(detection_matrix[row_idx])
        keep = sorted(set(deepsort_nms(boxes, nms_threshold, conf[:, 0])))
        if not keep:
            continue
        # Ids and frame numbers are gathered through the same row selection
        # as the detections, so the three outputs stay aligned even when the
        # input rows are not sorted by frame.
        kept_detections.append(frame_detections[keep])
        kept_ids.append(all_object_ids[row_idx[keep]])
        kept_frames.append(frame_indices[row_idx[keep]])
    detections = np.vstack(kept_detections)
    object_ids = np.vstack(kept_ids)
    frame_indices = np.concatenate(kept_frames)
    return detections, object_ids, frame_indices
def read_ground_truth_3d_detections(detection_path_3d, frame_idx):
    """Read 3-D boxes from a comma-separated detection file.

    Each row is read as (frame, object id, x, y, z, l, h, w, theta).

    Args:
        detection_path_3d: path of the detection file.
        frame_idx: frame number to extract, or None to return every row.

    Returns:
        (boxes_3d, ids) when `frame_idx` is given, or
        (boxes_3d, ids, frame_indices) when it is None. `boxes_3d` has
        columns [x, y, z, l, h, w, theta]; `ids` is an (N, 1) column.
    """
    # ndmin=2 keeps a one-row file 2-D so the column slicing below works.
    detection_file = np.loadtxt(detection_path_3d, delimiter=',', ndmin=2)
    frame_indices = detection_file[:, 0].astype(np.int32)
    if frame_idx is not None:
        detection_file = detection_file[frame_indices == frame_idx]

    def column(index):
        return np.expand_dims(detection_file[:, index].astype(np.float32), 1)

    ids = column(1)
    # Columns 2..8: x, y, z, l, h, w, theta.
    boxes_3d = np.hstack([column(index) for index in range(2, 9)])
    if frame_idx is None:
        return boxes_3d, ids, frame_indices
    return boxes_3d, ids
================================================
FILE: paper_experiments/utils/resnet_reid_utils.py
================================================
import torch
import os
import sys
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from PIL import Image
import numpy as np
PACKAGE_PARENT = '..'
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
from models.resnet_reid_models import ICT_ResNet
class Feature_ResNet(nn.Module):
    """Feature extractor assembled from the non-'fc' modules of ICT_ResNet.

    Every sub-module of an ICT_ResNet whose name does not contain 'fc' is
    registered on this module and applied in registration order. When
    `output_color` is set, the `fc_c` head is kept as well and `forward`
    additionally returns the arg-max index of its output.
    """
    def __init__(self,n_layer,output_color):
        super(Feature_ResNet,self).__init__()
        all_model = ICT_ResNet(1,10,9,n_layer,pretrained=False)
        for name,modules in all_model._modules.items():
            if name.find('fc') == -1 :
                self.add_module(name,modules)
        if output_color:
            self.fc_c = all_model.fc_c
        self.output_color = output_color

    def forward(self,x):
        """Return flattened features, plus the color index if enabled.

        Args:
            x: input batch passed through every registered non-'fc' module.

        Returns:
            `features` of shape (batch, -1) when `output_color` is false,
            otherwise `(features, color)` where `color` is the arg-max over
            dim 1 of `fc_c(features)`.
        """
        for name,module in self._modules.items():
            if name.find('fc') == -1:
                x = module(x)
        x = x.view(x.size(0),-1)
        if not self.output_color:
            return x
        # Run the color head once (it used to be evaluated twice).
        logits = self.fc_c(x)
        color = torch.max(logits,dim=1)[1]
        return x,color
class ResNet_Loader(object):
    """Loads Feature_ResNet weights and extracts features from image patches."""

    def __init__(self,model_path,n_layer=50,batch_size=4,output_color=False):
        """Build the network and load the checkpoint at `model_path`.

        Checkpoint entries whose key contains 'fc' are dropped, except the
        'fc_c' entries, which are kept only when `output_color` is set.
        """
        self.batch_size = batch_size
        self.output_color = output_color
        self.model = Feature_ResNet(n_layer,output_color)
        weights = torch.load(model_path)
        for key in list(weights.keys()):
            has_fc = key.find('fc') != -1
            has_color_fc = key.find('fc_c') != -1
            if (has_fc and not has_color_fc) or (output_color == False and has_color_fc):
                del weights[key]
        self.model.load_state_dict(weights)
        self.model.eval()
        # print('loading resnet%d model'%(n_layer))
        self.compose = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225]),
        ])
        self.upsample = nn.Upsample(size=(224,224),mode='bilinear')

    def _forward_batch(self, batch, feature_list, color_list):
        """Run one stacked batch on the GPU and append its outputs in place."""
        stacked = Variable(torch.stack(batch)).cuda()
        if self.output_color == False:
            for feature in self.model(stacked):
                feature_list.append(feature.data)
        else:
            features,colors = self.model(stacked)
            feature_list.append(features.data)
            color_list.append(colors.data)

    # @profile
    def inference(self,patches):
        """Extract features (and colors, if enabled) for `patches`.

        Each patch is scaled by 255, converted to a PIL image and run through
        `self.compose`. `self.batch_size` is overwritten with `len(patches)`.

        Returns:
            A list of per-patch feature tensors when `output_color` is false,
            otherwise `(features, colors)` concatenated along dim 0.
        """
        self.model.cuda()
        feature_list = []
        color_list = []
        pending = []
        self.batch_size = len(patches)
        to_pil = transforms.ToPILImage
        for i, patch in enumerate(patches):
            pixels = (patch.cpu().numpy()*255).astype(np.uint8)
            img = self.compose(to_pil()(pixels))
            # img = self.upsample(patch.permute(2,0,1).unsqueeze_(0)).squeeze(0)
            pending.append(img)
            if (i+1)% self.batch_size == 0:
                self._forward_batch(pending, feature_list, color_list)
                pending = []
        if len(pending)>0:
            self._forward_batch(pending, feature_list, color_list)
            pending = []
        # self.model.cpu() TODO: What does this do? Why would we move model to CPU?
        if self.output_color == False:
            # feature_list = torch.cat(feature_list,dim=0)
            return feature_list
        feature_list = torch.cat(feature_list,dim=0)
        color_list = torch.cat(color_list,dim=0)
        return feature_list,color_list
================================================
FILE: paper_experiments/utils/test_jpda.py
================================================
from gurobipy import *
from numpy import *

# Timing script: builds a random numT x numC assignment problem as a binary
# program (each track assigned to exactly one candidate and vice versa),
# solves it with Gurobi and prints the solver runtime.

# Disabled debugging callback; pass it to m.optimize() to trace the MIP search.
# def mycallback(model, where):
#     if where == GRB.callback.MIP:
#         print(model.cbGet(GRB.callback.MIP_NODCNT))
#         print(model.cbGet(GRB.callback.MIP_ITRCNT), 'ITERATIONS')
#     if where == GRB.callback.MIPNODE:
#         print(model.cbGet(GRB.callback.MIPNODE_OBJBST), 'BEST OBJ')

numT = 100
numC = 100
# `random` here is numpy.random (from the star import above).
Assignment = random.random((numT,numC))

m=Model("Assignment")

# One binary decision variable per (t, c) pair. The underscore keeps names
# unique: "X%d%d" gave the same name to e.g. (1, 11) and (11, 1).
X = []
for t in range(numT):
    X.append([])
    for c in range(numC):
        X[t].append(m.addVar(vtype=GRB.BINARY,name="X%d_%d"% (t, c)))
m.update()
m.modelSense = GRB.MAXIMIZE

constraintT = []
constraintC = []
# Every row sums to one.
for t in range(numT):
    constraintT.append(m.addConstr(quicksum(X[t][c] for c in range(numC)) == 1 ,'constraintT%d' % t))
# Every column sums to one. These used to be appended to constraintT and were
# all named after the stale loop variable `t`.
for c in range(numC):
    constraintC.append(m.addConstr(quicksum(X[t][c] for t in range(numT)) == 1 ,'constraintC%d' % c))

m.setObjective(quicksum(quicksum([X[t][c]*Assignment[t][c] for c in range(numC)]) for t in range(numT)))
m.update()
#m.optimize(mycallback)
m.optimize()
print('runtime is %f'%m.Runtime)
================================================
FILE: paper_experiments/utils/test_kf/.gitignore
================================================
*.p
================================================
FILE: paper_experiments/utils/test_kf/run_kf_test.py
================================================
import sys
sys.path.insert(0, '..')
import kalman_filter
import kf_simple3d
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os.path
import pdb
np.set_printoptions(precision=4)
class Track:
    """Single Kalman-filtered track that records everything it is fed.

    The filter is chosen by `kf_type` ("2d" or "simple3d"). Each call to
    `update` appends the current state, the measurement, the ground truth,
    the frame number and the gating distance to the corresponding log.
    """

    def __init__(self, track_id, first_detection, kf_type):
        # initiate kf
        if kf_type == "2d":
            self.kf = kalman_filter.KalmanFilter()
        elif kf_type == "simple3d":
            self.kf = kf_simple3d.KalmanFilterSimple3D()
        self.mean, self.cov = self.kf.initiate(first_detection)
        self.id = track_id
        self.n = len(self.mean)            # state dimension
        self.m = len(first_detection)      # measurement dimension

        # initialize data stores
        self.frame_log = np.zeros((0))
        self.measurement_log = np.zeros((0, self.m))
        self.gt_log = np.zeros((0, self.m))
        self.mean_log = np.zeros((0, self.n))
        self.cov_log = np.zeros((0, self.n, self.n))
        self.gating_distance_log = np.zeros((0))

    def update(self, measurement, gt, frame):
        """Log the current state and inputs, then run predict + update."""
        # log data
        self.mean_log = np.vstack((self.mean_log, self.mean))
        self.cov_log = np.concatenate((self.cov_log, self.cov[np.newaxis,:,:]))
        self.measurement_log = np.vstack((self.measurement_log, measurement))
        self.gt_log = np.vstack((self.gt_log, gt))
        self.frame_log = np.append(self.frame_log, frame)
        # The gating distance uses the state from before this frame's predict.
        distance = self.kf.gating_distance(self.mean, self.cov, measurement)
        self.gating_distance_log = np.append(self.gating_distance_log, distance)

        # KF predict and update
        predicted_mean, predicted_cov = self.kf.predict(self.mean, self.cov)
        self.mean, self.cov = self.kf.update(predicted_mean, predicted_cov, measurement)

    def plot(self):
        """Plot ground truth, measurements and filtered state, then show."""
        t = self.frame_log
        gt = self.gt_log
        meas = self.measurement_log
        state = self.mean_log

        # Position components over time.
        for position, column, axis_name in ((321, 0, 'x'), (322, 1, 'y')):
            plt.subplot(position)
            plt.plot(t, gt[:,column], label='Ground Truth')
            plt.plot(t, meas[:,column], label='Measured')
            plt.plot(t, state[:,column], label='filtered')
            plt.xlabel('time')
            plt.ylabel(axis_name)
            plt.legend()

        # Trajectory in the x/y plane.
        plt.subplot(323)
        plt.plot(gt[:,0], gt[:,1], label='Ground Truth')
        plt.plot(meas[:,0], meas[:,1], label='Measured')
        plt.plot(state[:,0], state[:,1], label='filtered')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend()

        # State columns m and m+1, labelled as velocities.
        for position, column, axis_name in ((324, self.m, 'Vx'), (325, self.m+1, 'Vy')):
            plt.subplot(position)
            plt.plot(t, state[:,column], label='filtered')
            plt.xlabel('time')
            plt.ylabel(axis_name)
            plt.legend()

        plt.show()
def file2data(fname):
    """Unpickle and return the object stored in `fname`.

    For test input the data should be a list of lists of numpy arrays:
    each element of the list represents a frame, each frame is a list of
    detections, and each detection is a numpy array of measurements.
    """
    with open(fname, "rb") as handle:
        return pickle.load(handle)
def _logs_match(log1, log2, tol=1e-12):
    """Return True when two logs have equal shape and differ by less than tol."""
    log1 = np.asarray(log1)
    log2 = np.asarray(log2)
    if log1.shape != log2.shape:
        return False
    # np.all is True for empty logs, so two empty logs match.
    return bool(np.all(np.abs(log1 - log2) < tol))


def cmp_tracks(track1, track2):
    """Compare the mean log and the gating-distance log of two tracks.

    Expect a perfect match (within 1e-12) in mean log and gating distance.
    The previous check compared the logs with `==` and then tested the
    result against `> 1e-12`, so it passed whenever any single element was
    equal; the element-wise difference is checked now.

    Returns:
        True when both logs match, False otherwise.
    """
    mean_log_pass = _logs_match(track1.mean_log, track2.mean_log)
    gating_distance_pass = _logs_match(track1.gating_distance_log,
                                       track2.gating_distance_log)
    return mean_log_pass and gating_distance_pass
def cmp(data, val):
    """Compare every track in `data` with the same-keyed track in `val`.

    Prints one line per track and returns True only if all tracks matched.
    """
    all_matched = True
    for track_key in data:
        if cmp_tracks(data[track_key], val[track_key]):
            print("Tracks matched: ", track_key)
        else:
            print("Mismatch found in track: ", track_key)
            # pdb.set_trace()
            all_matched = False
    return all_matched
def validate(data, fname):
    """Check `data` against the reference pickle `fname`.

    If the reference file does not exist yet, `data` is written to it and
    the validation counts as passed.
    """
    if not os.path.isfile(fname):
        with open(fname, "wb") as handle:
            pickle.dump(data, handle)
        return True
    reference = file2data(fname)
    return cmp(data, reference)
def run_kf_test(fname, kf_type):
    """Replay the detections in `fname` through per-id tracks and validate.

    Tracks are created from the detections of the first frame only, then
    every frame (including the first) is fed to the matching track. The
    result is validated against `fname + ".val"`; on a mismatch every track
    is plotted.
    """
    print("Running test for: {}".format(fname))
    frames = file2data(fname)

    tracks = {}
    for detection in frames[0]:
        meas, gt, gt_id = detection[0], detection[1], detection[2]
        tracks[gt_id] = Track(gt_id, meas, kf_type)

    for frame_number, frame in enumerate(frames):
        for detection in frame:
            meas, gt, gt_id = detection[0], detection[1], detection[2]
            tracks[gt_id].update(meas, gt, frame_number)

    if not validate(tracks, fname + ".val"):
        for track in tracks.values():
            track.plot()
if __name__=='__main__':
    # (input pickle, Kalman filter type) pairs, run in this order.
    test_cases = (
        ("single_track_4state_test.p", "2d"),
        ("two_track_4state_test.p", "2d"),
        ("single_track_6state_test.p", "simple3d"),
    )
    for test_file, filter_type in test_cases:
        run_kf_test(test_file, filter_type)
================================================
FILE: paper_experiments/utils/test_kf/write_kf_test.py
================================================
import sys
sys.path.insert(0, '..')
import kalman_filter
import numpy as np
import pickle
import pdb
def data2file(data, fname):
    """Pickle `data` into the file `fname`.

    The data should be a list in which each element represents a frame;
    each frame is a list of detections, and each detection is a list of
    [measurement, ground truth, gt track id].
    """
    with open(fname, "wb") as handle:
        pickle.dump(data, handle)
def add_noise(center, std):
    """Draw one normal sample per element: mean center[i], std-dev std[i].

    The result is allocated with np.zeros_like(center), so it has the dtype
    of `center`.
    NOTE(review): for an integer `center` the samples are therefore stored
    as integers -- confirm that this truncation is intended.
    """
    noisy = np.zeros_like(center)
    for index, mean_value in enumerate(center):
        noisy[index] = np.random.normal(mean_value, std[index])
    return noisy
def single_track_4state_test():
    """Write 100 frames of one noisy track to single_track_4state_test.p.

    The ground truth is [x, y, a, h] = [10*i, 10*i, 2, 400] for frame i; the
    random seed is fixed to 0.
    """
    np.random.seed(0)
    measurement_std = [15, 15, 0.1, 5]
    frames = []
    # Iterate over frames
    for step in range(100):
        # Track 0
        truth = np.array([step*10, step*10, 2, 400])
        measured = add_noise(truth, measurement_std)
        frames.append([[measured, truth, 0]])
    data2file(frames, "single_track_4state_test.p")
def two_track_4state_test():
    """Write 100 frames of two noisy tracks to two_track_4state_test.p.

    Track 0 follows [10*i, 10*i, 2, 400]; track 1 follows
    [i*i/10, 30*sqrt(i), 2, 400]. The random seed is fixed to 0 and track 0
    is sampled before track 1 in every frame.
    """
    np.random.seed(0)
    frames = []
    # Iterate over frames
    for step in range(100):
        # Track 0
        truth0 = np.array([step*10, step*10, 2, 400])
        measured0 = add_noise(truth0, [15, 15, 0.1, 5])

        # Track 1
        truth1 = np.array([step*step/10, step**0.5*30, 2, 400])
        measured1 = add_noise(truth1, [20, 10, 0.2, 8])

        frames.append([[measured0, truth0, 0],
                       [measured1, truth1, 1]])
    data2file(frames, "two_track_4state_test.p")
def single_track_6state_test():
    """Write 100 frames of one noisy 6-value track to single_track_6state_test.p.

    The nominal values are [x, y, l, h, w, theta] = [10*i, 10*i, 400, 400,
    400, i/10]. Noise is first added to obtain the stored ground truth and
    then added again on top of it to obtain the measurement. The random seed
    is fixed to 0.
    """
    np.random.seed(0)
    truth_std = [3, 3, 1, 1, 1, 1*0.017]
    measurement_std = [15, 15, 5, 5, 5, 5*0.017]
    frames = []
    # Iterate over frames
    for step in range(100):
        # Track 0
        nominal = np.array([step*10, step*10, 400, 400, 400, step/10])
        truth = add_noise(nominal, truth_std)
        measured = add_noise(truth, measurement_std)
        frames.append([[measured, truth, 0]])
    data2file(frames, "single_track_6state_test.p")
if __name__=='__main__':
    # Regenerate all three test inputs, in this order.
    for build_test_file in (single_track_4state_test,
                            two_track_4state_test,
                            single_track_6state_test):
        build_test_file()
================================================
FILE: paper_experiments/utils/track.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
import copy
from .imm import IMMFilter2D
class TrackState:
    """
    Enumeration of the states a single target track can be in.

    A newly created track is `Tentative` until enough evidence has been
    collected, at which point it becomes `Confirmed`. A track that is no
    longer alive is marked `Deleted` so that it can be removed from the set
    of active tracks.
    """

    Tentative, Confirmed, Deleted = 1, 2, 3
class Track:
    """
    A single target track in image space.

    The first four state entries are handled as `(x, y, w, h)` (box center,
    width, height): measurements are taken from `Detection.to_xywh()` and
    `to_tlwh` obtains the top-left corner by subtracting half of entries 2-3
    from entries 0-1. Entries from index 4 onwards receive the velocity
    estimate written by `update_velocity`. When `model_probabilities` is not
    None the track is driven by an IMM filter and `mean` / `covariance` are
    indexed with one leading row per motion model.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    model_probabilities : Optional[ndarray]
        IMM model probabilities, or None when a single Kalman filter is used.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of measurement updates (`hits`) required before the track is
        confirmed. A tentative track is set to `Deleted` as soon as it is
        marked missed.
    max_age : int
        The track is set to `Deleted` when it is marked missed while
        `time_since_update` exceeds this value.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the originating detection.
    cuda : bool
        Selects `torch.cuda.FloatTensor` instead of `torch.FloatTensor` for
        tensors fed to the LSTM.
    lstm : Optional[callable]
        If given, features are passed through it before being cached.
    kf_appearance_feature : bool
        Stored on the instance; not used inside this class.
    last_det : Optional[Detection]
        Detection the track was created from.

    Attributes
    ----------
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features, features_2d : List[ndarray]
        Caches of features collected since they were last emptied by the
        owner of the track.
    matched : bool
        Whether the last JPDA update favoured a real detection over the
        "no detection" hypothesis (index 0 of the marginalization).
    exiting : bool
        Set through `mark_exiting`.
    """

    def __init__(self, mean, covariance, model_probabilities, track_id, n_init, max_age,
                 feature=None, appearance_feature=None, cuda=False, lstm=None,
                 kf_appearance_feature=False, last_det=None):
        self.mean = mean
        self.covariance = covariance
        self.model_probabilities = model_probabilities
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        self.kf_appearance_feature = kf_appearance_feature
        if lstm is None:
            # Without an LSTM the raw values are cached as-is (even if None).
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            # The constructor only has the raw feature vectors, not a list of
            # detections, so it must not go through feature_update().
            self._store_features(feature, appearance_feature, lstm)
        if self.model_probabilities is not None:
            self.first_detection = mean[:, :4]
        else:
            self.first_detection = mean[:4]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False
        self.next_to_last_detection = None
        self.last_detection = last_det
        self.last_2d_det = last_det

    def to_tlwh(self, kf):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.

        Parameters
        ----------
        kf : object
            Unused; kept so that callers can pass the filter.

        Returns
        -------
        ndarray
            The bounding box.
        """
        if self.model_probabilities is None:
            if self.last_2d_det is not None:  # TODO: This part
                ret = self.last_2d_det.to_xywh()
            else:
                ret = self.mean[:4].copy()
        else:
            mean, _ = IMMFilter2D.combine_states(
                self.mean, self.covariance, self.model_probabilities)
            ret = mean[:4].copy()
        # center -> top-left corner
        ret[:2] -= ret[2:] / 2
        return ret

    def to_tlbr(self):
        """Get current position in bounding box format `(min x, min y, max x,
        max y)`.

        Returns
        -------
        ndarray
            The bounding box.
        """
        ret = self.to_tlwh(None)
        ret[2:] = ret[:2] + ret[2:]
        return ret

    def update_feature(self, img, appearance_model):
        """Crop the track's current box from `img` and embed it.

        The box is read from a single-model state as `(x, y, w, h)`, matching
        `to_tlwh`. The crop is clipped at the top/left image border so that
        negative indices do not wrap around.

        Parameters
        ----------
        img : torch.Tensor
            Image indexed as `[channel, row, column]`.
        appearance_model : callable
            Called with the crop (plus a leading batch axis); must return a
            pair whose first item is the feature batch.

        Returns
        -------
        Optional[torch.Tensor]
            The feature with the batch axis removed, or None when the crop is
            empty.
        """
        center_x = float(self.mean[0])
        center_y = float(self.mean[1])
        box_w = int(round(float(self.mean[2])))
        box_h = int(round(float(self.mean[3])))
        left = int(round(center_x - box_w / 2))
        top = int(round(center_y - box_h / 2))
        right = max(left + box_w, 0)
        bottom = max(top + box_h, 0)
        left = max(left, 0)
        top = max(top, 0)
        patch = img[:, top:bottom, left:right]
        if patch is None or patch.nelement() == 0:
            return None
        patch = patch.unsqueeze(0)
        with torch.no_grad():
            feature, _ = appearance_model(patch)
        return feature.squeeze(0)

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : object
            The filter. For a single-model track it is called as
            `kf.predict(mean, covariance, last_detection,
            next_to_last_detection)`; for an IMM track as
            `kf.predict(mean, covariance, model_probabilities)`.
        """
        if self.model_probabilities is None:
            self.mean, self.covariance = kf.predict(
                self.mean, self.covariance,
                self.last_detection, self.next_to_last_detection)
        else:
            self.mean, self.covariance, self.model_probabilities = kf.predict(
                self.mean, self.covariance, self.model_probabilities)
        self.age += 1
        self.time_since_update += 1

    # @profile
    def update(self, kf, detection, detections_3d=None,
               marginalization=None, detection_idx=None, JPDA=False,
               cur_frame=None, appearance_model=None, lstm=None,
               only_feature=False):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : object
            The Kalman (or IMM) filter.
        detection : List[Detection]
            All detections of the current frame.
        marginalization : Optional[ndarray]
            JPDA association probabilities for this track; index 0 is the
            "no detection" hypothesis. Only used when `JPDA` is true.
        detection_idx : int
            Index into `detection` of the associated detection. In JPDA mode a
            negative value means that no detection was assigned.
        JPDA : bool
            Selects the JPDA update.
        lstm : Optional[callable]
            Forwarded to `feature_update`.
        detections_3d, cur_frame, appearance_model, only_feature
            Accepted for call compatibility; unused here.
        """
        candidates = detection
        if JPDA:
            measurements = [det.to_xywh() for det in candidates]
            if self.model_probabilities is None:
                self.mean, self.covariance = kf.update(
                    self.mean, self.covariance, measurements, marginalization, JPDA)
            else:
                self.mean, self.covariance, self.model_probabilities = kf.update(
                    self.mean, self.covariance, measurements,
                    self.model_probabilities, marginalization, JPDA)
            self.matched = bool(np.argmax(marginalization) != 0)
            if detection_idx < 0:
                # No detection assigned: the feature cache must stay untouched
                # (indexing with -1 would silently pick the last detection).
                self.last_2d_det = None
                return
            self.feature_update(candidates, detection_idx, lstm)
            self.hits += 1
            self.time_since_update = 0
            detection = candidates[detection_idx]
            self.last_2d_det = detection
        else:
            detection = candidates[detection_idx]
            # The filters are initiated and JPDA-updated with (x, y, w, h)
            # measurements, so the same parametrisation is used here.
            measurement = detection.to_xywh()
            if self.model_probabilities is None:
                self.mean, self.covariance = kf.update(
                    self.mean, self.covariance, measurement)
            else:
                self.mean, self.covariance, self.model_probabilities = kf.update(
                    self.mean, self.covariance, measurement, self.model_probabilities)
            self.feature_update(candidates, detection_idx, lstm)
            self.hits += 1
            self.time_since_update = 0
        if detection.box_3d is not None:
            self.next_to_last_detection = self.last_detection
            self.last_detection = detection
        if self.age == 2:
            self.update_velocity(detection.to_xywh())
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def delete_track(self):
        """Force the track into the `Deleted` state."""
        self.state = TrackState.Deleted

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def update_velocity(self, new_detection):
        """Seed the velocity part of the state from the first two detections.

        The velocity estimate is `new_detection - first_detection`. The
        velocity-related covariance entries (velocity columns, plus the
        velocity rows of the position columns) are divided by 4 exactly once,
        for single-model and IMM tracks alike.

        Parameters
        ----------
        new_detection : ndarray
            Measurement `(x, y, w, h)` of the second detection.
        """
        velocity_estimate = new_detection - self.first_detection
        if self.model_probabilities is not None:
            # One state row per motion model; first_detection is 2-D here, so
            # the subtraction above broadcasts to one estimate per model.
            for kf_n in range(self.mean.shape[0]):
                self.mean[kf_n, 4:] = velocity_estimate[kf_n, :4]
                # Reduce covariance of velocity by 4 times (half the standard deviation)
                self.covariance[kf_n, :, 4:] /= 4
                self.covariance[kf_n, 4:, :4] /= 4
        else:
            self.mean[4:] = velocity_estimate[:4]
            # Reduce covariance of velocity by 4 times (half the standard deviation)
            self.covariance[:, 4:] /= 4
            self.covariance[4:, :4] /= 4

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted

    def is_exiting(self):
        """Returns True if the track has been flagged via `mark_exiting`."""
        return self.exiting

    def mark_exiting(self):
        """Flag the track as exiting."""
        self.exiting = True

    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
        """Add the features of the associated detection(s) to the caches.

        Parameters
        ----------
        detections : List[Detection]
            Detections of the current frame.
        detection_idx : int
            Index of the associated detection (ignored when `JPDA` is true).
        lstm : Optional[callable]
            If given, `feature` is passed through it before being cached.
        JPDA : bool
            If true, the features of all detections are blended with the
            weights `marginalization[1:]` instead of picking one detection.
        marginalization : Optional[ndarray]
            Association probabilities; required when `JPDA` is true.
        """
        if JPDA:
            weights = marginalization[1:].reshape(-1, 1)
            features = [d.feature for d in detections]
            appearance_features = [d.appearance_feature for d in detections]
            if features and all(f is not None for f in features):
                stacked = np.array(features).reshape(len(features), -1)
                self.features.append(
                    np.sum(stacked * weights, axis=0).astype(np.float32))
            if appearance_features and all(f is not None for f in appearance_features):
                stacked = np.array(appearance_features).reshape(len(appearance_features), -1)
                self.features_2d.append(
                    np.sum(stacked * weights, axis=0).astype(np.float32))
        else:
            chosen = detections[detection_idx]
            self._store_features(chosen.feature, chosen.appearance_feature, lstm)

    def _store_features(self, feature, appearance_feature, lstm):
        """Cache one feature / appearance-feature pair, skipping None values."""
        if feature is not None:
            if lstm is not None:
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)
================================================
FILE: paper_experiments/utils/track_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
class TrackState:
    """
    Integer codes describing the life cycle of a single target track.

    A freshly created track starts out as `Tentative`. Once enough evidence
    has been gathered it is promoted to `Confirmed`. A track that is no longer
    alive is flagged as `Deleted` so that it can be removed from the set of
    active tracks.
    """

    Tentative, Confirmed, Deleted = 1, 2, 3
class Track_3d:
    """
    A single target track filtered in 3D.

    The first seven state entries are the box part of the state: they are
    kept as `first_detection` and returned by `to_tlwh3d`.
    NOTE(review): presumably `(x, y, z, l, w, h, heading)` with the remaining
    entries being velocities - confirm against the 3D Kalman filter.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of measurement updates (`hits`) required before the track is
        confirmed. A tentative track is set to `Deleted` as soon as it is
        marked missed.
    max_age : int
        The track is set to `Deleted` when it is marked missed while
        `time_since_update` exceeds this value.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the originating detection.
    cuda : bool
        Selects `torch.cuda.FloatTensor` instead of `torch.FloatTensor` for
        tensors fed to the LSTM.
    lstm : Optional[callable]
        If given, the initial feature is passed through it before caching.

    Attributes
    ----------
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features, features_2d : List[ndarray]
        Feature caches. This class only fills them in the constructor.
    """

    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, appearance_feature=None, cuda=False, lstm=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda
        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        if lstm is None:
            # Without an LSTM the raw values are cached as-is (even if None).
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            self._store_features(feature, appearance_feature, lstm)
        self.first_detection = mean[:7]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False

    def _store_features(self, feature, appearance_feature, lstm):
        """Cache one feature / appearance-feature pair, skipping None values.

        `feature` is run through `lstm` first; the returned hidden state is
        kept in `self.hidden` and fed back on the next call.
        """
        if feature is not None:
            input_feature = torch.Tensor(feature).type(self.tensor).unsqueeze(0)
            with torch.no_grad():
                if self.hidden is None:
                    output_feature, self.hidden = lstm(input_feature)
                else:
                    output_feature, self.hidden = lstm(input_feature, self.hidden)
            self.features.append(output_feature.cpu().numpy().squeeze(0))
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def to_tlwh3d(self):
        """Return a copy of the first seven state entries (the 3D box).

        Returns
        -------
        ndarray
            The 3D bounding box part of the state.
        """
        ret = self.mean[[0, 1, 2, 3, 4, 5, 6]].copy()
        return ret

    def to_tlwh(self, kf):
        """Get the axis-aligned 2D box `(min x, min y, width, height)` that
        encloses the corner points reported by the filter.

        Parameters
        ----------
        kf : object
            Filter providing `calculate_corners`.

        Returns
        -------
        ndarray
            The bounding box.
        """
        # NOTE(review): the filter is passed to its own method here; this looks
        # like it should receive the track state instead - confirm against
        # `calculate_corners` before relying on this method.
        corner_points, _ = kf.calculate_corners(kf)
        min_x, min_y = np.amin(corner_points, axis=0)[:2]
        max_x, max_y = np.amax(corner_points, axis=0)[:2]
        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : object
            The Kalman filter; called as `kf.predict(mean, covariance)`.
        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    # @profile
    def update(self, kf, detection, compare_2d=False,
               marginalization=None, detection_idx=None, JPDA=False, lstm=None):
        """Perform Kalman filter measurement update step.

        Parameters
        ----------
        kf : object
            The Kalman filter.
        detection : List[Detection]
            All detections of the current frame.
        compare_2d : bool
            If true, the 3D boxes are withheld from the filter (None is passed
            in their place).
        marginalization : Optional[ndarray]
            JPDA association probabilities for this track.
        detection_idx : int
            Index into `detection` of the associated detection. In JPDA mode a
            negative value means that no detection was assigned.
        JPDA : bool
            Selects the JPDA update.
        lstm : Optional[callable]
            Accepted for call compatibility; unused here.
        """
        if JPDA:
            detections_2d = [det.tlwh for det in detection]
            if compare_2d:
                detections_3d = None
            else:
                detections_3d = [det.box_3d for det in detection]
            self.mean, self.covariance, self.mean_post_3d = kf.update(
                self.mean, self.covariance, detections_2d, detections_3d,
                marginalization, JPDA)
            if detection_idx < 0:
                return
            detection = detection[detection_idx]
        else:
            detection = detection[detection_idx]
            # Same rule as in the JPDA branch: no 3D measurement when only the
            # 2D boxes are to be compared.
            box_3d = None if compare_2d else detection.box_3d
            self.mean, self.covariance = kf.update(
                self.mean, self.covariance, detection.tlwh, box_3d)
        self.hits += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
================================================
FILE: paper_experiments/utils/tracker.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
from . import kf_2d, kf_3d, double_measurement_kf, imm
from . import linear_assignment
from . import iou_matching
from .track import Track
from . import JPDA_matching
from . import tracking_utils
import math
from nn_matching import NearestNeighborDistanceMetric
import cv2
class Tracker:
    """
    This is the multi-target tracker (2D, image space).

    Parameters
    ----------
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of measurement updates before a track is confirmed. A track is
        set to `Deleted` if it is missed while still tentative.
    JPDA : bool
        Use JPDA association instead of the matching cascade.
    m_best_sol : int
        Forwarded to `linear_assignment.JPDA` as `m`.
    assn_thresh : float
        Forwarded to `JPDA_matching.Matcher` as `assignment_threshold`.
    matching_strategy : object
        Forwarded to `JPDA_matching.Matcher`.
    kf_appearance_feature : object
        Forwarded to every new `Track`.
    gate_full_state : bool
        If false, gating is restricted to the position (`only_position`).
    lstm, cuda, appearance_model : object
        Forwarded to the tracks.
    calib : object
        Unused here.
    kf_vel_params : tuple
        Parameters of the constant-velocity filter.
    dummy_node_cost_iou, dummy_node_cost_app : float
        Dummy-node costs forwarded to the association routines.
    nn_budget : Optional[int]
        Forwarded to `NearestNeighborDistanceMetric`.
    use_imm : bool
        Use `imm.IMMFilter2D` instead of `kf_2d.KalmanFilter2D`.
    kf_walk_params : tuple
        Parameters of the second IMM motion model.
    markov : tuple
        Forwarded to `imm.IMMFilter2D`.
    uncertainty_limit : float
        Stored; only referenced by disabled code.
    optical_flow : bool
        If true, dense optical flow between the previous and the current frame
        is handed to the filter when new tracks are initiated.
    gate_limit : float
        Forwarded to `kf_2d.KalmanFilter2D`.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    kf : kf_2d.KalmanFilter2D or imm.IMMFilter2D
        The filter used to propagate target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.
    """

    def __init__(self, max_age=5, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None,
                 kf_appearance_feature=None,
                 gate_full_state=False, lstm=None, cuda=False, appearance_model=None,
                 calib=None, kf_vel_params=(1./20, 1./160, 1, 1, 2),
                 dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget=None,
                 use_imm=False, kf_walk_params=(1./20, 1./160, 1, 1, 2),
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False,
                 gate_limit=400):
        self.max_age = max_age
        self.n_init = n_init
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        if not use_imm:
            self.kf = kf_2d.KalmanFilter2D(*kf_vel_params, gate_limit)
            self.use_imm = False
        else:
            self.kf = imm.IMMFilter2D(kf_vel_params, kf_walk_params, markov=markov)
            self.use_imm = True
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.kf_appearance_feature = kf_appearance_feature
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.appearance_model = appearance_model
        self.prev_frame = None
        self.uncertainty_limit = uncertainty_limit
        self.optical_flow = optical_flow

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=False):
        """Build the association cost for the given tracks and detections.

        Returns
        -------
        (ndarray, object)
            The appearance and IoU cost matrices stacked along a third axis,
            and the gate mask from `linear_assignment.gate_cost_matrix`.
        """
        targets = np.array([tracks[i].track_id for i in track_indices])
        # Switch to the 2D appearance features whenever the metric asks for it.
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = np.array([dets[i].appearance_feature for i in detection_indices])
        else:
            features = np.array([dets[i].feature for i in detection_indices])
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices)
        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))
        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.
        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, cur_frame, detections, compare_2d=False):
        """Perform measurement update and track management.

        Parameters
        ----------
        cur_frame : torch.Tensor
            Current image; it is scaled by 255, permuted from `(0, 1, 2)` to
            `(1, 2, 0)` axis order and converted to grayscale.
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.
        compare_2d : bool
            Forwarded to the association step.
        """
        self.cur_frame = cv2.cvtColor(
            (255 * cur_frame).permute(1, 2, 0).cpu().numpy(), cv2.COLOR_BGR2GRAY)
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections, compare_2d)

        # Update the filter of every assigned track. Only needed without JPDA,
        # because with JPDA the states are updated during matching.
        if not self.JPDA:
            for track_idx, detection_idx in matches:
                self.tracks[track_idx].update(
                    self.kf, detections,
                    detection_idx=detection_idx, JPDA=self.JPDA,
                    cur_frame=self.cur_frame, appearance_model=self.appearance_model,
                    lstm=self.lstm)

        # update track state for unmatched tracks
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        self.prune_tracks()

        # create new tracks
        flow = None
        if unmatched_detections:
            if self.optical_flow and self.prev_frame is not None:
                flow = cv2.calcOpticalFlowFarneback(
                    self.prev_frame, self.cur_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx], flow)

        # Update distance metric; the per-track caches are emptied afterwards.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(features_2d),
            np.asarray(targets), np.asarray(targets_2d), active_targets)
        self.prev_frame = self.cur_frame

    # @profile
    def _match(self, detections, compare_2d):
        """Associate tracks and detections.

        With JPDA the track filters are updated here as a side effect.

        Returns
        -------
        (matches, unmatched_tracks, unmatched_detections)
        """
        if self.JPDA:
            # Run JPDA on all tracks
            marginalizations = linear_assignment.JPDA(
                self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou,
                self.tracks, detections, m=self.m_best_sol, compare_2d=compare_2d)
            jpda_matcher = JPDA_matching.Matcher(
                detections, marginalizations, range(len(self.tracks)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

            # Map matched tracks to detections, unmatched tracks to -1
            track_detection_map = {t: d for (t, d) in matches_a}
            for t in unmatched_tracks_a:
                track_detection_map[t] = -1

            # update Kalman state
            if marginalizations.shape[0] > 0:
                for i in range(len(self.tracks)):
                    self.tracks[i].update(
                        self.kf, detections,
                        marginalization=marginalizations[i, :],
                        detection_idx=track_detection_map[i],
                        JPDA=self.JPDA, cur_frame=self.cur_frame,
                        appearance_model=self.appearance_model, lstm=self.lstm)
        else:
            confirmed_tracks = [i for i, t in enumerate(self.tracks) if t.is_confirmed()]
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, self.dummy_node_cost_iou, self.max_age,
                    self.tracks, detections, confirmed_tracks, compare_2d=compare_2d)
        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection, flow=None):
        """Create a new track from an unmatched detection."""
        if self.use_imm:
            mean, covariance, model_probabilities = self.kf.initiate(detection.to_xywh(), flow)
        else:
            mean, covariance = self.kf.initiate(detection.to_xywh(), flow)
            model_probabilities = None
        self.tracks.append(Track(
            mean, covariance, model_probabilities, self._next_id, self.n_init, self.max_age,
            kf_appearance_feature=self.kf_appearance_feature,
            feature=detection.feature, appearance_feature=detection.appearance_feature,
            cuda=self.cuda, lstm=self.lstm, last_det=detection))
        self._next_id += 1

    def prune_tracks(self):
        """Flag tracks that leave the image sideways and drop deleted tracks.

        A track counts as leaving when it moves away from the image center
        (positive cosine between its offset from the center and its velocity)
        and its box reaches the left or right image border. The first time
        this happens the track is marked as exiting; if it is already marked
        and was not matched, it is deleted.
        """
        h, w = self.cur_frame.shape
        image_center = np.array([w / 2, h / 2])
        # Threshold for deciding whether track is outside image
        BORDER_VALUE = 0
        for track in self.tracks:
            if self.use_imm:
                # TODO: This doesn't predict. Mean should def predict
                predicted_mean, _ = self.kf.combine_states(
                    track.mean, track.covariance, track.model_probabilities)
                # IMM means hold one row per model, so the width has to come
                # from the combined state rather than from track.mean[2].
                half_width = predicted_mean[2] / 2
            else:
                predicted_mean = self.kf.predict_mean(track.mean)
                half_width = track.mean[2] / 2
            predicted_pos = predicted_mean[:2]
            predicted_vel = predicted_mean[4:6]
            # Work on a shifted copy so the filter output is never modified.
            offset = predicted_pos - image_center
            cos_theta = np.dot(offset, predicted_vel) / (
                np.linalg.norm(offset) * np.linalg.norm(predicted_vel) + 1e-6)
            at_border = (predicted_pos[0] - half_width <= BORDER_VALUE or
                         predicted_pos[0] + half_width >= w - BORDER_VALUE)
            if cos_theta > 0 and at_border:
                if track.is_exiting() and not track.matched:
                    track.delete_track()
                else:
                    track.mark_exiting()
        self.tracks = [t for t in self.tracks if not t.is_deleted()]
================================================
FILE: paper_experiments/utils/tracker_3d.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
from . import double_measurement_kf
from . import linear_assignment
from . import iou_matching
from .track_3d import Track_3d
from . import JPDA_matching
from . import tracking_utils
import math
from nn_matching import NearestNeighborDistanceMetric
class Tracker_3d:
    """
    This is the multi-target tracker (3D).

    Parameters
    ----------
    max_age : int
        Maximum number of missed misses before a track is deleted.
    n_init : int
        Number of measurement updates before a track is confirmed. A track is
        set to `Deleted` if it is missed while still tentative.
    JPDA : bool
        Use JPDA association instead of the matching cascade.
    m_best_sol : int
        Forwarded to `linear_assignment.JPDA` as `m`.
    assn_thresh : float
        Forwarded to `JPDA_matching.Matcher` as `assignment_threshold`.
    matching_strategy : object
        Forwarded to `JPDA_matching.Matcher`.
    appearance_model, lstm, cuda : object
        Stored; `lstm` and `cuda` are forwarded to the tracks.
    gate_full_state : bool
        If false, gating is restricted to the position (`only_position`).
    calib, omni : object
        Forwarded to `double_measurement_kf.KF_3D`.
    kf_vel_params : tuple
        Parameters of the filter.
    dummy_node_cost : float
        Dummy-node cost forwarded to the association routines.
    nn_budget : Optional[int]
        Forwarded to `NearestNeighborDistanceMetric`.
    use_imm, markov, uncertainty_limit, optical_flow, gate_limit
        Accepted for signature parity with the 2D tracker; unused here.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    kf : double_measurement_kf.KF_3D
        The filter used to propagate target trajectories.
    tracks : List[Track_3d]
        The list of active tracks at the current time step.
    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model=None,
                 gate_full_state=False, lstm=None, cuda=False, calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost=0.2,
                 nn_budget=None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False,
                 gate_limit=400):
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.max_age = max_age
        self.n_init = n_init
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost = dummy_node_cost
        self.appearance_model = appearance_model

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        """Build the association cost for the given tracks and detections.

        The IoU cost is only computed when every selected detection carries a
        3D box; otherwise it is a matrix of ones.

        Returns
        -------
        (ndarray, object)
            The appearance and IoU cost matrices stacked along a third axis,
            and the gate mask from `linear_assignment.gate_cost_matrix`.
        """
        targets = np.array([tracks[i].track_id for i in track_indices])
        # Switch to the 2D appearance features whenever the metric asks for it.
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = np.array([dets[i].appearance_feature for i in detection_indices])
        else:
            features = np.array([dets[i].feature for i in detection_indices])
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        use_3d = all(dets[i].box_3d is not None for i in detection_indices)
        if use_3d:
            cost_matrix_iou = iou_matching.iou_cost(
                tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            cost_matrix_iou = np.ones(cost_matrix_appearance.shape)
        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))
        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.
        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections, compare_2d):
        """Perform measurement update and track management.

        Parameters
        ----------
        input_img : object
            Unused; kept for signature parity with the 2D tracker.
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.
        compare_2d : bool
            Forwarded to the association step.
        """
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections, compare_2d)

        # Update the filter of every assigned track. Only needed without JPDA,
        # because with JPDA the states are updated during matching.
        if not self.JPDA:
            for track_idx, detection_idx in matches:
                # Only keywords that Track_3d.update() accepts are passed,
                # mirroring the call made in the JPDA path of _match().
                self.tracks[track_idx].update(
                    self.kf, detections,
                    detection_idx=detection_idx, JPDA=self.JPDA, lstm=self.lstm)

        # update track state for unmatched tracks
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        self.prune_tracks()

        # create new tracks
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

        # Update distance metric; the per-track caches are emptied afterwards.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(features_2d),
            np.asarray(targets), np.asarray(targets_2d), active_targets)

    # @profile
    def _match(self, detections, compare_2d):
        """Associate tracks and detections.

        With JPDA the track filters are updated here as a side effect.

        Returns
        -------
        (matches, unmatched_tracks, unmatched_detections)
        """
        if self.JPDA:
            # NOTE(review): argument layout follows the call the 2D tracker
            # makes into the same module (separate appearance / IoU dummy
            # costs, `m`); the single configured cost is used for both -
            # confirm against linear_assignment.JPDA.
            marginalizations = linear_assignment.JPDA(
                self.gated_metric, self.dummy_node_cost, self.dummy_node_cost,
                self.tracks, detections, m=self.m_best_sol, compare_2d=compare_2d)
            jpda_matcher = JPDA_matching.Matcher(
                detections, marginalizations, range(len(self.tracks)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

            # Map matched tracks to detections, unmatched tracks to -1
            track_detection_map = {t: d for (t, d) in matches_a}
            for t in unmatched_tracks_a:
                track_detection_map[t] = -1

            # update Kalman state
            if marginalizations.shape[0] > 0:
                for i in range(len(self.tracks)):
                    self.tracks[i].update(
                        self.kf, detections,
                        marginalization=marginalizations[i, :],
                        detection_idx=track_detection_map[i],
                        JPDA=self.JPDA, lstm=self.lstm)
        else:
            confirmed_tracks = [i for i, t in enumerate(self.tracks) if t.is_confirmed()]
            # Keep using the metric's threshold when it provides one; fall
            # back to the configured dummy-node cost otherwise.
            threshold = getattr(self.metric, "matching_threshold", self.dummy_node_cost)
            matches_a, unmatched_tracks_a, unmatched_detections = \
                linear_assignment.matching_cascade(
                    self.gated_metric, threshold, self.max_age,
                    self.tracks, detections, confirmed_tracks, compare_2d=compare_2d)
        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        """Create a new track from an unmatched detection that has a 3D box."""
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature=detection.appearance_feature,
            cuda=self.cuda, lstm=self.lstm))
        self._next_id += 1

    def prune_tracks(self):
        """Drop every track that is in the deleted state."""
        self.tracks = [t for t in self.tracks if not t.is_deleted()]
================================================
FILE: paper_experiments/utils/tracking_utils.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from .aligned_reid_utils import load_state_dict
from models.yolo_models import Darknet
from .featurepointnet_model_util import rotate_pc_along_y
from .deep_sort_utils import non_max_suppression as deepsort_nms
import math
from .detection import Detection
def create_detector(config_path, weight_path, cuda):
    """Build a `Darknet` detector, load its weights and put it in eval mode.

    Parameters
    ----------
    config_path : str
        Passed to the `Darknet` constructor.
    weight_path : str
        Passed to `load_weights`.
    cuda : bool
        If true, `.cuda()` is called on the network before `.eval()`.

    Returns
    -------
    Darknet
        The prepared network.
    """
    network = Darknet(config_path)
    network.load_weights(weight_path)
    if cuda:
        network.cuda()
    network.eval()
    return network
def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Cut one fixed-size point patch out of `point_cloud` for every 3D box.

    Parameters
    ----------
    point_cloud : ndarray
        Points, one per row; columns 0-2 are read as x, y, z.
    box_3d : sequence
        Boxes; entries 0-2 are used as the box position, entries 3, 4, 5 as
        the extents along x, y, z and entry 6 as an angle.
        NOTE(review): presumably a heading angle around the y axis - confirm.
    ids_3d : sequence
        One id per box; boxes with id -1 are skipped. Modified in place: the
        id of a box that contains no points is set to -1.
    rot_angles : sequence
        One angle per box, forwarded to `get_center_view_point_set`.
    num_point : int
        Number of points every returned patch is re-sampled to.

    Returns
    -------
    (list, sequence)
        A list with one entry per box (None for skipped or empty boxes) and
        the updated `ids_3d`.
    """
    depth_patches = []
    for i, box in enumerate(box_3d):
        if ids_3d[i] == -1:
            depth_patches.append(None)
            continue
        box_center = np.asarray([ [box[0], box[1], box[2]] ])
        # The return value is ignored, so rotate_pc_along_y appears to rotate
        # its argument in place - TODO confirm.
        rotate_pc_along_y(box_center, np.pi/2 + np.squeeze(box[6]))
        box_center = box_center[0]
        # Rotate the whole cloud by the same angle so the box test below can
        # be done with plain per-axis comparisons.
        rotate_pc_along_y(point_cloud, np.pi/2 + np.squeeze(box[6]))
        x = point_cloud[:, 0]
        y = point_cloud[:, 1]
        z = point_cloud[:, 2]
        idx_1 = np.logical_and(x >= float(box_center[0] - box[3]/2.0), x <= float(box_center[0] + box[3]/2.0))
        # y is tested from (center - extent) up to (center + 0.1), unlike the
        # symmetric x and z tests.
        idx_2 = np.logical_and(y <= (box_center[1]+0.1), y >= float(box_center[1] - box[4]))
        idx_3 = np.logical_and(z >= float(box_center[2] - box[5]/2.0), z <= float(box_center[2] + box[5]/2.0))
        idx = np.logical_and(idx_1, idx_2)
        idx = np.logical_and(idx, idx_3)
        depth_patch = point_cloud[idx, :]
        rotate_pc_along_y(point_cloud, -(np.squeeze(box[6])+np.pi/2)) #unrotate to prep for next iteration
        rotate_pc_along_y(depth_patch, -(np.squeeze(box[6])+np.pi/2))
        if depth_patch.size == 0:
            # No points inside the box: invalidate it.
            ids_3d[i] = -1
            depth_patches.append(None)
        else:
            if depth_patch.shape[0] > num_point:
                # Too many points: draw num_point of them without replacement.
                pc_in_box_fov = np.expand_dims(depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point), replace=False)], 0)
            else:
                # Too few points: keep all and pad with randomly repeated ones.
                pc_in_box_fov = np.expand_dims(
                                    np.vstack([depth_patch,
                                    depth_patch[np.random.choice(range(depth_patch.shape[0]), size = (num_point - depth_patch.shape[0]), replace=True)]])
                                    , 0)
            depth_patches.append( get_center_view_point_set(pc_in_box_fov, rot_angles[i])[0])
    return depth_patches, ids_3d
def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Run the deep-sort NMS routine on ``boxes_3d`` and blank out suppressed entries.

    Every index not returned by ``deepsort_nms`` gets its ``ids_3d`` and
    ``ids_2d`` entries set to -1 and its ``boxes_3d`` and ``detections``
    entries set to ``None``. All four containers are modified in place.

    Parameters
    ----------
    detections, boxes_3d, ids_3d, ids_2d :
        Parallel, index-aligned mutable sequences.
    nms_thresh :
        Forwarded to ``deepsort_nms`` as its overlap threshold.
    confidence :
        Optional scores. When given they are squeezed before being forwarded;
        when ``None``, ``None`` is forwarded unchanged (``np.squeeze(None)``
        would otherwise produce a non-None 0-d object array).

    Returns
    -------
    (detections, boxes_3d, ids_2d, ids_3d)
        Note the order: ``ids_2d`` comes before ``ids_3d``.
    """
    scores = None if confidence is None else np.squeeze(confidence)
    kept = set(deepsort_nms(boxes_3d, nms_thresh, scores))
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d
def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Greedy non-maximum suppression over point-cloud patches using ``iou_3d``.

    Candidates are visited from the end of the ordering (ascending
    ``confidence`` when given, otherwise input order). A candidate is kept
    unless ``iou_3d`` reports ``-inf`` against a remaining patch, which
    happens when the candidate itself is ``None``. After keeping one, every
    remaining patch whose overlap with it exceeds ``nms_thresh`` is dropped.
    Entries that are not kept have their ``ids_3d`` set to -1, and their
    ``ids_2d`` too if they were previously valid in 3D. Both id sequences are
    modified in place.

    Parameters
    ----------
    depth_patches : list
        Point patches (arrays) or ``None``.
    ids_3d, ids_2d :
        Mutable sequences aligned with ``depth_patches``.
    nms_thresh : float
        Overlap above which a lower-ranked patch is suppressed.
    confidence :
        Optional scores used to order the candidates.

    Returns
    -------
    (depth_patches, ids_3d, ids_2d)
        The same 3-tuple is returned for empty input, so callers can always
        unpack the result.
    """
    if len(depth_patches) == 0:
        return depth_patches, ids_3d, ids_2d
    pick = []
    if confidence is not None:
        idxs = np.argsort(confidence)
    else:
        idxs = list(range(len(depth_patches)))
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        # overlap[k] is the overlap between candidate i and the patch at idxs[k].
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        if np.any(overlap == -np.inf):
            # The candidate has no patch: drop it without keeping it.
            idxs = np.delete(idxs, [last])
            continue
        pick.append(i)
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))
    kept = set(pick)
    for i in range(len(depth_patches)):
        if i not in kept:
            if ids_3d[i] != -1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d
def iou_3d(patch_1, patch_2):
    """Point-based overlap score between two patches of shape (N, 3) or (N, 4).

    Returns ``np.inf`` when ``patch_2`` is ``None`` and ``-np.inf`` when only
    ``patch_1`` is ``None``. Otherwise duplicate rows are removed from both
    patches and a point of ``patch_1`` counts as intersecting when some point
    of ``patch_2`` lies closer than 0.3 (Euclidean distance over all columns).
    The score is ``intersecting / (n1 + n2 - intersecting)``.
    """
    if patch_2 is None:
        return np.inf
    if patch_1 is None:
        return -np.inf

    pts_a = np.unique(patch_1, axis = 0)
    pts_b = np.unique(patch_2, axis = 0)

    matched = 0
    for pt in pts_a:
        dist = np.sqrt(((pt - pts_b)**2).sum(axis = 1))
        matched += np.any(dist < 0.3)

    total = pts_a.shape[0] + pts_b.shape[0] - matched
    return matched/total
def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw per-detection data into ``Detection`` objects.

    Each 2D detection is a 7-tuple whose first five entries are
    ``x1, y1, x2, y2, conf``; the box is converted to ``[x, y, width, height]``.
    ``detections_3d`` may be ``None``, in which case no detection carries a 3D
    box. When a detection's ``feature`` is ``None`` the 3D box is dropped as
    well. Features are moved to the CPU with ``.cpu()`` before being stored.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    converted = []
    for det_2d, feat, app_feat, det_3d in zip(detections, features, appearance_features, detections_3d):
        x1, y1, x2, y2, conf, _, _ = det_2d
        box_2d = [x1, y1, x2-x1, y2-y1]

        box_3d = None
        if det_3d is not None:
            # Unpacking enforces that the 3D box has exactly seven entries.
            x, y, z, l, w, h, theta = det_3d
            box_3d = [x, y, z, l, w, h, theta]

        if feat is None:
            converted.append(Detection(box_2d, None, conf, app_feat.cpu(), feat))
        else:
            converted.append(Detection(box_2d, box_3d, conf, app_feat.cpu(), feat.cpu()))
    return converted
def combine_features(features, depth_features, ids_3d, combination_model, depth_weight = 1):
    """Concatenate appearance and (weighted) depth features per detection.

    Parameters
    ----------
    features :
        Iterable of 1-D appearance feature tensors.
    depth_features :
        Iterable aligned with ``features``; entries whose ``ids_3d`` is -1 are
        ignored and replaced by a 512-element zero vector.
    ids_3d :
        Sequence aligned with ``features``; -1 marks "no valid 3D detection".
    combination_model :
        Optional module. When given (and there is at least one detection) it
        is put in eval mode and applied to the stacked concatenated features.
    depth_weight :
        Scalar multiplied onto the depth feature before concatenation.

    Returns
    -------
    (combined_features, appearance_features)
        Two lists with one tensor per detection.
    """
    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if ids_3d[i] == -1:
            # Allocate the placeholder on the same device as the appearance
            # feature so the concatenation also works without a GPU.
            depth_feature = torch.zeros(512, device=appearance_feature.device)
        combined_features.append(torch.cat([appearance_feature, depth_feature * depth_weight]))
        appearance_features.append(appearance_feature)
    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        combined_feature = torch.stack(combined_features)
        combined_features = combination_model(combined_feature).detach()
        combined_features = list(torch.unbind(combined_features))
    return combined_features, appearance_features
def filter(detections):
    """Blank out detections that touch both a side edge and the top/bottom edge.

    The thresholds are hard-coded for a 1242 x 375 image with a 10-pixel
    margin. A detection (``left, top, right, bottom`` in its first four
    entries) is replaced by ``None`` in place when it is within the margin
    horizontally AND vertically. The same list is returned.
    """
    for idx, box in enumerate(detections):
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        if (left < 10 or right > 1232) and (top < 10 or bottom > 365):
            detections[idx] = None
    return detections
================================================
FILE: paper_experiments/utils/visualise.py
================================================
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as plt_patches
import numpy as np
import utils.imm as imm
from PIL import Image
import pdb
def draw_track(bbox, track = None, bbox_colors = None, det = True,
               do_ellipse = False, axis = None, id_num = 0, do_velocity=False):
    """Draw a labelled 2D box for a detection or a track.

    ``bbox`` is ``(x, y, width, height)``. Without a ``track`` the colour comes
    from the 'tab20b' colormap (index 8 when ``det`` is truthy, 6 otherwise)
    and the label is ``id_num``. With a ``track`` the colour is
    ``bbox_colors[track.track_id]`` and the label is the track id.
    ``do_ellipse`` / ``do_velocity`` additionally call ``draw_ellipse`` /
    ``draw_velocity`` with the track. Drawing goes to ``axis``, or to the
    current axes when ``axis`` is ``None``.
    """
    ax = plt.gca() if axis is None else axis

    if track is None:
        color = plt.get_cmap('tab20b')(8 if det else 6)
    else:
        color = bbox_colors[track.track_id]
        id_num = track.track_id
    width, height = bbox[2], bbox[3]

    outline = plt_patches.Rectangle((bbox[0], bbox[1]), width, height, linewidth=2,
                                    edgecolor=color,
                                    facecolor='none')
    ax.add_patch(outline)
    ax.text(bbox[0], bbox[1], s = id_num, color='white', verticalalignment='top',
            bbox={'color': color, 'pad': 0})

    if do_ellipse:
        draw_ellipse(track, color)
    if do_velocity:
        draw_velocity(track, color)
def draw_detection(detection, color='k'):
    """Draw ``detection.tlwh`` on the current axes as a half-transparent white box."""
    left, top, width, height = (detection.tlwh[k] for k in range(4))
    box_patch = plt_patches.Rectangle((left, top), width, height, linewidth=2,
                                      edgecolor=color,
                                      facecolor = 'w',
                                      alpha = 0.5)
    plt.gca().add_patch(box_patch)
def draw_ellipse(track, color):
    """Draw the positional uncertainty ellipse of a 2D track on the current axes.

    The top-left 2x2 block of the covariance is eigen-decomposed; the ellipse
    axes are ``2 * sqrt(5.99) * sqrt(eigenvalue)`` long, with the larger one as
    the width, and the ellipse is rotated along the matching eigenvector.
    When ``track.model_probabilities`` is set, mean and covariance are first
    merged with ``imm.IMMFilter2D.combine_states``.
    """
    ax = plt.gca()
    if track.model_probabilities is None:
        mean, cov = track.mean, track.covariance
    else:
        mean, cov = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)

    eigvals, eigvecs = np.linalg.eig(cov[:2, :2])
    axis_std = np.sqrt(eigvals)
    # Largest axis first.
    order = np.argsort(axis_std)[::-1]
    axis_std = axis_std[order]
    eigvecs = eigvecs[:, order]

    nsigma = np.sqrt(5.99)
    major_dir_deg = np.rad2deg(np.arctan2(eigvecs[1, 0], eigvecs[0, 0]))
    ellipse = plt_patches.Ellipse(xy=(mean[0], mean[1]),
                                  width=axis_std[0]*2*nsigma,
                                  height=axis_std[1]*2*nsigma,
                                  angle=major_dir_deg,
                                  edgecolor=color,
                                  facecolor='none')
    ax.add_patch(ellipse)
def draw_velocity(track, color):
    """Draw an arrow from (mean[0], mean[1]) with offsets (mean[4], mean[5]).

    When ``track.model_probabilities`` is set, the mean is first merged with
    ``imm.IMMFilter2D.combine_states``. Drawing goes to the current axes.
    """
    ax = plt.gca()
    if track.model_probabilities is None:
        mean = track.mean
    else:
        mean, _ = imm.IMMFilter2D.combine_states(track.mean, track.covariance, track.model_probabilities)
    start_x, start_y = mean[0], mean[1]
    delta_x, delta_y = mean[4], mean[5]
    ax.arrow(start_x, start_y,
             delta_x, delta_y,
             edgecolor=color,
             head_width=5)
def draw_box3d(mu, color, alpha, facecolor='none', ax=None):
    """Draw the x/z footprint of a 3D box as a rotated rectangle.

    ``mu`` is indexed as ``x=mu[0], z=mu[2], l=mu[3], w=mu[5], theta=mu[6]``.
    The rectangle (``l`` by ``w``) is built centred on the origin, then moved
    to ``(x, z)`` and rotated by ``theta`` around that point. Nothing is drawn
    when ``mu`` contains a NaN. Drawing goes to ``ax``, or to the current axes
    when ``ax`` is ``None``.
    """
    if np.any(np.isnan(mu)):
        return
    target_ax = plt.gca() if ax is None else ax

    x, z, l, w, theta = mu[0], mu[2], mu[3], mu[5], mu[6]

    # Offset from the rectangle centre to its lower-left corner.
    half_diag = np.sqrt(w**2 + l**2)/2
    diag_angle = np.arctan2(w, l)
    dx = half_diag*np.cos(diag_angle)
    dz = half_diag*np.sin(diag_angle)

    footprint = plt_patches.Rectangle((-dx, -dz), l, w, linewidth=2,
                                      edgecolor=color,
                                      alpha=alpha,
                                      facecolor=facecolor)
    placement = matplotlib.transforms.Affine2D().translate(x, z).rotate_around(x, z, theta)
    footprint.set_transform(placement + target_ax.transData)
    target_ax.add_patch(footprint)
def draw_velocity_3d(track, color, ax=None):
    """Draw the x/z velocity of a 3D track as an arrow.

    The arrow starts at ``(mean[0], mean[2])`` and has offsets
    ``(mean[7], mean[8])``.

    Parameters
    ----------
    track :
        Object exposing a ``mean`` vector with at least nine entries.
    color :
        Matplotlib colour for the arrow.
    ax :
        Axes to draw on; the current axes are used when ``None``.
    """
    mean = track.mean
    if ax is None:
        ax = plt.gca()
    x, z, vx, vz = mean[0], mean[2], mean[7], mean[8]
    # Axes.arrow creates the arrow and adds it to ``ax`` itself. Going through
    # plt.arrow would attach it to the *current* axes instead, and adding the
    # returned artist to ``ax`` afterwards would register it a second time.
    ax.arrow(x, z, vx, vz,
             color=color,
             head_width=0.5,
             head_length=0.5)
def draw_ellipse3d(covariance, x, y, color, ax=None):
    """Draw the x/z uncertainty ellipse of a 3D state, centred at ``(x, y)``.

    The 2x2 sub-matrix formed by rows/columns 0 and 2 of ``covariance`` is
    eigen-decomposed; the ellipse axes are ``2 * sqrt(5.99) * sqrt(eigenvalue)``
    long with the larger one as the width. Nothing is drawn when
    ``covariance`` contains a NaN. Drawing goes to ``ax``, or to the current
    axes when ``ax`` is ``None``.
    """
    if np.any(np.isnan(covariance)):
        return
    target_ax = plt.gca() if ax is None else ax

    # Entries (0,0), (0,2), (2,0), (2,2) in row-major order.
    xz_cov = np.reshape(covariance[[0, 0, 2, 2], [0, 2, 0, 2]], (2,2))
    eigvals, eigvecs = np.linalg.eig(xz_cov)
    axis_std = np.sqrt(eigvals)
    order = np.argsort(axis_std)[::-1]
    axis_std = axis_std[order]
    eigvecs = eigvecs[:, order]

    nsigma = np.sqrt(5.99)
    major_dir_deg = np.rad2deg(np.arctan2(eigvecs[1, 0], eigvecs[0, 0]))
    ellipse = plt_patches.Ellipse(xy=(x,y),
                                  width=axis_std[0]*2*nsigma,
                                  height=axis_std[1]*2*nsigma,
                                  angle=major_dir_deg,
                                  edgecolor=color,
                                  facecolor='none')
    target_ax.add_patch(ellipse)
def draw_track3d(track, color, ax=None):
    """Draw a 3D track: footprint box, id label, uncertainty ellipse and velocity.

    Everything is placed at ``(track.mean[0], track.mean[2])``. Drawing goes
    to ``ax``, or to the current axes when ``ax`` is ``None``.
    """
    state = track.mean
    draw_box3d(state, color, 1, ax=ax)
    target_ax = plt.gca() if ax is None else ax
    pos_x, pos_z = state[0], state[2]
    label_box = {'color': color, 'pad': 0}
    target_ax.text(pos_x, pos_z, s = track.track_id, color='white', verticalalignment='top',
                   bbox=label_box)
    draw_ellipse3d(track.covariance, pos_x, pos_z, color, target_ax)
    draw_velocity_3d(track, color, target_ax)
def draw_detection3d(det, color, ax=None):
    """Draw ``det.box_3d`` as a half-transparent footprint filled with ``color``."""
    draw_box3d(det.box_3d, color, 0.5, facecolor=color, ax=ax)
================================================
FILE: paper_experiments/utils/yolo_utils/__init__.py
================================================
================================================
FILE: paper_experiments/utils/yolo_utils/datasets.py
================================================
import glob
import random
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage.transform import resize
import sys
class ImageFolder(Dataset):
    """Dataset over every ``*.*`` file of a folder, yielding square network inputs.

    Each item is ``(path, tensor)`` where the tensor is the image padded to a
    square, scaled by 1/255, resized to ``img_size`` x ``img_size`` and laid
    out channels-first as float32.
    """

    def __init__(self, folder_path, img_size=416):
        found = glob.glob('%s/*.*' % folder_path)
        found.sort()
        self.files = found
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        # Indices wrap around the file list.
        img_path = self.files[index % len(self.files)]
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape

        # Split the height/width difference between the two sides of the
        # shorter dimension.
        dim_diff = np.abs(h - w)
        pad1 = dim_diff // 2
        pad2 = dim_diff - pad1
        if h <= w:
            pad = ((pad1, pad2), (0, 0), (0, 0))
        else:
            pad = ((0, 0), (pad1, pad2), (0, 0))

        padded = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        resized = resize(padded, (*self.img_shape, 3), mode='reflect', anti_aliasing = True)
        # Height-width-channel to channel-height-width, then to a float tensor.
        input_img = torch.from_numpy(np.transpose(resized, (2, 0, 1))).float()

        return img_path, input_img

    def __len__(self):
        return len(self.files)
class ListDataset(Dataset):
    """Dataset driven by a text file listing one image path per line.

    Label paths are derived from the image paths by replacing ``images`` with
    ``labels`` and the ``.png`` / ``.jpg`` extension with ``.txt``. Each item
    is ``(path, image_tensor, labels)`` where ``labels`` is a fixed-size
    ``(max_objects, 5)`` tensor, zero-filled beyond the available rows.
    """

    def __init__(self, list_path, img_size=416):
        with open(list_path, 'r') as handle:
            self.img_files = handle.readlines()
        derived = []
        for entry in self.img_files:
            entry = entry.replace('images', 'labels')
            entry = entry.replace('.png', '.txt')
            entry = entry.replace('.jpg', '.txt')
            derived.append(entry)
        self.label_files = derived
        self.img_shape = (img_size, img_size)
        self.max_objects = 50

    def __getitem__(self, index):
        n_files = len(self.img_files)

        # ----- image -----
        img_path = self.img_files[index % n_files].rstrip()
        img = np.array(Image.open(img_path))

        # Skip forward past images that do not have three dimensions; the
        # advanced index is also used for the label lookup below.
        while len(img.shape) != 3:
            index += 1
            img_path = self.img_files[index % n_files].rstrip()
            img = np.array(Image.open(img_path))

        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        pad1 = dim_diff // 2
        pad2 = dim_diff - pad1
        if h <= w:
            pad = ((pad1, pad2), (0, 0), (0, 0))
        else:
            pad = ((0, 0), (pad1, pad2), (0, 0))

        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = input_img.shape
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first float tensor.
        input_img = torch.from_numpy(np.transpose(input_img, (2, 0, 1))).float()

        # ----- labels -----
        label_path = self.label_files[index % n_files].rstrip()

        labels = None
        if os.path.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Columns 1..4 are read as centre x, centre y, width, height
            # relative to the unpadded image; convert to pixel corners.
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Shift by the padding added before the image content.
            left_pad, top_pad = pad[1][0], pad[0][0]
            x1 += left_pad
            y1 += top_pad
            x2 += left_pad
            y2 += top_pad
            # Back to ratios, now relative to the padded image.
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h

        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)

        return img_path, input_img, filled_labels

    def __len__(self):
        return len(self.img_files)
================================================
FILE: paper_experiments/utils/yolo_utils/parse_config.py
================================================
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    Parameters
    ----------
    path : str
        Path of the configuration file.

    Returns
    -------
    list of dict
        One dict per ``[section]``, in file order. Each dict has a ``'type'``
        key (the section name) plus one string entry per ``key=value`` line.
        ``convolutional`` sections start with ``'batch_normalize': 0`` so the
        key is always present.

    Raises
    ------
    ValueError
        If a non-section line contains no ``=``.
    """
    # The context manager closes the file even when parsing fails.
    with open(path, 'r') as file:
        raw_lines = file.read().split('\n')

    # Strip first, then filter: this also skips whitespace-only lines and
    # comments that are preceded by indentation.
    lines = [x.strip() for x in raw_lines]
    lines = [x for x in lines if x and not x.startswith('#')]

    module_defs = []
    for line in lines:
        if line.startswith('['): # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            # Split on the first '=' only so values may themselves contain '='.
            key, value = line.split("=", 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
def parse_data_config(path):
    """Parses the data configuration file.

    Returns a dict of string options read from ``key=value`` lines. Blank
    lines and lines starting with ``#`` are skipped. The dict is pre-seeded
    with ``gpus='0,1,2,3'`` and ``num_workers='10'``, which the file may
    override.
    """
    options = {'gpus': '0,1,2,3', 'num_workers': '10'}
    with open(path, 'r') as fp:
        for raw in fp.readlines():
            entry = raw.strip()
            if not entry or entry.startswith('#'):
                continue
            key, value = entry.split('=')
            options[key.strip()] = value.strip()
    return options
================================================
FILE: paper_experiments/utils/yolo_utils/utils.py
================================================
from __future__ import division
import math
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def load_classes(path):
    """
    Loads class labels at 'path'.

    The file is expected to hold one label per line. A single trailing
    newline is ignored; a file without one keeps its last label.

    Returns
    -------
    list of str
        The labels in file order.
    """
    # The context manager closes the file handle.
    with open(path, "r") as fp:
        names = fp.read().split("\n")
    # Only drop the final element when it is the empty string produced by a
    # trailing newline; otherwise it is a real label.
    if names and names[-1] == "":
        names.pop()
    return names
def weights_init_normal(m):
    """Initialise a module in place based on its class name.

    Classes whose name contains "Conv" get weights drawn from N(0, 0.02).
    Otherwise, classes whose name contains "BatchNorm2d" get weights drawn
    from N(1, 0.02) and a zero bias. Any other module is left untouched.
    """
    layer_kind = type(m).__name__
    if "Conv" in layer_kind:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif "BatchNorm2d" in layer_kind:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Pad both curves with sentinel values at either end.
    rec = np.concatenate(([0.0], recall, [1.0]))
    prec = np.concatenate(([0.0], precision, [0.0]))

    # Precision envelope: each entry becomes the maximum of itself and
    # everything to its right (a running maximum taken from the end).
    prec = np.maximum.accumulate(prec[::-1])[::-1]

    # Positions where recall changes value; the area under the curve is the
    # sum of (recall step) * (precision after the step).
    steps = np.nonzero(rec[1:] != rec[:-1])[0]
    return np.sum((rec[steps + 1] - rec[steps]) * prec[steps + 1])
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two sets of bounding boxes (2-D tensors, one box per row).

    With ``x1y1x2y2=True`` the first four columns are corner coordinates;
    otherwise they are (center x, center y, width, height). Widths and heights
    are measured inclusively (``x2 - x1 + 1``).
    """
    if x1y1x2y2:
        # Columns already hold corner coordinates.
        b1_x1, b1_y1, b1_x2, b1_y2 = (box1[:, k] for k in range(4))
        b2_x1, b2_y1, b2_x2, b2_y2 = (box2[:, k] for k in range(4))
    else:
        # Centre/size form: corners are centre -/+ half the extent.
        b1_x1 = box1[:, 0] - box1[:, 2] / 2
        b1_x2 = box1[:, 0] + box1[:, 2] / 2
        b1_y1 = box1[:, 1] - box1[:, 3] / 2
        b1_y2 = box1[:, 1] + box1[:, 3] / 2
        b2_x1 = box2[:, 0] - box2[:, 2] / 2
        b2_x2 = box2[:, 0] + box2[:, 2] / 2
        b2_y1 = box2[:, 1] - box2[:, 3] / 2
        b2_y2 = box2[:, 1] + box2[:, 3] / 2

    # Intersection rectangle; negative extents are clamped to zero.
    left = torch.max(b1_x1, b2_x1)
    top = torch.max(b1_y1, b2_y1)
    right = torch.min(b1_x2, b2_x2)
    bottom = torch.min(b1_y2, b2_y2)
    inter_w = torch.clamp(right - left + 1, min=0)
    inter_h = torch.clamp(bottom - top + 1, min=0)
    inter_area = inter_w * inter_h

    area_1 = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    area_2 = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # The small epsilon guards against division by zero.
    return inter_area / (area_1 + area_2 - inter_area + 1e-16)
def bbox_iou_numpy(box1, box2):
    """Computes IoU between bounding boxes.
    Parameters
    ----------
    box1 : ndarray
        (N, 4) shaped array with bboxes
    box2 : ndarray
        (M, 4) shaped array with bboxes
    Returns
    -------
    : ndarray
        (N, M) shaped array with IoUs
    """
    area_2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    area_1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])

    # Pairwise overlap along each axis; box1 gets a trailing axis so the
    # result broadcasts to (N, M). Negative overlaps are clipped to zero.
    overlap_w = np.minimum(box1[:, 2][:, None], box2[:, 2]) - np.maximum(box1[:, 0][:, None], box2[:, 0])
    overlap_h = np.minimum(box1[:, 3][:, None], box2[:, 3]) - np.maximum(box1[:, 1][:, None], box2[:, 1])
    overlap_w = np.maximum(overlap_w, 0)
    overlap_h = np.maximum(overlap_h, 0)

    intersection = overlap_w * overlap_h
    union = area_1[:, None] + area_2 - intersection
    # Keep the denominator strictly positive.
    union = np.maximum(union, np.finfo(float).eps)

    return intersection / union
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.

    'prediction' is indexed as (image, box, column): columns 0-3 are read as
    (center x, center y, width, height), column 4 as the object confidence and
    columns 5 .. 5 + num_classes as the per-class scores.

    NOTE: the box columns of 'prediction' are overwritten in place with corner
    coordinates, so the caller's tensor is modified.

    Returns a list with one entry per image. An entry is None when no box of
    that image reaches 'conf_thres'; otherwise it is a tensor of detections
    with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """

    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    # Write the corners back into the input tensor (in-place modification).
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
        image_pred = image_pred[conf_mask]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes
        # (the unique labels are computed on the CPU and moved back when the input is on the GPU)
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            # (results of successive classes are concatenated for the same image)
            output[image_i] = (
                max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
            )

    return output
def build_targets(
    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
):
    """Build per-anchor, per-grid-cell training targets from ground-truth rows.

    'target' is indexed as target[batch, row, column]; column 0 is read as the
    class label and columns 1-4 as x, y, w, h, which are multiplied by
    'grid_size' (presumably normalized to [0, 1] -- TODO confirm against the
    caller). Rows that sum to zero are treated as padding and skipped.

    For every remaining row the anchor whose shape overlaps the ground-truth
    shape best is made responsible for the grid cell containing the box
    centre, and the regression/classification targets are written for it.

    'img_dim' is accepted but not used in this function.

    Returns
    -------
    (nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls)
        nGT      : number of non-padding ground-truth rows
        nCorrect : number of those whose responsible prediction has IoU > 0.5,
                   the right class and a confidence > 0.5
        the rest : tensors of shape (batch, anchors, grid, grid), with an
                   extra class axis on 'tcls'
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    nG = grid_size
    mask = torch.zeros(nB, nA, nG, nG)
    # conf_mask starts as all ones; cells are zeroed below for anchors to be ignored.
    conf_mask = torch.ones(nB, nA, nG, nG)
    tx = torch.zeros(nB, nA, nG, nG)
    ty = torch.zeros(nB, nA, nG, nG)
    tw = torch.zeros(nB, nA, nG, nG)
    th = torch.zeros(nB, nA, nG, nG)
    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target.shape[1]):
            # An all-zero row is padding.
            if target[b, t].sum() == 0:
                continue
            nGT += 1
            # Convert to position relative to box
            gx = target[b, t, 1] * nG
            gy = target[b, t, 2] * nG
            gw = target[b, t, 3] * nG
            gh = target[b, t, 4] * nG
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Get shape of gt box
            # (placed at the origin so that only width/height influence the IoU)
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Get shape of anchor box
            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # Where the overlap is larger than threshold set mask to zero (ignore)
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            # Get ground truth box
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            # Get the best prediction
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
            # Masks
            # (the best anchor is re-enabled in conf_mask even if it was zeroed just above)
            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1
            # Coordinates
            # (offset of the box centre inside its grid cell)
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height
            # (log of the size relative to the anchor; the epsilon avoids log(0))
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # One-hot encoding of label
            target_label = int(target[b, t, 0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # Calculate iou between ground truth and best matching prediction
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    # Row k of the identity matrix is the one-hot vector for class k.
    identity = np.eye(num_classes, dtype="uint8")
    one_hot = identity[y]
    return torch.from_numpy(one_hot)
================================================
FILE: requirements.txt
================================================
cycler==0.10.0
kiwisolver==1.1.0
matplotlib==3.1.2
numpy==1.21.0
opencv-python==4.2.0.32
pyparsing==2.4.6
python-dateutil==2.8.1
six==1.14.0
tqdm==4.41.1
================================================
FILE: src/3d_detector.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import pdb
import time
import os
import cv2
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import PointCloud2, Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from featurepointnet_model_util import generate_detections_3d, convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from visualization_msgs.msg import MarkerArray, Marker
from cv_bridge import CvBridge, CvBridgeError
from geometry_msgs.msg import Pose, Vector3
from std_msgs.msg import ColorRGBA
from jpda_rospack.msg import detection3d_with_feature_array, detection3d_with_feature
class Detector_3d:
    """ROS node that turns 2D person boxes plus two lidar clouds into 3D detections.

    The node subscribes (approximately time-synchronised) to the YOLO bounding
    boxes and to the upper and lower Velodyne clouds. For every synchronised
    triple it runs the depth model through ``generate_detections_3d`` and
    publishes one ``detection3d_with_feature`` per 'person' box, together with
    visualisation markers for the valid ones.
    """

    def __init__(self):
        self.node_name = "fpointnet_detector_plus_feature"
        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)

        fpointnet_config = \
            rospy.get_param('~fpointnet_config',
                            '~/jr2_catkin_workspace/src/jpda_rospack/src/fpointnet_jrdb/model.ckpt')
        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
        self.depth_model = create_depth_model('FPointNet', fpointnet_config)
        self.calib = OmniCalibration(calibration_folder)

        self.velodyne_sub_upper = \
            message_filters.Subscriber("/upper_velodyne/velodyne_points", PointCloud2, queue_size=2)
        self.velodyne_sub_lower = \
            message_filters.Subscriber("/lower_velodyne/velodyne_points", PointCloud2, queue_size=2)
        self.yolo_bbox_sub = \
            message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)

        # Synchroniser arguments: queue size 5, 0.06 s slop.
        self.time_sync = \
            message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub,
                                                         self.velodyne_sub_upper,
                                                         self.velodyne_sub_lower], 5, 0.06)
        self.time_sync.registerCallback(self.get_3d_feature)

        self.feature_3d_pub = rospy.Publisher("detection3d_with_feature", detection3d_with_feature_array, queue_size=10)
        self.pc_transform_pub = rospy.Publisher("/transformed_pointcloud", PointCloud2, queue_size=10)
        self.pc_pub = rospy.Publisher("/frustum", PointCloud2, queue_size=10)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        self.marker_box_pub = rospy.Publisher("/3d_detection_markers", MarkerArray, queue_size=10)
        rospy.loginfo("3D detector ready.")

    def get_3d_feature(self, y1_bboxes, pointcloud_upper, pointcloud_lower):
        """Synchronised callback: compute and publish 3D detections and markers.

        Parameters
        ----------
        y1_bboxes : BoundingBoxes
            2D detections; only boxes whose ``Class`` is 'person' are used.
        pointcloud_upper, pointcloud_lower : PointCloud2
            Clouds from the upper and lower lidar.
        """
        # Reinterpret every point as a 32-byte record of float32 ('f4') fields
        # at the byte offsets listed below.
        point_dtype = {'names':['x','y','z','intensity','ring'],
                       'formats':['f4','f4','f4','f4','f4'],
                       'offsets':[0,4,8,16,20],
                       'itemsize':32}
        pc_upper = ros_numpy.numpify(pointcloud_upper).astype(point_dtype)
        pc_lower = ros_numpy.numpify(pointcloud_lower).astype(point_dtype)
        # View each record as 8 float32 values and keep columns 0, 1, 2 and 4,
        # i.e. byte offsets 0, 4, 8 and 16: x, y, z and intensity.
        pc_upper = torch.from_numpy(pc_upper.view(np.float32).reshape(pc_upper.shape + (-1,)))[:, [0,1,2,4]]
        pc_lower = torch.from_numpy(pc_lower.view(np.float32).reshape(pc_lower.shape + (-1,)))[:, [0,1,2,4]]

        # Move onto the GPU if available. The transfer stays best-effort, but
        # a failure is reported instead of silently swallowed, and both
        # clouds are kept on the same device.
        if torch.cuda.is_available():
            try:
                pc_upper, pc_lower = pc_upper.cuda(), pc_lower.cuda()
            except Exception as err:
                rospy.logwarn("Could not move pointclouds to the GPU, staying on CPU: {}".format(err))

        # translate and rotate into camera frame using calib object
        # in message pointcloud has x pointing forward, y pointing to the left and z pointing upward
        # need to transform this such that x is pointing to the right, y pointing downwards, z pointing forward
        # also done inside calib
        pc_upper = self.calib.move_lidar_to_camera_frame(pc_upper, upper=True)
        pc_lower = self.calib.move_lidar_to_camera_frame(pc_lower, upper=False)
        pc = torch.cat([pc_upper, pc_lower], dim = 0)
        # The fourth column is overwritten with a constant 1.
        pc[:, 3] = 1

        # Collect the 'person' boxes; frame_det_ids numbers them 0, 1, 2, ...
        detections_2d = []
        frame_det_ids = []
        count = 0
        for y1_bbox in y1_bboxes.bounding_boxes:
            if y1_bbox.Class == 'person':
                xmin = y1_bbox.xmin
                xmax = y1_bbox.xmax
                ymin = y1_bbox.ymin
                ymax = y1_bbox.ymax
                probability = y1_bbox.probability
                frame_det_ids.append(count)
                count += 1
                detections_2d.append([xmin, ymin, xmax, ymax, probability, -1, -1])

        features_3d = detection3d_with_feature_array()
        features_3d.header.stamp = y1_bboxes.header.stamp
        features_3d.header.frame_id = 'occam'
        boxes_3d_markers = MarkerArray()

        # Nothing to detect: still publish empty messages for this time step.
        if not detections_2d:
            self.marker_box_pub.publish(boxes_3d_markers)
            self.feature_3d_pub.publish(features_3d)
            return

        boxes_3d, valid_3d, rot_angles, _, depth_features, frustums = \
            generate_detections_3d(self.depth_model, detections_2d, pc,
                                   self.calib, (3, 480, 3760), omni=True,
                                   peds=True)
        depth_features = convert_depth_features(depth_features, valid_3d)

        for box, feature, i in zip(boxes_3d, depth_features, frame_det_ids):
            det_msg = detection3d_with_feature()
            det_msg.header.frame_id = 'occam'
            det_msg.header.stamp = features_3d.header.stamp
            # -1 in valid_3d marks a detection without a usable 3D box.
            det_msg.valid = bool(valid_3d[i] != -1)
            det_msg.frame_det_id = i
            if det_msg.valid:
                det_msg.x = box[0]
                det_msg.y = box[1]
                det_msg.z = box[2]
                det_msg.l = box[3]
                det_msg.h = box[4]
                det_msg.w = box[5]
                det_msg.theta = box[6]
                det_msg.feature = feature
                features_3d.detection3d_with_features.append(det_msg)

                pose_msg = Pose()
                marker_msg = Marker()
                marker_msg.header.stamp = pointcloud_lower.header.stamp
                marker_msg.header.frame_id = 'occam'
                marker_msg.action = 0   # Marker.ADD
                marker_msg.id = i
                marker_msg.lifetime = rospy.Duration(0.2)
                marker_msg.type = 1     # Marker.CUBE
                marker_msg.scale = Vector3(box[3], box[4], box[5])
                pose_msg.position.x = det_msg.x
                # The marker is centred half a box height above det_msg.y.
                pose_msg.position.y = det_msg.y - det_msg.h/2
                pose_msg.position.z = det_msg.z
                marker_msg.pose = pose_msg
                marker_msg.color = ColorRGBA(g=1, a =0.5)
                boxes_3d_markers.markers.append(marker_msg)
            else:
                # Invalid detections are still published, filled with -1.
                det_msg.y = -1
                det_msg.x = -1
                det_msg.z = -1
                det_msg.l = -1
                det_msg.w = -1
                det_msg.h = -1
                det_msg.theta = -1
                det_msg.feature = [-1]
                features_3d.detection3d_with_features.append(det_msg)

        self.marker_box_pub.publish(boxes_3d_markers)
        self.feature_3d_pub.publish(features_3d)

    def publish_pointcloud_from_array(self, pointcloud, publisher, frame = 'occam', header = None):
        """Publish an iterable of (x, y, z, intensity) rows as a PointCloud2.

        Parameters
        ----------
        pointcloud :
            Iterable of 4-element rows.
        publisher :
            Object whose ``publish`` method receives the message.
        frame : str
            Value written to the message's ``header.frame_id``.
        header :
            Optional header whose ``stamp`` is copied onto the message.
        """
        list_pc = [tuple(j) for j in pointcloud]
        pc_output_msg = np.array(list_pc, dtype = [('x', 'f4'),('y', 'f4'),('z', 'f4'),('intensity', 'f4')])
        pc_msg = ros_numpy.msgify(PointCloud2, pc_output_msg)
        if header is not None:
            pc_msg.header.stamp = header.stamp
        # Honour the caller-supplied frame (the default matches the previous
        # hard-coded value).
        pc_msg.header.frame_id = frame
        publisher.publish(pc_msg)

    def cleanup(self):
        """Shutdown hook registered with rospy; only prints a message."""
        print("Shutting down 3D-Detection node.")
def main(args):
    """Create the 3D detector node and block until ROS shuts down.

    ``args`` is accepted for the usual ``main(sys.argv)`` call shape and is
    not used.
    """
    try:
        node = Detector_3d()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D-Detection node.")


# Script entry point.
if __name__ == '__main__':
    main(sys.argv)
================================================
FILE: src/EKF.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import pdb
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
# Chi-square quantile tables, keyed by the number of degrees of freedom
# (1..9). Each table name carries the probability level it was written for;
# see the notes on chi2inv10 and chi2inv995 for how those two are to be read.

# 0.95 quantile.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}

# 0.90 quantile.
chi2inv90 = {
    1: 2.706,
    2: 4.605,
    3: 6.251,
    4: 7.779,
    5: 9.236,
    6: 10.645,
    7: 12.017,
    8: 13.363,
    9: 14.684}

# 0.975 quantile.
chi2inv975 = {
    1: 5.025,
    2: 7.378,
    3: 9.348,
    4: 11.143,
    5: 12.833,
    6: 14.449,
    7: 16.013,
    8: 17.535,
    9: 19.023}

# 0.10 quantile (lower tail). The entry for 2 degrees of freedom is
# -2*ln(0.9) = 0.2107, i.e. .211 (it was previously mistyped as .221).
chi2inv10 = {
    1: .016,
    2: .211,
    3: .584,
    4: 1.064,
    5: 1.610,
    6: 2.204,
    7: 2.833,
    8: 3.490,
    9: 4.168}

# NOTE: despite the name, these are the 0.005 quantiles (lower tail), i.e.
# the values exceeded with probability 0.995.
chi2inv995 = {
    1: 0.0000393,
    2: 0.0100,
    3: .0717,
    4: .207,
    5: .412,
    6: .676,
    7: .989,
    8: 1.344,
    9: 1.735}

# 0.75 quantile.
chi2inv75 = {
    1: 1.323,
    2: 2.773,
    3: 4.108,
    4: 5.385,
    5: 6.626,
    6: 7.841,
    7: 9.037,
    8: 10.22,
    9: 11.39}
def squared_mahalanobis_distance(mean, covariance, measurements):
    """Squared Mahalanobis distance of each measurement from ``mean``.

    Parameters
    ----------
    mean : ndarray
        Vector of length k.
    covariance : ndarray
        (k, k) matrix.
    measurements : ndarray
        (N, k) array with one measurement per row. A single measurement given
        as a length-k vector is treated as one row.

    Returns
    -------
    ndarray
        Length-N array holding ``d_i^T * inv(covariance) * d_i`` for every
        row ``d_i = measurements[i] - mean``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``covariance`` is singular.
    """
    d = np.atleast_2d(measurements - mean)
    # Solve covariance * z_i = d_i for all rows at once instead of forming the
    # explicit inverse, then take the row-wise dot product d_i . z_i. This
    # avoids building the full N x N product only to read its diagonal.
    z = np.linalg.solve(covariance, d.T).T
    return (d * z).sum(axis=1)
class EKF(object):
    """Generic extended Kalman filter base class.

    The model-specific hooks (``initiate``, ``predict_mean``,
    ``get_process_noise``, ``predict_covariance``, ``project_mean``,
    ``project_cov``, ``get_innovation_cov``) are placeholders that return
    ``None`` here. ``update`` additionally reads ``self.ndim`` and
    ``self._observation_mat``; neither is assigned in this class, so a
    concrete filter has to provide them.
    """

    def __init__(self):
        pass

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the new track.
            Unobserved velocities are initialized to 0 mean.
        """
        pass

    def predict_mean(self, mean):
        # Updates predicted state from previous state (function g)
        # Calculates motion update Jacobian (Gt)
        # Returns (g(mean), Gt)
        pass

    def get_process_noise(self, mean, covariance):
        # Returns Rt the motion noise covariance
        pass

    def predict_covariance(self, mean, covariance):
        pass

    def project_mean(self, mean):
        # Measurement prediction from state (function h)
        # Calculates sensor update Jacobian (Ht)
        # Returns (h(mean), Ht)
        pass

    def project_cov(self, mean, covariance):
        pass

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Whatever ``predict_mean`` and ``predict_covariance`` return,
            in that order.
        """
        # The covariance is propagated first so that it sees the
        # not-yet-advanced mean.
        covariance = self.predict_covariance(mean, covariance)
        mean = self.predict_mean(mean)
        return mean, covariance

    def get_innovation_cov(self, covariance):
        pass

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector
        covariance : ndarray
            The state's covariance matrix

        Returns
        -------
        (ndarray, ndarray)
            ``(self.project_mean(mean), self.project_cov(mean, covariance))``.
        """
        return self.project_mean(mean), self.project_cov(mean, covariance)

    def update(self, mean, covariance, measurement_t, marginalization=None, JPDA=False):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector.
        covariance : ndarray
            The predicted state's covariance matrix.
        measurement_t : ndarray
            With ``JPDA=False`` a single measurement vector. With
            ``JPDA=True`` an iterable of measurement vectors.
        marginalization : sequence, optional
            Only read when ``JPDA=True``. Entry 0 is the weight of the dummy
            (no measurement) hypothesis; entry ``k + 1`` is the weight of
            ``measurement_t[k]``.
        JPDA : bool
            Selects the probability-weighted update instead of the plain
            single-measurement update.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.
        """
        predicted_measurement, innovation_cov = self.project(mean, covariance)

        # K = covariance * update_mat.T * inv(innovation_cov), hence K.T solves
        # innovation_cov * K.T = (covariance * update_mat.T).T
        cross_cov_t = np.dot(covariance, self._observation_mat.T).T
        try:
            chol_factor, lower = scipy.linalg.cho_factor(
                innovation_cov, lower=True, check_finite=False)
            kalman_gain = scipy.linalg.cho_solve(
                (chol_factor, lower), cross_cov_t, check_finite=False).T
        except Exception:
            # Cholesky factorization failed: revert to the standard solver.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # propagate.
            kalman_gain = np.linalg.solve(innovation_cov, cross_cov_t).T

        if JPDA:
            # Marginalize over all association hypotheses.
            innovation = np.zeros((self.ndim))
            cov_soft = np.zeros((self.ndim, self.ndim))
            for measurement_idx, measurement in enumerate(measurement_t):
                p_ij = marginalization[measurement_idx + 1]  # + 1 for dummy
                y_ij = measurement - predicted_measurement
                innovation += y_ij * p_ij
                cov_soft += p_ij * np.outer(y_ij, y_ij)
            # Spread-of-innovations term.
            cov_soft = cov_soft - np.outer(innovation, innovation)

            P_star = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))
            p_0 = marginalization[0]
            # Blend "nothing associated" (prior kept) with the updated case.
            P_0 = p_0 * covariance + (1 - p_0) * P_star
            new_covariance = P_0 + np.linalg.multi_dot(
                (kalman_gain, cov_soft, kalman_gain.T))
        else:
            innovation = measurement_t - predicted_measurement
            new_covariance = covariance - np.linalg.multi_dot((
                kalman_gain, innovation_cov, kalman_gain.T))

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        return new_mean, new_covariance
================================================
FILE: src/JPDA_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from linear_assignment import min_marg_matching
import pdb
def get_unmatched(all_idx, matches, i, marginalization=None):
    """Return the entries of ``all_idx`` that no match refers to.

    ``i`` selects which element of every match tuple is compared against
    ``all_idx``. When ``marginalization`` is given, indices whose column
    (after skipping column 0, the dummy node) has a non-zero sum are also
    dropped from the result.
    """
    taken = set(pair[i] for pair in matches)
    leftover = set(all_idx) - taken
    if marginalization is None:
        return list(leftover)
    # from 1 for dummy node
    column_mass = np.sum(marginalization[:, 1:], axis=0)
    in_gate_dets = np.nonzero(column_mass)[0].tolist()
    return [d for d in leftover if d not in in_gate_dets]
class Matcher:
    """Turn a marginalization matrix into hard track/detection matches.

    ``marginalizations`` is indexed ``[row, column]``: row ``r`` belongs to
    ``confirmed_tracks[r]``, column 0 is the dummy (no detection) hypothesis
    and column ``d + 1`` belongs to detection ``d``.
    """

    def __init__(self, detections, marginalizations, confirmed_tracks,
                 matching_strategy,
                 assignment_threshold=None):
        self.detections = detections
        self.marginalizations = marginalizations
        self.confirmed_tracks = confirmed_tracks
        self.assignment_threshold = assignment_threshold
        self.detection_indices = np.arange(len(detections))
        self.matching_strategy = matching_strategy

    def match(self):
        """Run the configured strategy.

        Returns ``(matches, unmatched_tracks, unmatched_detections)``.
        """
        self.get_matches()
        self.get_unmatched_tracks()
        self.get_unmatched_detections()
        return self.matches, self.unmatched_tracks, self.unmatched_detections

    def get_matches(self):
        """Populate ``self.matches`` according to ``matching_strategy``.

        Raises ``Exception`` for an unknown strategy name.
        """
        if self.matching_strategy == "max_and_threshold":
            self.max_and_threshold_matching()
        elif self.matching_strategy == "hungarian":
            self.hungarian()
        elif self.matching_strategy == "max_match":
            self.max_match()
        elif self.matching_strategy == "none":
            self.matches = []
        else:
            raise Exception('Unrecognized matching strategy: {}'.
                            format(self.matching_strategy))

    def get_unmatched_tracks(self):
        """Store the confirmed tracks that appear in no match."""
        self.unmatched_tracks = get_unmatched(self.confirmed_tracks,
                                              self.matches, 0)

    def get_unmatched_detections(self):
        """Store detections that are unmatched and outside every gate."""
        self.unmatched_detections = get_unmatched(
            self.detection_indices, self.matches, 1, self.marginalizations)

    def max_match(self):
        """Give every detection to the track that claims it most strongly.

        Each track proposes its arg-max column; a track whose arg-max is the
        dummy column proposes nothing. When several tracks propose the same
        detection, the one with the strictly larger probability for that
        detection wins (ties keep the earlier track). No probability
        threshold is applied by this strategy.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return
        # detection id -> (row in marginalizations, track id)
        claimed = {}
        for row, track_idx in enumerate(self.confirmed_tracks):
            detection_id = np.argmax(self.marginalizations[row, :]) - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            if detection_id in claimed:
                holder_row = claimed[detection_id][0]
                # Compare by matrix row (not track id) and skip the dummy
                # column with the + 1 offset.
                challenger_p = self.marginalizations[row, detection_id + 1]
                holder_p = self.marginalizations[holder_row, detection_id + 1]
                if not challenger_p > holder_p:
                    continue
            claimed[detection_id] = (row, track_idx)
        for detection in claimed.keys():
            self.matches.append((claimed[detection][1], detection))

    def max_and_threshold_matching(self):
        """Match each track to its arg-max detection if it clears the threshold.

        Several tracks may end up matched to the same detection.
        """
        self.matches = []
        if self.marginalizations.shape[0] == 0:
            return
        for i, track_idx in enumerate(self.confirmed_tracks):
            marginalization = self.marginalizations[i, :]
            detection_id = np.argmax(marginalization) - 1  # subtract one for dummy
            if detection_id < 0:
                continue
            threshold_p = marginalization[detection_id + 1]
            if threshold_p < self.assignment_threshold:
                continue
            self.matches.append((track_idx, detection_id))

    def hungarian(self):
        """Delegate matching to ``min_marg_matching``."""
        self.matches, _, _ = min_marg_matching(self.marginalizations,
                                               self.confirmed_tracks,
                                               self.assignment_threshold)
================================================
FILE: src/__init__.py
================================================
================================================
FILE: src/aligned_reid_model.py
================================================
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import os
import math
class Model(nn.Module):
    """ResNet-50 backbone that outputs a globally average-pooled feature.

    The ``local_*`` layers (and the optional ``fc`` classifier) are created
    so the module owns those parameters, but ``forward`` does not call them.
    """

    def __init__(self, local_conv_out_channels=128, num_classes=None):
        """
        Args:
            local_conv_out_channels: output channels of the 1x1 ``local_conv``.
            num_classes: when not None, a ``fc`` linear layer with this many
                outputs is added and initialised (weights ~ N(0, 0.001),
                bias 0).
        """
        super(Model, self).__init__()
        self.base = resnet50(pretrained=True)
        planes = 2048
        self.local_conv = nn.Conv2d(planes, local_conv_out_channels, 1)
        self.local_bn = nn.BatchNorm2d(local_conv_out_channels)
        self.local_relu = nn.ReLU(inplace=True)

        if num_classes is not None:
            self.fc = nn.Linear(planes, num_classes)
            # In-place initialisers; the un-suffixed ``init.normal`` /
            # ``init.constant`` spellings are deprecated.
            init.normal_(self.fc.weight, std=0.001)
            init.constant_(self.fc.bias, 0)

    def forward(self, x):
        """
        Returns:
            global_feat: the backbone feature map averaged over its full
                spatial extent, shape [N, C, 1, 1] (the flattening to [N, C]
                and the local-feature branch are commented out).
        """
        # shape [N, C, H, W]
        feat = self.base(x)
        global_feat = F.avg_pool2d(feat, feat.size()[2:])
        # shape [N, C]
        # global_feat = global_feat.view(global_feat.size(0), -1)
        # shape [N, C, H, 1]
        # local_feat = torch.mean(feat, -1, keepdim=True)
        # local_feat = self.local_relu(self.local_bn(self.local_conv(local_feat)))
        # # shape [N, H, c]
        # local_feat = local_feat.squeeze(-1).permute(0, 2, 1)

        return global_feat
# Names exported by ``from <module> import *``. ``Model`` is not in this list.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
# Download locations of the pretrained checkpoints, keyed by architecture name.
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
# Import-time side effect: sets TORCH_HOME for the whole process. The path is
# relative, so it resolves against whatever the working directory is.
os.environ["TORCH_HOME"] = "./ResNet_Model"
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    ``downsample``, when given, is applied to the block input to produce the
    shortcut that is added before the final ReLU.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution carries the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)
class Bottleneck(nn.Module):
    """Residual block of 1x1 -> 3x3 -> 1x1 convolutions.

    The last convolution widens the channels to ``planes * 4``; the stride is
    applied by the 3x3 convolution. ``downsample``, when given, transforms
    the input into the shortcut added before the final ReLU.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)
class ResNet(nn.Module):
    """ResNet feature extractor: stem plus four residual stages.

    ``forward`` returns the output of ``layer4``; there is no pooling or
    classifier in this class.
    """

    def __init__(self, block, layers):
        """
        Args:
            block: residual block class exposing an ``expansion`` attribute.
            layers: number of blocks in each of the four stages.
        """
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Re-initialise every conv (normal, std = sqrt(2 / fan_out)) and
        # every batch norm (weight 1, bias 0).
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kh, kw = module.kernel_size[0], module.kernel_size[1]
                fan_out = kh * kw * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block`` into one stage.

        The first block receives ``stride`` and, when the stride or channel
        count changes, a 1x1 conv + batch norm shortcut. ``self.inplanes`` is
        advanced to the stage's output width.
        """
        out_planes = planes * block.expansion
        shortcut = None
        if stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for step in pipeline:
            x = step(x)
        return x
def remove_fc(state_dict):
    """Return a copy of ``state_dict`` without the entries keyed ``fc.*``.

    The input mapping is left untouched.
    """
    pruned = state_dict.copy()
    for name in list(state_dict.keys()):
        if name.startswith('fc.'):
            del pruned[name]
    return pruned
def resnet18(pretrained=False):
    """Build a ResNet-18 (``BasicBlock``, stages of 2/2/2/2 blocks).

    Args:
        pretrained (bool): If True, load the checkpoint at
            ``model_urls['resnet18']`` with its ``fc.*`` entries removed.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(remove_fc(weights))
    return net
def resnet34(pretrained=False):
    """Build a ResNet-34 (``BasicBlock``, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): If True, load the checkpoint at
            ``model_urls['resnet34']`` with its ``fc.*`` entries removed.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(remove_fc(weights))
    return net
def resnet50(pretrained=False):
    """Build a ResNet-50 (``Bottleneck``, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): If True, load the checkpoint at
            ``model_urls['resnet50']`` (cached under ``./ResNet_Model``)
            with its ``fc.*`` entries removed.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])
    if not pretrained:
        return net
    ### ADDED MODEL_DIR
    weights = model_zoo.load_url(model_urls['resnet50'], model_dir="./ResNet_Model")
    net.load_state_dict(remove_fc(weights))
    return net
def resnet101(pretrained=False):
    """Build a ResNet-101 (``Bottleneck``, stages of 3/4/23/3 blocks).

    Args:
        pretrained (bool): If True, load the checkpoint at
            ``model_urls['resnet101']`` with its ``fc.*`` entries removed.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(remove_fc(weights))
    return net
def resnet152(pretrained=False):
    """Build a ResNet-152 (``Bottleneck``, stages of 3/8/36/3 blocks).

    Args:
        pretrained (bool): If True, load the checkpoint at
            ``model_urls['resnet152']`` with its ``fc.*`` entries removed.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(remove_fc(weights))
    return net
================================================
FILE: src/aligned_reid_utils.py
================================================
from __future__ import print_function
import os
import os.path as osp
import pickle
from scipy import io
import datetime
import time
from contextlib import contextmanager
import numpy as np
from PIL import Image
import torch
from torch.autograd import Variable
from aligned_reid_model import Model as aligned_reid_model
# from models.deep_sort_model import ImageEncoder as deep_sort_model
def time_str(fmt=None):
    """Format the current local time; default pattern is ``%Y-%m-%d_%H:%M:%S``."""
    pattern = '%Y-%m-%d_%H:%M:%S' if fmt is None else fmt
    now = datetime.datetime.today()
    return now.strftime(pattern)
def load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    Asserts that ``path`` exists first.
    NOTE(review): ``pickle.load`` executes code embedded in the file, so
    only use this on trusted files.
    """
    assert osp.exists(path)
    with open(path, 'rb') as handle:
        return pickle.load(handle)
def save_pickle(obj, path):
    """Pickle ``obj`` to ``path`` (protocol 2), creating the parent dir first."""
    parent_dir = osp.dirname(osp.abspath(path))
    may_make_dir(parent_dir)
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=2)
def save_mat(ndarray, path):
    """Write ``ndarray`` to a .mat file under the variable name ``ndarray``."""
    payload = {'ndarray': ndarray}
    io.savemat(path, payload)
def to_scalar(vt):
    """Return the first element of a pytorch Variable or Tensor as a scalar.

    The input is moved to cpu, converted to numpy and flattened; element 0
    of that is returned. Anything that is neither a Variable nor a tensor
    raises ``TypeError``.
    """
    if isinstance(vt, Variable):
        raw = vt.data
    elif torch.is_tensor(vt):
        raw = vt
    else:
        raise TypeError('Input should be a variable or tensor')
    return raw.cpu().numpy().flatten()[0]
def transfer_optim_state(state, device_id=-1):
    """Transfer an optimizer.state to cpu or specified gpu, which means
    transferring tensors of the optimizer.state to specified device.
    The modification is in place for the state.

    Args:
        state: An torch.optim.Optimizer.state
        device_id: gpu id, or -1 which means transferring to cpu

    Raises:
        RuntimeError: if an entry is a ``torch.nn.Parameter``, or a
            ``Variable`` that is not itself a tensor.
    """
    for key, val in state.items():
        if isinstance(val, dict):
            transfer_optim_state(val, device_id=device_id)
        elif isinstance(val, torch.nn.Parameter):
            raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
        elif isinstance(val, Variable) and not torch.is_tensor(val):
            # Since the Tensor/Variable merge, ``isinstance(t, Variable)`` is
            # true for every tensor, so the Variable check alone would
            # reject all ordinary state tensors. It is only meaningful where
            # Variable is still a separate wrapper type.
            raise RuntimeError("Oops, state[{}] is a Variable!".format(key))
        else:
            # Best effort: entries without ``.cpu()``/``.cuda()`` (ints,
            # floats, ...) are left as they are. ``Exception`` rather than a
            # bare except so KeyboardInterrupt/SystemExit are not swallowed.
            try:
                if device_id == -1:
                    state[key] = val.cpu()
                else:
                    state[key] = val.cuda(device=device_id)
            except Exception:
                pass
def may_transfer_optims(optims, device_id=-1):
    """Move the state of every optimizer in ``optims`` to cpu or a gpu, in place.

    Args:
        optims: A list whose members are either torch optimizers or anything
            else; non-optimizer members are skipped.
        device_id: gpu id, or -1 which means transferring to cpu
    """
    for candidate in optims:
        if not isinstance(candidate, torch.optim.Optimizer):
            continue
        transfer_optim_state(candidate.state, device_id=device_id)
def may_transfer_modules_optims(modules_and_or_optims, device_id=-1):
    """Move optimizers and modules to cpu or the given gpu.

    Args:
        modules_and_or_optims: A list whose members are torch optimizers,
            torch.nn.Module instances or None. Any other member only
            triggers a printed warning.
        device_id: gpu id, or -1 which means transferring to cpu
    """
    to_cpu = device_id == -1
    for entry in modules_and_or_optims:
        if isinstance(entry, torch.optim.Optimizer):
            transfer_optim_state(entry.state, device_id=device_id)
            continue
        if isinstance(entry, torch.nn.Module):
            if to_cpu:
                entry.cpu()
            else:
                entry.cuda(device=device_id)
            continue
        if entry is not None:
            print('[Warning] Invalid type {}'.format(entry.__class__.__name__))
class TransferVarTensor(object):
    """Callable returning its argument on the configured device.

    ``device_id == -1`` selects cpu; any other value is passed to ``.cuda``.
    """

    def __init__(self, device_id=-1):
        self.device_id = device_id

    def __call__(self, var_or_tensor):
        if self.device_id == -1:
            return var_or_tensor.cpu()
        return var_or_tensor.cuda(self.device_id)
class TransferModulesOptims(object):
    """Callable that forwards to ``may_transfer_modules_optims`` with a fixed device."""

    def __init__(self, device_id=-1):
        self.device_id = device_id

    def __call__(self, modules_and_or_optims):
        target = self.device_id
        may_transfer_modules_optims(modules_and_or_optims, target)
def set_devices(sys_device_ids):
    """
    Make the listed GPUs visible and return wrappers for moving
    Variables/Tensors and Modules/Optimizers.

    Args:
        sys_device_ids: a tuple; which GPUs to use
            e.g. sys_device_ids = (), only use cpu
                 sys_device_ids = (3,), use the 4th gpu
                 sys_device_ids = (0, 1, 2, 3,), use first 4 gpus
                 sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
    Returns:
        TVT: a `TransferVarTensor` callable
        TMO: a `TransferModulesOptims` callable
    """
    import os
    # Set the CUDA_VISIBLE_DEVICES environment variable; every id is
    # followed by ", ", including the last one.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
        '{}, '.format(gpu) for gpu in sys_device_ids)
    # Models and user defined Variables/Tensors would be transferred to the
    # first device (relative id 0), or to cpu when no GPU was requested.
    if len(sys_device_ids) > 0:
        device_id = 0
    else:
        device_id = -1
    return TransferVarTensor(device_id), TransferModulesOptims(device_id)
def set_devices_for_ml(sys_device_ids):
    """Mutual-learning variant of ``set_devices``.

    Makes the union of all requested GPUs visible and returns per-model
    wrappers for moving Variables/Tensors and Modules/Optimizers.

    Args:
        sys_device_ids: a tuple of tuples; which devices to use for each model,
            len(sys_device_ids) should be equal to number of models. Examples:

            sys_device_ids = ((-1,), (-1,))
                the two models both on CPU
            sys_device_ids = ((-1,), (2,))
                the 1st model on CPU, the 2nd model on GPU 2
            sys_device_ids = ((3,),)
                the only one model on the 4th gpu
            sys_device_ids = ((0, 1), (2, 3))
                the 1st model on GPU 0 and 1, the 2nd model on GPU 2 and 3
            sys_device_ids = ((0,), (0,))
                the two models both on GPU 0
            sys_device_ids = ((0,), (0,), (1,), (1,))
                the 1st and 2nd model on GPU 0, the 3rd and 4th model on GPU 1

    Returns:
        TVTs: a list of `TransferVarTensor` callables, one for one model.
        TMOs: a list of `TransferModulesOptims` callables, one for one model.
        relative_device_ids: a list of lists; `sys_device_ids` transformed to
            relative ids; to be used in `DataParallel`
    """
    import os

    flat_ids = []
    for group in sys_device_ids:
        flat_ids += group
    unique_sys_device_ids = sorted(set(flat_ids))
    # -1 stands for cpu and is never a visible GPU.
    if -1 in unique_sys_device_ids:
        unique_sys_device_ids.remove(-1)

    # Set the CUDA_VISIBLE_DEVICES environment variable
    os.environ['CUDA_VISIBLE_DEVICES'] = ''.join(
        '{}, '.format(gpu) for gpu in unique_sys_device_ids)

    # Return wrappers
    relative_device_ids = []
    TVTs, TMOs = [], []
    for group in sys_device_ids:
        # Position of each GPU among the visible ones; -1 stays -1.
        relative_ids = [
            dev if dev == -1 else find_index(unique_sys_device_ids, dev)
            for dev in group
        ]
        relative_device_ids.append(relative_ids)

        # Models and user defined Variables/Tensors would be transferred to the
        # first device.
        first = relative_ids[0]
        TVTs.append(TransferVarTensor(first))
        TMOs.append(TransferModulesOptims(first))
    return TVTs, TMOs, relative_device_ids
def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
    """Restore modules/optimizers from a checkpoint file.

    ``fc.weight`` and ``fc.bias`` are removed from every loaded state dict
    before it is copied into its module/optimizer.

    Args:
        modules_optims: A list, which members are either torch.nn.optimizer
            or torch.nn.Module.
        ckpt_file: The file path.
        load_to_cpu: Boolean. Whether to transform tensors in modules/optimizers
            to cpu type.
        verbose: Boolean. Whether to print the checkpoint's epoch and scores.

    Returns:
        ``(ckpt['ep'], ckpt['scores'])``
    """
    if load_to_cpu:
        map_location = lambda storage, loc: storage
    else:
        map_location = None
    ckpt = torch.load(ckpt_file, map_location=map_location)

    for target, saved in zip(modules_optims, ckpt['state_dicts']):
        for head_key in ('fc.weight', 'fc.bias'):
            if head_key in saved:
                del saved[head_key]
        load_state_dict(target, saved)

    epoch, scores = ckpt['ep'], ckpt['scores']
    if verbose:
        print('Resume from ckpt {}, \nepoch {}, \nscores {}'.format(
            ckpt_file, epoch, scores))
    return epoch, scores
def save_ckpt(modules_optims, ep, scores, ckpt_file):
    """Write the state dicts of modules/optimizers plus metadata to a file.

    Args:
        modules_optims: A list, which members are either torch.nn.optimizer
            or torch.nn.Module.
        ep: the current epoch number
        scores: the performance of current model
        ckpt_file: The file path.
    Note:
        torch.save() reserves device type and id of tensors to save, so when
        loading ckpt, you have to inform torch.load() to load these tensors to
        cpu or your desired gpu, if you change devices.
    """
    payload = {
        'state_dicts': [item.state_dict() for item in modules_optims],
        'ep': ep,
        'scores': scores,
    }
    parent_dir = osp.dirname(osp.abspath(ckpt_file))
    may_make_dir(parent_dir)
    torch.save(payload, ckpt_file)
def load_state_dict(model, src_state_dict):
    """Copy matching entries of ``src_state_dict`` into ``model`` in place.

    Keys of ``src_state_dict`` that the model does not have are skipped.
    A failing copy prints a warning and the loop carries on.

    Arguments:
        model: A torch.nn.Module object.
        src_state_dict (dict): A dict containing parameters and persistent buffers.
    Note:
        This is modified from torch.nn.modules.module.load_state_dict(), to make
        the warnings and errors more detailed.
    """
    from torch.nn import Parameter

    destination = model.state_dict()
    for name, param in src_state_dict.items():
        ### CHANGED HERE FOR FINE TUNING
        if name not in destination:
            continue
        # backwards compatibility for serialized parameters
        source_tensor = param.data if isinstance(param, Parameter) else param
        try:
            destination[name].copy_(source_tensor)
        except Exception as e:
            print("Warning: Error occurs when copying '{}': {}"
                  .format(name, str(e)))

    # src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())
    # if len(src_missing) > 0:
    #   print("Keys not found in source state_dict: ")
    #   for n in src_missing:
    #     print('\t', n)

    # dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())
    # if len(dest_missing) > 0:
    #   print("Keys not found in destination state_dict: ")
    #   for n in dest_missing:
    #     print('\t', n)
def is_iterable(obj):
    """Report whether ``obj`` exposes ``__len__`` (the only thing checked)."""
    has_length = hasattr(obj, '__len__')
    return has_length
def may_set_mode(maybe_modules, mode):
    """Switch every torch module in ``maybe_modules`` to train or eval mode.

    maybe_modules: an object or a list of objects; members that are not
    ``torch.nn.Module`` instances are ignored.
    mode: 'train' or 'eval' (asserted).
    """
    assert mode in ['train', 'eval']
    candidates = maybe_modules if is_iterable(maybe_modules) else [maybe_modules]
    for candidate in candidates:
        if not isinstance(candidate, torch.nn.Module):
            continue
        if mode == 'train':
            candidate.train()
        else:
            candidate.eval()
def may_make_dir(path):
    """Create ``path`` (and missing parents) unless it already exists.

    Args:
        path: a dir, or result of `osp.dirname(osp.abspath(file_path))`.
            ``None`` and ``''`` are ignored.
    Note:
        `osp.exists('')` returns `False`, while `osp.exists('.')` returns `True`!
    """
    # `if path is None or ''` would be wrong here, hence the membership test.
    if path in (None, ''):
        return
    if osp.exists(path):
        return
    os.makedirs(path)
class AverageMeter(object):
    """Modified from Tong Xiao's open-reid.
    Tracks the latest value together with a weighted running mean."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def reset(self):
        """Zero the latest value, mean, sum and count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        # The tiny epsilon keeps a zero count from dividing by zero.
        denominator = self.count + 1e-20
        self.avg = float(self.sum) / denominator
class RunningAverageMeter(object):
    """Keeps the latest value and an exponentially weighted running mean."""

    def __init__(self, hist=0.99):
        self.val = None
        self.avg = None
        self.hist = hist

    def reset(self):
        """Forget the latest value and the running mean."""
        self.val = self.avg = None

    def update(self, val):
        """Blend ``val`` into the mean; the first value seeds it directly."""
        if self.avg is not None:
            self.avg = self.avg * self.hist + val * (1 - self.hist)
        else:
            self.avg = val
        self.val = val
class RecentAverageMeter(object):
    """Averages the most recent ``hist_size`` values."""

    def __init__(self, hist_size=100):
        self.hist_size = hist_size
        self.fifo = []
        self.val = 0

    def reset(self):
        """Drop all stored values."""
        self.fifo = []
        self.val = 0

    def update(self, val):
        """Append ``val`` and evict the oldest entry once over capacity."""
        self.val = val
        self.fifo.append(val)
        if len(self.fifo) > self.hist_size:
            self.fifo.pop(0)

    @property
    def avg(self):
        """Mean of the stored values; asserts that at least one exists."""
        assert len(self.fifo) > 0
        total = float(sum(self.fifo))
        return total / len(self.fifo)
def get_model_wrapper(model, multi_gpu):
    """Wrap ``model`` in ``DataParallel`` when ``multi_gpu`` is truthy, else return it as is."""
    from torch.nn.parallel import DataParallel
    return DataParallel(model) if multi_gpu else model
class ReDirectSTD(object):
    """Modified from Tong Xiao's `Logger` in open-reid.
    This class overwrites sys.stdout or sys.stderr, so that console logs can
    also be written to file.

    Args:
        fpath: file path
        console: one of ['stdout', 'stderr']
        immediately_visible: If `False`, the file is opened only once and closed
            after exiting. In this case, the message written to file may not be
            immediately visible (Because the file handle is occupied by the
            program?). If `True`, each writing operation of the console will
            open, write to, and close the file. If your program has tons of writing
            operations, the cost of opening and closing file may be obvious. (?)

    Usage example:
        `ReDirectSTD('stdout.txt', 'stdout', False)`
        `ReDirectSTD('stderr.txt', 'stderr', False)`

    NOTE: File will be deleted if already existing. Log dir and file is created
    lazily -- if no message is written, the dir and file will not be created.

    ``close()`` (also run by ``__exit__`` and ``__del__``) puts the original
    stream back into ``sys`` if this object is still installed there and
    closes the log file. The original console stream itself is left open.
    """

    def __init__(self, fpath=None, console='stdout', immediately_visible=False):
        import sys
        import os
        import os.path as osp

        assert console in ['stdout', 'stderr']
        # The stream being wrapped; every message is still echoed to it.
        self.console = sys.stdout if console == 'stdout' else sys.stderr
        self.file = fpath
        self.f = None
        self.immediately_visible = immediately_visible
        if fpath is not None:
            # Remove existing log file.
            if osp.exists(fpath):
                os.remove(fpath)

        # Overwrite
        if console == 'stdout':
            sys.stdout = self
        else:
            sys.stderr = self

    def __del__(self):
        # Finalizers may run on a half-built object or during interpreter
        # shutdown; never let that surface as an error.
        try:
            self.close()
        except Exception:
            pass

    def __enter__(self):
        # Return the redirector so ``with ReDirectSTD(...) as r`` binds it.
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        """Echo ``msg`` to the console and append it to the log file."""
        self.console.write(msg)
        if self.file is not None:
            may_make_dir(os.path.dirname(osp.abspath(self.file)))
            if self.immediately_visible:
                with open(self.file, 'a') as f:
                    f.write(msg)
            else:
                if self.f is None:
                    self.f = open(self.file, 'w')
                self.f.write(msg)

    def flush(self):
        """Flush the console and, if open, the log file (with fsync)."""
        self.console.flush()
        if self.f is not None:
            self.f.flush()
            import os
            os.fsync(self.f.fileno())

    def close(self):
        """Undo the redirection and close the log file. Safe to call twice."""
        import sys
        # Closing the wrapped console (the real stdout/stderr) would make
        # every later print fail; hand it back instead.
        if sys.stdout is self:
            sys.stdout = self.console
        if sys.stderr is self:
            sys.stderr = self.console
        if self.f is not None:
            self.f.close()
            self.f = None
def set_seed(seed):
    """Seed Python's ``random``, numpy and torch (CPU) and disable cudnn.

    Prints one line after each step.
    """
    import random
    import numpy as np
    import torch

    random.seed(seed)
    print('setting random-seed to {}'.format(seed))

    np.random.seed(seed)
    print('setting np-random-seed to {}'.format(seed))

    torch.backends.cudnn.enabled = False
    print('cudnn.enabled set to {}'.format(torch.backends.cudnn.enabled))

    # set seed for CPU
    torch.manual_seed(seed)
    print('setting torch-seed to {}'.format(seed))
def print_array(array, fmt='{:.2f}', end=' '):
    """Print a 1-D tuple, list, or numpy array of numbers and return the text.

    Each element is converted to float, formatted with ``fmt`` and followed
    by ``end``; a newline closes the string.
    """
    pieces = [fmt.format(float(x)) + end for x in array]
    s = ''.join(pieces) + '\n'
    print(s)
    return s
# Great idea from https://github.com/amdegroot/ssd.pytorch
def str2bool(v):
    """True when the lower-cased string is one of yes / true / t / 1."""
    truthy_words = ("yes", "true", "t", "1")
    return v.lower() in truthy_words
def tight_float_str(x, fmt='{:.4f}'):
    """Format ``x`` with ``fmt``, then strip trailing zeros and a trailing dot."""
    text = fmt.format(x)
    without_zeros = text.rstrip('0')
    return without_zeros.rstrip('.')
def find_index(seq, item):
    """Position of the first element of ``seq`` equal to ``item``, or -1."""
    hits = (position for position, element in enumerate(seq) if item == element)
    return next(hits, -1)
def adjust_lr_exp(optimizer, base_lr, ep, total_ep, start_decay_at_ep):
    """Exponential decay for the later part of training. Every parameter
    group of the optimizer receives the same learning rate.

    Args:
        optimizer: a pytorch `Optimizer` object
        base_lr: starting learning rate
        ep: current epoch, ep >= 1
        total_ep: total number of epochs to train
        start_decay_at_ep: start decaying at the BEGINNING of this epoch

    Example:
        base_lr = 2e-4
        total_ep = 300
        start_decay_at_ep = 201
        It means the learning rate starts at 2e-4 and begins decaying after 200
        epochs. And training stops after 300 epochs.

    NOTE:
        It is meant to be called at the BEGINNING of an epoch.
    """
    assert ep >= 1, "Current epoch number should be >= 1"

    # Nothing to do before the decay phase starts.
    if ep < start_decay_at_ep:
        return

    for g in optimizer.param_groups:
        # Fraction of the decay phase completed, as the exponent of 0.001.
        progress = (float(ep + 1 - start_decay_at_ep)
                    / (total_ep + 1 - start_decay_at_ep))
        g['lr'] = (base_lr * (0.001 ** progress))
    message = '=====> lr adjusted to {:.10f}'.format(g['lr'])
    print(message.rstrip('0'))
def adjust_lr_staircase(optimizer, base_lr, ep, decay_at_epochs, factor):
    """Step decay: the rate is scaled by ``factor`` at the BEGINNING of the
    listed epochs. Every parameter group receives the same learning rate.

    Args:
        optimizer: a pytorch `Optimizer` object
        base_lr: starting learning rate
        ep: current epoch, ep >= 1
        decay_at_epochs: a list or tuple; learning rate is multiplied by a factor
            at the BEGINNING of these epochs
        factor: a number in range (0, 1)

    Example:
        base_lr = 1e-3
        decay_at_epochs = [51, 101]
        factor = 0.1
        It means the learning rate starts at 1e-3 and is multiplied by 0.1 at the
        BEGINNING of the 51'st epoch, and then further multiplied by 0.1 at the
        BEGINNING of the 101'st epoch, then stays unchanged till the end of
        training.

    NOTE:
        It is meant to be called at the BEGINNING of an epoch.
    """
    assert ep >= 1, "Current epoch number should be >= 1"

    # Only the listed epochs change the rate.
    if ep not in decay_at_epochs:
        return

    # How many decay steps have happened, counting this one.
    steps_taken = find_index(decay_at_epochs, ep) + 1
    for g in optimizer.param_groups:
        g['lr'] = base_lr * factor ** steps_taken
    message = '=====> lr adjusted to {:.10f}'.format(g['lr'])
    print(message.rstrip('0'))
@contextmanager
def measure_time(enter_msg):
    """Context manager: print ``enter_msg`` on entry and the elapsed seconds on normal exit."""
    started_at = time.time()
    print(enter_msg)
    yield
    elapsed = time.time() - started_at
    print('Done, {:.2f}s'.format(elapsed))
# @profile
def generate_features(appearance_model, patches):
features = []
for patch in patches:
patch = patch.unsqueeze(0)
with torch.no_grad():
feature = appearance_model(patch)
feature = feature.squeeze(0).cpu().numpy()
features.append(feature)
return features
# @profile
def generate_features_batched(appearance_model, patches, object_ids = None):
# return generate_features(appearance_model, patches) #TODO: Fix batched appearance features. This currently gives bad features
maxx = -1
maxy = -1
idxs = []
features = []
for i, patch in enumerate(patches):
if patch is None or patch.nelement()==0:
continue
maxx = max(maxx, patch.size()[1])
maxy = max(maxy, patch.size()[2])
idxs.append(i)
if(maxx==-1 and maxy==-1):
return features
batch = torch.zeros(len(idxs),3,maxx,maxy).to('cuda:1')
padding = []
for i, idx in enumerate(idxs):
patch = patches[idx]
patchx = patch.size()[1]
patchy = patch.size()[2]
batch[i,:,:patchx,:patchy] = patch
padding.append((patchx, patchy))
with torch.no_grad():
features_torch = appearance_model(batch)
# out_features = features_torch.mean()
# for feat, pad in zip(features_torch, padding):
# out_features.append(feat[:, :pad[0], :pad[1]].mean())
i = 0
ctr = 0
for idx in idxs:
while(i < idx):
features.append(None)
i+=1
features.append(features_torch[ctr])
i+=1
ctr+=1
while(i.txt are in rect camera coord.
2d box xy are in image2 coord
Points in .bin are in Velodyne coord.
y_image2 = P^2_rect * x_rect
y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
x_ref = Tr_velo_to_cam * x_velo
x_rect = R0_rect * x_ref
P^2_rect = [f^2_u, 0, c^2_u, -f^2_u b^2_x;
0, f^2_v, c^2_v, -f^2_v b^2_y;
0, 0, 1, 0]
= K * [1|t]
image2 coord:
----> x-axis (u)
|
|
v y-axis (v)
velodyne coord:
front x, left y, up z
rect/ref camera coord:
right x, down y, front z
Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
TODO(rqi): do matrix multiplication only once for each projection.
'''
def __init__(self, calib_filepath):
    """Load a calibration file and derive the projection matrices.

    Reads ``P2`` (3x4), the velodyne-to-camera transform
    (``Tr_velo_to_cam``, falling back to ``Tr_velo_cam``) and the
    rectifying rotation (``R0_rect``, falling back to ``R_rect``).
    The ``*_torch`` copies are created with ``.cuda()``, so a CUDA
    device is needed.

    Raises:
        KeyError: if neither spelling of a required key is present.
    """
    calibs = self.read_calib_file(calib_filepath)
    # Projection matrix from rect camera coord to image2 coord
    self.P = calibs['P2']
    self.P = np.reshape(self.P, [3, 4])
    self.P_torch = torch.from_numpy(self.P).float().cuda()
    # Rigid transform from Velodyne coord to reference camera coord
    try:
        self.V2C = calibs['Tr_velo_to_cam']
    except KeyError:
        # Alternative key spelling.
        self.V2C = calibs['Tr_velo_cam']
    self.V2C = np.reshape(self.V2C, [3, 4])
    self.C2V = inverse_rigid_trans(self.V2C)
    # Rotation from reference camera coord to rect camera coord
    try:
        self.R0 = calibs['R0_rect']
    except KeyError:
        # Alternative key spelling.
        self.R0 = calibs['R_rect']
    self.R0 = np.reshape(self.R0, [3, 3])
    self.R0_torch = torch.from_numpy(self.R0).float().cuda()
    # R0 embedded in a 4x4 homogeneous matrix, combined with P and
    # transposed so row-vector points can be right-multiplied by it.
    RA = np.zeros((4, 4))
    RA[:3, :3] = self.R0
    RA[3, 3] = 1
    self.D = np.matmul(self.P, RA).T
    self.D_torch = torch.from_numpy(self.D).float().cuda()

    # Camera intrinsics and extrinsics
    self.c_u = self.P[0, 2]
    self.c_v = self.P[1, 2]
    self.f_u = self.P[0, 0]
    self.f_v = self.P[1, 1]
    self.b_x = self.P[0, 3] / (-self.f_u)  # relative
    self.b_y = self.P[1, 3] / (-self.f_v)
def read_calib_file(self, filepath):
    ''' Parse a calibration text file into ``{key: 1-D float array}``.

    Each non-empty line is split at its first space into key and value;
    a trailing ':' on the key is dropped. Lines whose value is not all
    floats are skipped.
    Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
    '''
    parsed = {}
    with open(filepath, 'r') as handle:
        for raw_line in handle.readlines():
            entry = raw_line.rstrip()
            if len(entry) == 0:
                continue
            key, value = entry.split(' ', 1)
            if key.endswith(':'):
                key = key[:-1]
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                numbers = [float(token) for token in value.split()]
                parsed[key] = np.array(numbers)
            except ValueError:
                continue
    return parsed
def read_calib_from_video(self, calib_root_dir):
    ''' Build a camera-2 calibration dict from per-video calib files.
    Reads calib_cam_to_cam.txt and calib_velo_to_cam.txt found under
    calib_root_dir and returns the keys 'Tr_velo_to_cam' (flattened
    3x4 [R|T]), 'R0_rect' and 'P2'.
    '''
    cam_to_cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
    velo_to_cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
    # Assemble [R|T] from the separate rotation and translation entries.
    rigid = np.zeros((3, 4))
    rigid[0:3, 0:3] = np.reshape(velo_to_cam['R'], [3, 3])
    rigid[:, 3] = velo_to_cam['T']
    return {
        'Tr_velo_to_cam': np.reshape(rigid, [12]),
        'R0_rect': cam_to_cam['R_rect_00'],
        'P2': cam_to_cam['P_rect_02'],
    }
def cart2hom(self, pts_3d):
    ''' Input: nx3 points in Cartesian
        Output: nx4 points in Homogeneous, a column of ones appended
    '''
    ones_column = np.ones((pts_3d.shape[0], 1))
    return np.hstack((pts_3d, ones_column))
def cart2hom_torch(self, pts_3d):
    ''' Input: nx3 torch tensor of Cartesian points
        Output: nx4 tensor in Homogeneous by appending 1
    The ones column is created on the same device (and with the same
    dtype) as pts_3d instead of a hard-coded "cuda:0", so CPU tensors
    and tensors on other GPUs work as well.
    '''
    n = pts_3d.size()[0]
    ones_column = torch.ones(n, 1, device=pts_3d.device, dtype=pts_3d.dtype)
    return torch.cat((pts_3d, ones_column), 1)
# ===========================
# ------- 3d to 3d ----------
# ===========================
def project_velo_to_ref(self, pts_3d_velo):
    ''' Input: nx3 points in velodyne coord.
        Output: nx3 points in reference camera coord (V2C applied).
    '''
    homogeneous = self.cart2hom(pts_3d_velo) # nx4
    velo_to_cam_t = np.transpose(self.V2C)   # 4x3
    return np.dot(homogeneous, velo_to_cam_t)
def project_ref_to_velo(self, pts_3d_ref):
    ''' Input: nx3 points in reference camera coord.
        Output: nx3 points in velodyne coord (C2V applied).
    '''
    homogeneous = self.cart2hom(pts_3d_ref) # nx4
    cam_to_velo_t = np.transpose(self.C2V)  # 4x3
    return np.dot(homogeneous, cam_to_velo_t)
def project_rect_to_ref(self, pts_3d_rect):
    ''' Input and Output are nx3 points; applies inv(R0). '''
    inverse_rectification = np.linalg.inv(self.R0)
    rotated = np.dot(inverse_rectification, np.transpose(pts_3d_rect)) # 3xn
    return np.transpose(rotated)
def project_ref_to_rect(self, pts_3d_ref):
    ''' Input and Output are nx3 points; applies R0. '''
    rotated = np.dot(self.R0, np.transpose(pts_3d_ref)) # 3xn
    return np.transpose(rotated)
def project_ref_to_rect_torch(self, pts_3d_ref):
    ''' Torch version of project_ref_to_rect.
    Input and Output are nx3 points; applies R0_torch.
    '''
    as_columns = torch.transpose(pts_3d_ref, 0, 1)       # 3xn
    rotated = torch.matmul(self.R0_torch, as_columns)    # 3xn
    return torch.transpose(rotated, 0, 1)
def project_rect_to_velo(self, pts_3d_rect):
    ''' Input: nx3 points in rect camera coord.
        Output: nx3 points in velodyne coord.
    Chains project_rect_to_ref and project_ref_to_velo.
    '''
    return self.project_ref_to_velo(self.project_rect_to_ref(pts_3d_rect))
def project_velo_to_rect(self, pts_3d_velo):
    ''' Input: nx3 points in velodyne coord.
        Output: nx3 points in rect camera coord.
    Chains project_velo_to_ref and project_ref_to_rect.
    '''
    return self.project_ref_to_rect(self.project_velo_to_ref(pts_3d_velo))
# ===========================
# ------- 3d to 2d ----------
# ===========================
def project_rect_to_image(self, pts_3d_rect):
    ''' Input: nx3 points in rect camera coord.
        Output: nx2 points in image2 coord.
    '''
    homogeneous = self.cart2hom(pts_3d_rect)
    projected = np.dot(homogeneous, np.transpose(self.P)) # nx3
    # Perspective divide of u and v by the third homogeneous component.
    for column in (0, 1):
        projected[:, column] /= projected[:, 2]
    return projected[:, 0:2]
def project_rect_to_image_torch(self, pts_3d_rect):
    ''' Torch version of project_rect_to_image.
        Input: nx3 points in rect camera coord.
        Output: nx2 points in image2 coord.
    '''
    homogeneous = self.cart2hom_torch(pts_3d_rect)
    projected = torch.matmul(homogeneous, torch.transpose(self.P_torch, 0, 1)) # nx3
    # Perspective divide of u and v by the third homogeneous component.
    for column in (0, 1):
        projected[:, column] /= projected[:, 2]
    return projected[:, 0:2]
def project_ref_to_image_torch(self, pts_3d_ref):
    ''' Input: nx3 points in ref camera coord.
        Output: nx2 points in image2 coord.
    Uses the precomputed D_torch so that rectification and projection
    are one matrix product.
    '''
    homogeneous = self.cart2hom_torch(pts_3d_ref)
    projected = torch.matmul(homogeneous, self.D_torch) # nx3
    # Perspective divide of u and v by the third homogeneous component.
    for column in (0, 1):
        projected[:, column] /= projected[:, 2]
    return projected[:, 0:2]
def project_velo_to_image(self, pts_3d_velo):
    ''' Input: nx3 points in velodyne coord.
        Output: nx2 points in image2 coord.
    Chains project_velo_to_rect and project_rect_to_image.
    '''
    return self.project_rect_to_image(self.project_velo_to_rect(pts_3d_velo))
# ===========================
# ------- 2d to 3d ----------
# ===========================
def project_image_to_rect(self, uv_depth):
    ''' Input: nx3 first two channels are uv, 3rd channel
               is depth in rect camera coord.
        Output: nx3 points in rect camera coord.
    '''
    u, v, depth = uv_depth[:, 0], uv_depth[:, 1], uv_depth[:, 2]
    pts_3d_rect = np.zeros((uv_depth.shape[0], 3))
    # Invert the pinhole projection using the cached intrinsics.
    pts_3d_rect[:, 0] = ((u - self.c_u)*depth)/self.f_u + self.b_x
    pts_3d_rect[:, 1] = ((v - self.c_v)*depth)/self.f_v + self.b_y
    pts_3d_rect[:, 2] = depth
    return pts_3d_rect
def project_image_to_velo(self, uv_depth):
    ''' Input: nx3 (u, v, depth) rows.
        Output: nx3 points in velodyne coord.
    Chains project_image_to_rect and project_rect_to_velo.
    '''
    return self.project_rect_to_velo(self.project_image_to_rect(uv_depth))
def rotx(t):
    ''' 3x3 rotation matrix for an angle t (radians) about the x-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [[1, 0, 0],
            [0, cos_t, -sin_t],
            [0, sin_t, cos_t]]
    return np.array(rows)
def roty(t):
    ''' 3x3 rotation matrix for an angle t (radians) about the y-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [[cos_t, 0, sin_t],
            [0, 1, 0],
            [-sin_t, 0, cos_t]]
    return np.array(rows)
def rotz(t):
    ''' 3x3 rotation matrix for an angle t (radians) about the z-axis. '''
    cos_t, sin_t = np.cos(t), np.sin(t)
    rows = [[cos_t, -sin_t, 0],
            [sin_t, cos_t, 0],
            [0, 0, 1]]
    return np.array(rows)
def transform_from_rot_trans(R, t):
    ''' Build the 4x4 homogeneous matrix [R t; 0 0 0 1] from a rotation
    (any array reshapeable to 3x3) and a translation (reshapeable to 3x1).
    '''
    rotation = R.reshape(3, 3)
    translation = t.reshape(3, 1)
    top_rows = np.hstack([rotation, translation])
    return np.vstack((top_rows, [0, 0, 0, 1]))
def inverse_rigid_trans(Tr):
    ''' Invert a rigid body transform given as a 3x4 matrix [R|t].
    The result is the 3x4 matrix [R'|-R't].
    '''
    rotation_t = np.transpose(Tr[0:3, 0:3])
    inverse = np.zeros_like(Tr) # 3x4
    inverse[0:3, 0:3] = rotation_t
    inverse[0:3, 3] = np.dot(-rotation_t, Tr[0:3, 3])
    return inverse
def read_label(label_filename):
    ''' Parse a label file into a list of Object3d, one per line.
    Trailing whitespace is stripped from every line before it is
    handed to Object3d. The file is opened in a `with` block so the
    handle is closed deterministically (it used to be leaked).
    '''
    with open(label_filename) as label_file:
        lines = [line.rstrip() for line in label_file]
    return [Object3d(line) for line in lines]
def load_image(img_filename):
    ''' Read an image from disk with cv2.imread and return its result. '''
    image = cv2.imread(img_filename)
    return image
def load_velo_scan(velo_filename):
    ''' Load a binary scan stored as raw float32 values and return it
    as an (n, 4) array.
    '''
    raw_values = np.fromfile(velo_filename, dtype=np.float32)
    return raw_values.reshape((-1, 4))
def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.
    Usage: pts_2d = projectToImage(pts_3d, P)
    input: pts_3d: nx3 matrix
           P: 3x4 projection matrix
    output: pts_2d: nx2 matrix
    P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
    => normalize projected_pts_2d(2xn)
    <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
    => normalize projected_pts_2d(nx2)
    The leftover debug print of the intermediate shape was removed so
    the function no longer writes to stdout on every call.
    '''
    n = pts_3d.shape[0]
    pts_3d_extend = np.hstack((pts_3d, np.ones((n, 1))))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3
    # Perspective divide of u and v by the third homogeneous component.
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]
    return pts_2d[:, 0:2]
def compute_box_3d(obj, P):
    ''' Project the 3d bounding box of an object into the image plane.
    obj must expose ry, l, w, h and t (indexable, 3 entries); P is the
    projection matrix handed to project_to_image.
    Returns:
        corners_2d: (8,2) array in left image coord, or None when any
            corner has depth below 0.1.
        corners_3d: (8,3) array in rect camera coord.
    '''
    # rotation around the yaw axis
    yaw_rotation = roty(obj.ry)
    half_l, half_w, height = obj.l/2, obj.w/2, obj.h
    # box corners in the object frame: first four at y=0, last four at y=-h
    x_corners = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    y_corners = [0, 0, 0, 0, -height, -height, -height, -height]
    z_corners = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]
    # rotate, then translate by the object position
    corners_3d = np.dot(yaw_rotation, np.vstack([x_corners, y_corners, z_corners]))
    for axis in range(3):
        corners_3d[axis, :] = corners_3d[axis, :] + obj.t[axis]
    # only project boxes that are entirely in front of the camera
    if np.any(corners_3d[2, :] < 0.1):
        return None, np.transpose(corners_3d)
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)
def compute_orientation_3d(obj, P):
    ''' Project the 3d orientation vector of an object into the image plane.
    The vector runs from the object origin to (l, 0, 0) in the object frame.
    Returns:
        orientation_2d: (2,2) array in left image coord, or None when
            either end point has depth below 0.1.
        orientation_3d: (2,3) array in rect camera coord.
    '''
    # rotation around the yaw axis
    yaw_rotation = roty(obj.ry)
    # end points as columns, in the object coordinate system
    end_points = np.array([[0.0, obj.l], [0, 0], [0, 0]])
    # rotate, then translate into the camera coordinate system
    orientation_3d = np.dot(yaw_rotation, end_points)
    for axis in range(3):
        orientation_3d[axis, :] = orientation_3d[axis, :] + obj.t[axis]
    # vector behind image plane?
    if np.any(orientation_3d[2, :] < 0.1):
        return None, np.transpose(orientation_3d)
    orientation_2d = project_to_image(np.transpose(orientation_3d), P)
    return orientation_2d, np.transpose(orientation_3d)
def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):
    ''' Draw 3d bounding box in image
    qs: (8,2) array of projected vertices for the 3d box in following order:
        1 -------- 0
       /|         /|
      2 -------- 3 .
      | |        | |
      . 5 -------- 4
      |/         |/
      6 -------- 7
    Draws the 12 box edges on `image` in place and returns it.
    '''
    qs = qs.astype(np.int32)
    # cv2.CV_AA only exists in OpenCV 2; OpenCV 3+ names it cv2.LINE_AA.
    line_type = cv2.LINE_AA if hasattr(cv2, 'LINE_AA') else cv2.CV_AA

    def _point(index):
        # Plain Python ints: newer OpenCV bindings reject numpy scalars.
        return (int(qs[index, 0]), int(qs[index, 1]))

    for k in range(0, 4):
        # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
        edges = (
            (k, (k+1)%4),          # face made of vertices 0-3
            (k+4, (k+1)%4 + 4),    # face made of vertices 4-7
            (k, k+4),              # edge connecting the two faces
        )
        for i, j in edges:
            cv2.line(image, _point(i), _point(j), color, thickness, line_type)
    return image
class OmniCalibration(Calibration):
    ''' Calibration for a cylindrical (omnidirectional) image.
    Reads defaults.yaml and cameras.yaml from a calibration folder and
    overrides the projection methods of Calibration. Calibration.__init__
    is not called, so the pinhole matrices (P, R0, ...) are not set.
    '''
    def __init__(self, calib_folder):
        ''' Load both YAML files and cache the median vertical focal
        length / optical centre and the image shape. '''
        global_config = os.path.join(calib_folder, 'defaults.yaml')
        camera_config = os.path.join(calib_folder, 'cameras.yaml')
        with open(global_config) as f:
            self.global_config_dict = yaml.safe_load(f)
        with open(camera_config) as f:
            self.camera_config_dict = yaml.safe_load(f)
        self.median_focal_length_y = self.calculate_median_param_value(param = 'f_y')
        self.median_optical_center_y = self.calculate_median_param_value(param = 't_y')
        # image shape is (color channels, height, width)
        self.img_shape = 3, self.global_config_dict['image']['height'], self.global_config_dict['image']['width']

    def project_ref_to_image_torch(self, pointcloud):
        ''' Input: nx3 torch tensor with columns (x, y, z).
            Output: nx2 tensor of (x, y) pixel coordinates.
        The horizontal coordinate comes from the azimuth atan2(x, z)
        mapped onto the image width.
        '''
        theta = (torch.atan2(pointcloud[:, 0], pointcloud[:, 2]) + np.pi) %(2*np.pi)
        horizontal_fraction = theta/ (2*np.pi)
        x = (horizontal_fraction * self.img_shape[2]) % self.img_shape[2]
        y = -self.median_focal_length_y*(pointcloud[:, 1]*torch.cos(theta)/pointcloud[:, 2]) + self.median_optical_center_y
        pts_2d = torch.stack([x, y], dim=1)
        return pts_2d

    def project_image_to_rect(self, uvdepth):
        ''' Input: nx3 numpy array of (u, v, depth) rows.
            Output: nx3 numpy array of (x, y, z) points.
        '''
        theta = (uvdepth[:, 0]/self.img_shape[2])*2*np.pi - np.pi
        z = uvdepth[:, 2]*np.cos(theta)
        x = uvdepth[:, 2]*np.sin(theta)
        y = z*-1*(uvdepth[:, 1] - self.median_optical_center_y)/(self.median_focal_length_y * np.cos(theta))
        return np.stack([x,y,z], axis=1)

    def project_velo_to_ref(self, pointcloud):
        ''' Reorder the columns to (1, 2, 0) and negate the first two
        resulting columns. The fancy index creates a new array, so the
        in-place negation does not touch the caller's data. '''
        pointcloud = pointcloud[:, [1, 2, 0]]
        pointcloud[:, 0] *= -1
        pointcloud[:, 1] *= -1
        return pointcloud

    def move_lidar_to_camera_frame(self, pointcloud, upper = True):
        ''' Move a lidar point cloud (torch tensor, first three columns
        are coordinates) into the camera frame, modifying it in place.
        `upper` selects the 'lidar_upper_to_rgb' or 'lidar_lower_to_rgb'
        entry of the calibrated section of defaults.yaml.
        '''
        # assumed only rotation about z axis
        sensor_key = 'lidar_upper_to_rgb' if upper else 'lidar_lower_to_rgb'
        sensor_calib = self.global_config_dict['calibrated'][sensor_key]
        pointcloud[:,:3] = \
            pointcloud[:,:3] - torch.Tensor(sensor_calib['translation']).type(pointcloud.type())
        theta = sensor_calib['rotation'][-1]
        rotation_matrix = torch.Tensor([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]).type(pointcloud.type())
        pointcloud[:, :2] = torch.matmul(rotation_matrix, pointcloud[:, :2].unsqueeze(2)).squeeze()
        pointcloud[:, :3] = self.project_velo_to_ref(pointcloud[:, :3])
        return pointcloud

    def calculate_median_param_value(self, param):
        ''' Median of one intrinsic over the omni cameras.
        param is one of 'f_x', 's', 't_x', 'f_y', 't_y'; it selects an
        index into the space-separated 'K' string of each camera.
        Raises ValueError for any other name (the previous
        `raise '<string>'` is itself a TypeError in Python 3).
        '''
        index_by_param = {'f_x': 0, 's': 1, 't_x': 2, 'f_y': 4, 't_y': 5}
        if param not in index_by_param:
            raise ValueError('Wrong parameter!')
        idx = index_by_param[param]
        omni_camera = ['sensor_0', 'sensor_2', 'sensor_4', 'sensor_6', 'sensor_8']
        parameter_list = []
        for sensor, camera_params in self.camera_config_dict['cameras'].items():
            if sensor not in omni_camera:
                continue
            K_matrix = camera_params['K'].split(' ')
            parameter_list.append(float(K_matrix[idx]))
        return np.median(parameter_list)
================================================
FILE: src/combination_model.py
================================================
import pdb
import numpy as np
import torch.nn as nn
class CombiNet(nn.Module):
    """Three-layer fully connected network with a residual connection.

    The output is the L2-normalised result of the last layer plus the
    L2-normalised input, so `in_dim` and `out_dim` must match for the
    addition to work.
    """
    def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
        super().__init__()
        width = 2*hidden_units
        self.fc1 = nn.Linear(in_dim, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, out_dim)
        self.relu = nn.ReLU()
        self.apply(weight_init)

    def forward(self, x):
        skip = nn.functional.normalize(x)
        # NOTE(review): fc1 consumes the raw `x`, not the normalised
        # tensor (CombiLSTM feeds the normalised one) - confirm intended.
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        out = nn.functional.normalize(self.fc3(hidden))
        out += skip
        return out
class CombiLSTM(nn.Module):
    """Two linear layers, a single-layer LSTM and two more linear layers,
    with a residual connection from the normalised input.

    The rows of the input are fed to the LSTM as consecutive time steps
    of a single sequence (a batch dimension of size one is inserted).
    """
    def __init__(self, in_dim = 2560, hidden_units = 512, out_dim = 2560):
        super().__init__()
        self.in_linear1 = nn.Linear(in_dim, hidden_units)
        self.in_linear2 = nn.Linear(hidden_units, hidden_units)
        self.rnn = nn.LSTM(input_size = hidden_units, hidden_size = hidden_units, dropout = 0)
        self.out_linear1 = nn.Linear(hidden_units, hidden_units)
        self.out_linear2 = nn.Linear(hidden_units, out_dim)
        self.relu = nn.ReLU()
        self.apply(weight_init)

    def forward(self, x, hidden = None):
        """Return `(output, hidden)`; pass `hidden` back in to continue
        the sequence, or leave it None to start from the LSTM default."""
        skip = nn.functional.normalize(x)
        features = self.relu(self.in_linear1(skip))
        sequence = self.in_linear2(features).unsqueeze(1) # add batch dimension
        if hidden is not None:
            sequence, hidden = self.rnn(sequence, hidden)
        else:
            sequence, hidden = self.rnn(sequence)
        features = sequence.squeeze(1) # remove batch dimension
        features = self.relu(self.out_linear1(features))
        out = nn.functional.normalize(self.out_linear2(features))
        out += skip
        return out, hidden
def weight_init(m):
    """Xavier-normal initialisation, meant to be used with `Module.apply`.

    Modules whose exact type is `nn.Linear` get their weight re-drawn
    with gain sqrt(2); modules whose exact type is `nn.LSTM` get their
    first-layer input and hidden weights re-drawn with the default gain.
    Biases and every other module type are left untouched.
    """
    module_type = type(m)
    if module_type == nn.Linear:
        nn.init.xavier_normal_(m.weight, gain=np.sqrt(2))
        return
    if module_type == nn.LSTM:
        for weight_name in ('weight_ih_l0', 'weight_hh_l0'):
            nn.init.xavier_normal_(getattr(m, weight_name))
================================================
FILE: src/deep_sort_utils.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------
    >>> boxes = [d.roi for d in detections]
    >>> scores = [d.confidence for d in detections]
    >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
    >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : ndarray
        Array of ROIs (x, y, width, height).
    max_bbox_overlap : float
        ROIs that overlap more than this values are suppressed. The
        overlap of a candidate with an already picked box is the
        intersection area divided by the candidate's own area.
    scores : Optional[array_like]
        Detector confidence score. When omitted, boxes are ranked by
        their bottom edge (y + height) instead.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    # `np.float` was removed in NumPy 1.24; the builtin float is equivalent.
    boxes = np.asarray(boxes, dtype=float)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Ascending sort: the best remaining candidate is always the last one.
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        remaining = idxs[:last]
        xx1 = np.maximum(x1[i], x1[remaining])
        yy1 = np.maximum(y1[i], y1[remaining])
        xx2 = np.minimum(x2[i], x2[remaining])
        yy2 = np.minimum(y2[i], y2[remaining])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # Intersection over the candidate's area (not intersection over union).
        overlap = (w * h) / area[remaining]

        # Drop the picked box and every candidate it suppresses.
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick
================================================
FILE: src/detection.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    box_3d : array_like | NoneType
        3D box; element 1 is shifted by minus half of element 4 on
        construction. The caller's object is not modified.
    confidence : float
        Detector confidence score.
    appearance_feature : object
        Stored unchanged as `appearance_feature`.
    feature : array_like | NoneType
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    box_3d : ndarray | NoneType
        float32 copy of the 3D box after the shift described above.
    confidence : float
        Detector confidence score.
    appearance_feature : object
        The value passed to the constructor.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, box_3d, confidence, appearance_feature, feature):
        # `np.float` was removed in NumPy 1.24; the builtin float is equivalent.
        self.tlwh = np.asarray(tlwh, dtype=float)
        # Note that detections format is centre of 3D box and dimensions (not bottom face)
        if box_3d is not None:
            # Work on a private copy: the shift used to be applied in place
            # to the caller's array/list before the conversion.
            shifted = np.array(box_3d, dtype=float)
            shifted[1] -= shifted[4]/2
            self.box_3d = shifted.astype(np.float32)
        else:
            self.box_3d = None
        self.confidence = float(confidence)
        self.appearance_feature = appearance_feature
        self.feature = feature

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xywh(self):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        return ret

    def get_3d_distance(self):
        """Return sqrt(x**2 + z**2) from elements 0 and 2 of `box_3d`,
        or None when the detection has no 3D box.
        """
        if self.box_3d is None:
            return None
        return np.sqrt(self.box_3d[0]**2 + self.box_3d[2]**2)
================================================
FILE: src/distances.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.
Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""
import numpy as np
import pdb
def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Pairwise squared Euclidean distances between two point sets.

    Params
    ------
    objs : NxM array
        Object points of dim M in rows
    hyps : KxM array
        Hypothesis points of dim M in rows

    Kwargs
    ------
    max_d2 : float
        Largest squared distance that is still reported; entries above
        it become np.nan (do-not-pair). Defaults to +inf.

    Returns
    -------
    C : NxK array
        Squared distances or np.nan. A (0, 0) array when either input
        is empty.
    """
    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))

    assert hyps.shape[1] == objs.shape[1], "Dimension mismatch"

    distances = np.empty((objs.shape[0], hyps.shape[0]))
    for row, obj_point in enumerate(objs):
        for col, hyp_point in enumerate(hyps):
            delta = obj_point - hyp_point
            distances[row, col] = delta.dot(delta)

    too_far = distances > max_d2
    distances[too_far] = np.nan
    return distances
def iou_matrix(objs, hyps, max_iou=1.):
    """Pairwise 'intersection over union' distance between rectangles.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    so it is 0 for identical rectangles and 1 for disjoint ones.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Largest distance that is still reported; entries above it become
        np.nan (do-not-pair). Defaults to 1.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also np.nan when the union area is zero).
        A (0, 0) array when either input is empty.
    """
    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))

    assert objs.shape[1] == 4
    assert hyps.shape[1] == 4

    # bottom-right corners
    obj_corners = objs[:, :2] + objs[:, 2:]
    hyp_corners = hyps[:, :2] + hyps[:, 2:]

    distances = np.empty((objs.shape[0], hyps.shape[0]))
    for row in range(objs.shape[0]):
        obj_area = objs[row, 2]*objs[row, 3]
        for col in range(hyps.shape[0]):
            top_left = np.maximum(objs[row, :2], hyps[col, :2])
            bottom_right = np.minimum(obj_corners[row], hyp_corners[col])
            extent = np.maximum(bottom_right - top_left, 0)
            isect_a = extent[0]*extent[1]
            union_a = obj_area + hyps[col, 2]*hyps[col, 3] - isect_a
            if union_a == 0:
                distances[row, col] = np.nan
            else:
                distances[row, col] = 1. - isect_a / union_a

    distances[distances > max_iou] = np.nan
    return distances
def find_area(vertices):
    """Area of a polygon given as an ordered sequence of (x, y) vertices
    (shoelace formula); 0 for an empty sequence."""
    count = len(vertices)
    signed_twice_area = 0
    for i, vertex in enumerate(vertices):
        next_y = vertices[(i+1)%count][1]
        previous_y = vertices[i-1][1]
        signed_twice_area += vertex[0]*(next_y - previous_y)
    return 0.5*abs(signed_twice_area)
def get_angle(p):
    """Polar angle of the 2D point `p`, in the range [0, 2*pi)."""
    x, y = p
    angle = np.arctan2(y, x)
    # arctan2 returns values in (-pi, pi]; shift the negative half up.
    return angle + np.pi*2 if angle < 0 else angle
def clip_polygon(box1, box2):
    """Clip the ground-plane footprint of box1 by the edges of box2.

    Both boxes are (x, y, z, l, h, w, theta); only x, z, l, w and theta
    are used. The footprint of box1 is clipped successively against the
    four half-planes bounding the footprint of box2
    (Sutherland-Hodgman clipping).

    Returns the vertices of the intersection polygon as a list of
    (x, z) tuples in angular order, expressed relative to the centre of
    box1. The list is empty when the footprints do not overlap.
    """
    # Half-planes of box2, each stored as (a, b, c): a point is inside
    # when a*x + b*z < c (see point_inside_edge).
    x, y, z, l, h, w, theta = box2
    theta = -theta
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    box2_edges = np.asarray([(-cos_t, -sin_t, l/2-x*cos_t-z*sin_t),
                             (-sin_t, cos_t, w/2-x*sin_t+z*cos_t),
                             (cos_t, sin_t, l/2+x*cos_t+z*sin_t),
                             (sin_t, -cos_t, w/2+x*sin_t-z*cos_t)])

    x, y, z, l, h, w, theta = box1
    theta = -theta
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Corners must follow the perimeter: the clipping loop treats every
    # pair of consecutive vertices as a polygon edge. The former order
    # (+l+w, +l-w, -l+w, -l-w) made the two diagonals into edges.
    box1_vertices = [(x+l/2*cos_t-w/2*sin_t, z+l/2*sin_t+w/2*cos_t),
                     (x+l/2*cos_t+w/2*sin_t, z+l/2*sin_t-w/2*cos_t),
                     (x-l/2*cos_t+w/2*sin_t, z-l/2*sin_t-w/2*cos_t),
                     (x-l/2*cos_t-w/2*sin_t, z-l/2*sin_t+w/2*cos_t)]

    out_vertices = box1_vertices
    for edge in box2_edges:
        vertex_list = out_vertices[:]
        out_vertices = []
        for idx, current_vertex in enumerate(vertex_list):
            previous_vertex = vertex_list[idx-1]
            if point_inside_edge(current_vertex, edge):
                if not point_inside_edge(previous_vertex, edge):
                    # entering the half-plane: add the crossing point first
                    out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))
                out_vertices.append(current_vertex)
            elif point_inside_edge(previous_vertex, edge):
                # leaving the half-plane: keep only the crossing point
                out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge))

    # Drop vertices that coincide (within 1e-6) with one already kept.
    unique_vertices = []
    for vertex in out_vertices:
        is_duplicate = any(abs(vertex[0] - kept[0]) < 1e-6 and abs(vertex[1] - kept[1]) < 1e-6
                           for kept in unique_vertices)
        if not is_duplicate:
            unique_vertices.append(vertex)
    if not unique_vertices:
        return []

    # Order around the polygon's own centroid. The centre of box1 can lie
    # outside the intersection, where angular order is not perimeter order.
    centre_x = sum(v[0] for v in unique_vertices)/len(unique_vertices)
    centre_z = sum(v[1] for v in unique_vertices)/len(unique_vertices)
    unique_vertices.sort(key = lambda p: np.arctan2(p[1]-centre_z, p[0]-centre_x))
    return [(v[0]-x, v[1]-z) for v in unique_vertices]
def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through `pt1` and `pt2` with `line1`.

    `line1` is an array (a, b, c) describing a*x + b*y = c; it must
    support element-wise arithmetic (a numpy array). Returns the
    intersection as an (x, y) tuple. The lines are assumed not to be
    parallel.
    """
    # Line through the two points in the same (a, b, c) form.
    if pt1[0] == pt2[0]:
        slope = np.inf
    else:
        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
    vertical = np.isinf(slope)
    line2 = (1, 0, pt1[0]) if vertical else (slope, -1, pt1[0]*slope-pt1[1])

    a1, b1, c1 = line1[0], line1[1], line1[2]
    a2, b2, c2 = line2
    if b1 == 0:
        # line1 is vertical: x is fixed by line1
        x = c1/a1
        y = (c2 - a2*x)/b2
    elif a1 == 0:
        # line1 is horizontal: y is fixed by line1
        y = c1/b1
        x = (c2 - b2*y)/a2
    elif b2 == 0:
        # the segment is vertical: x is fixed by the segment
        x = pt1[0]
        y = (c1-x*a1)/b1
    else:
        # eliminate y by subtracting a multiple of line1 from line2
        reduced = (line2 - line1*(b2/b1))
        x = reduced[2]/reduced[0]
        y = (c2 - a2*x)/b2
    return (x,y)
def point_inside_edge(pt, edge):
    """True when `pt` lies strictly inside the half-plane
    edge[0]*x + edge[1]*y < edge[2], with a 1e-6 margin so that points
    on the boundary count as outside."""
    projection = pt[0]*edge[0] + pt[1]*edge[1]
    threshold = edge[2] - 1e-6
    return True if projection < threshold else False
def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Computes the 3D 'intersection over union (IoU)' distance matrix
    between object and hypothesis boxes.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    where isect(a, b) is the volume shared by the two boxes (footprint
    intersection area times vertical overlap) and union(a, b) the volume
    of their union. It is 0 for identical boxes and 1 for disjoint ones.

    Params
    ------
    objs : Nx7 array
        Object boxes (x,y,z,l,h,w,theta) in rows; each box spans
        [y - h, y] vertically.
    hyps : Kx7 array
        Hypothesis boxes (x,y,z,l,h,w,theta) in rows

    Kwargs
    ------
    max_iou : float
        Maximum tolerable overlap distance. Pairs with larger distance
        are set to np.nan signalling do-not-pair. Defaults to 1.

    Returns
    -------
    C : NxK array
        Distance matrix containing pairwise distances or np.nan (also
        np.nan when the union volume is zero). A (0, 0) array when
        either input is empty.
    """
    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)

    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))

    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))

    for o in range(objs.shape[0]):
        for h in range(hyps.shape[0]):
            base_area = find_area(clip_polygon(objs[o], hyps[h]))
            # Vertical overlap; clamped at zero so boxes that do not
            # overlap in height cannot contribute a negative volume
            # (which pushed the distance above 1 and turned it into NaN).
            height = max(0., min(objs[o][1], hyps[h][1]) - max(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4]))
            intersect = base_area*height
            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect
            if union != 0:
                C[o, h] = 1. - intersect / union
            else:
                C[o, h] = np.nan

    C[C > max_iou] = np.nan
    return C
================================================
FILE: src/double_measurement_kf.py
================================================
import random
import numpy as np
import scipy.linalg
import EKF
import pdb
import kf_2d
import os
import pickle
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
np.set_printoptions(precision=4, suppress=True)
from calibration import Calibration
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from evaluation.distances import iou_matrix
class KF_3D(kf_2d.KalmanFilter2D):
"""
3D Kalman Filter that tracks objects in 3D space
The 9-dimensional state space
x, y, z, l, h, w, theta, vx, vz
contains the bounding box center position (x, z), the heading angle theta, the
box dimensions l, w, h, and the x and z velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self, calib, pos_weight_3d, pos_weight, velocity_weight, theta_weight,
             std_process, std_measurement_2d, std_measurement_3d,
             initial_uncertainty, omni = True, debug=True):
    """Store the noise weights and build the constant model matrices.

    With `omni` set, `calib` must provide `img_shape` and
    `median_focal_length_y` and is kept as `self.calib`; otherwise only
    `calib.P` is read and stored as `self.projection_matrix`.
    """
    self.ndim = 9
    self.dt = 4.

    # Constant velocity motion model: x += vx*dt and z += vz*dt.
    motion = np.eye(self.ndim, self.ndim)
    motion[0, 7] = self.dt
    motion[2, 8] = self.dt
    self._motion_mat = motion

    # Direct observation of the first ndim - 2 state entries.
    self._observation_mat = np.eye(self.ndim - 2, self.ndim)

    if not omni:
        self.projection_matrix = calib.P
    else:
        self.x_constant = calib.img_shape[2]/(2*np.pi)
        self.y_constant = calib.median_focal_length_y
        self.calib = calib
    self.omni = omni

    self._std_weight_pos_3d = pos_weight_3d
    self._std_weight_pos = pos_weight
    self._std_weight_vel = velocity_weight
    self._std_weight_theta= theta_weight
    self._std_weight_process = std_process
    self._initial_uncertainty = initial_uncertainty
    self._std_weight_measurement_2d = std_measurement_2d
    self._std_weight_measurement_3d = std_measurement_3d
    self.debug = debug
def initiate(self, measurement_3d):
    """Create the initial state from one 3D measurement.

    Returns `(mean, covariance)`: the measurement with two zero
    velocities appended, and a diagonal covariance built from the
    configured weights scaled by
    `(initial_uncertainty * std_process) ** 2`.
    """
    mean = np.r_[measurement_3d, np.zeros((2,))]

    pos = self._std_weight_pos_3d
    vel = self._std_weight_vel
    std = [
        pos,            # x
        pos * 0.15,     # y
        pos,            # z
        pos * 0.015,    # l
        pos * 0.015,    # h
        pos * 0.015,    # w
        self._std_weight_theta * 10,
        vel*5,          # vx
        vel*5]          # vz
    scale = (self._initial_uncertainty*self._std_weight_process)**2
    covariance = np.diag(np.square(std))*scale
    return mean, covariance
def get_process_noise(self, mean):
    """Return the diagonal process noise covariance.

    `mean` is not used. As a side effect the unscaled covariance is
    stored in `self._motion_cov`; the returned matrix is that covariance
    multiplied by `std_process ** 2`.
    """
    pos = self._std_weight_pos_3d
    std_pos = [
        pos,                    # x
        pos * 0.15,             # y
        pos,                    # z
        pos * 0.015,            # l
        pos * 0.015,            # h
        pos * 0.015,            # w
        self._std_weight_theta  # theta
    ]
    std_vel = [self._std_weight_vel] * 2   # vx, vz
    variances = np.square(np.r_[std_pos, std_vel])
    self._motion_cov = np.diag(variances)
    return np.diag(variances)*self._std_weight_process**2
def get_2d_measurement_noise(self, measurement_2d):
    """Return the 4x4 diagonal 2D measurement noise covariance.

    The standard deviations scale with elements 2 and 3 of
    `measurement_2d` (used alternately for the four entries), and the
    result is multiplied by `std_measurement_2d ** 2`.
    """
    weight = self._std_weight_pos
    size_terms = (measurement_2d[2], measurement_2d[3],
                  measurement_2d[2], measurement_2d[3])
    std = [weight*term for term in size_terms]
    return np.diag(np.square(std))*self._std_weight_measurement_2d**2
def get_3d_measurement_noise(self, measurement):
    """Return the 7x7 diagonal 3D measurement noise covariance.

    `measurement` is not used; the values depend only on the configured
    weights and are multiplied by `std_measurement_3d ** 2`.
    """
    pos = self._std_weight_pos_3d
    std = [
        pos,        # x
        pos * 0.5,  # y
        pos,        # z
        pos,        # l
        pos,        # h
        pos,        # w
        self._std_weight_theta * 25 # theta
    ]
    scale = self._std_weight_measurement_3d**2
    return np.diag(np.square(std))*scale
def gating_distance(self, mean, covariance, measurements,
                    only_position=False,
                    use_3d=True):
    """Compute gating distance between state distribution and measurements.

    With `use_3d` the first seven state entries are compared with 3D
    measurements; otherwise the state is projected to a 2D box
    (min_x, min_y, width, height) and compared with 2D measurements.
    If `only_position` is True, only (x, z) in 3D or the first two box
    entries in 2D are used.

    The projected covariance is rescaled so that its largest entry does
    not exceed `self.LIMIT` (0.3) before the distance is computed.

    Parameters
    ----------
    mean : ndarray
        Mean vector over the state distribution (9 dimensional).
    covariance : ndarray
        Covariance of the state distribution (9x9 dimensional).
    measurements : ndarray
        An NxD matrix of N measurements, D = 7 for 3D and 4 for 2D.
    only_position : Optional[bool]
        If True, distance computation is done with respect to the
        position entries only.
    use_3d : Optional[bool]
        Selects the 3D (default) or the projected 2D comparison.

    Returns
    -------
    ndarray
        The result of `EKF.squared_mahalanobis_distance` for the
        (possibly reduced) mean, covariance and measurements.

    """
    if not use_3d:
        corner_points, corner_points_3d = self.calculate_corners(mean)
        H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
        min_x, min_y = np.amin(corner_points, axis = 0)[:2]
        max_x, max_y = np.amax(corner_points, axis = 0)[:2]
        cov = self.project_cov_2d(mean, covariance, H_2d)
        mean = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
    else:
        mean, cov = mean[:7], self.project_cov(mean, covariance)
    if only_position:
        if use_3d:
            # keep the (x, z) entries and the matching 2x2 covariance block
            mean, cov = mean[[0, 2]], np.reshape(cov[[0, 0, 2, 2], [0, 2, 0, 2]], (2,2))
            measurements = measurements[:, [0, 2]]
        else:
            mean, cov = mean[:2], cov[:2, :2]
            measurements = measurements[:, :2]
    self.LIMIT=0.3
    largest = np.amax(cov)
    if largest > self.LIMIT:
        # The rescaled matrix used to be stored under one name and read
        # back under another, which raised NameError on every call.
        cov = cov * self.LIMIT / largest
    return EKF.squared_mahalanobis_distance(mean, cov, measurements)
def project_cov(self, mean, covariance):
    """Return S, the innovation covariance of the 3D measurement:
    H * covariance * H^T plus the 3D measurement noise, with H the
    constant observation matrix."""
    H = self._observation_mat
    projected = np.linalg.multi_dot((H, covariance, H.T))
    noise = self.get_3d_measurement_noise(mean)
    return projected + noise
def project_cov_2d(self, mean, covariance, H_2d):
    """Return S, the innovation covariance of the 2D measurement:
    H_2d * covariance * H_2d^T plus the 2D measurement noise evaluated
    at `mean`."""
    projected = np.linalg.multi_dot((H_2d, covariance, H_2d.T))
    noise = self.get_2d_measurement_noise(mean)
    return projected + noise
# @profile
def update(self, mean, covariance, measurement_2d, measurement_3d = None, marginalization=None, JPDA=False):
"""Run Kalman filter correction step.
The 3D measurement (when usable) is applied first, then the 2D
measurement is applied to the result.
Parameters
----------
mean : ndarray
The predicted state's mean vector (9 dimensional).
covariance : ndarray
The state's covariance matrix (9x9 dimensional).
measurement_2d : ndarray
The 2D box measurement, or None to skip the 2D step. Its first four
entries are compared with the projected state
(min_x, min_y, width, height). With JPDA=True this is an iterable of
such measurements.
measurement_3d : ndarray
The 7 dimensional measurement vector (x, y, z, l, h, w, theta), or an
iterable of such vectors with JPDA=True. The 3D step is skipped when
this is None or contains None.
marginalization : sequence
Only read with JPDA=True: entry 0 weights the "no detection" case,
entry i+1 weights measurement i.
JPDA : bool
Selects the probabilistic (weighted innovation) update.
Returns
-------
(ndarray, ndarray) or (ndarray, ndarray, ndarray)
Returns the measurement-corrected state distribution; when
`self.debug` is set, the mean after the 3D step is appended.
"""
# A state that already contains NaN is returned untouched.
if np.any(np.isnan(mean)):
return mean, covariance
out_cov = deepcopy(covariance)
H_3d = self._observation_mat
do_3d = True
covariance_3d = None
post_3d_mean = mean
# The 3D step needs a measurement without any None entry.
if measurement_3d is None:
do_3d = False
else:
for meas in measurement_3d:
if meas is None:
do_3d = False
break
if do_3d:
S_matrix = self.project_cov(mean, out_cov)
# Kalman gain K = P H^T S^-1, solved through a Cholesky factorisation.
try:
chol_factor, lower = scipy.linalg.cho_factor(
S_matrix, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(out_cov, H_3d.T).T,
check_finite=False).T
except:
# in case cholesky factorization fails, revert to standard solver
kalman_gain = np.linalg.multi_dot((out_cov, H_3d.T, np.linalg.inv(S_matrix)))
out_cov -= np.linalg.multi_dot((kalman_gain, S_matrix, kalman_gain.T))
if JPDA:
# Weighted innovation over all associated detections.
innovation_3d = 0
cov_uncertainty_3d = 0
for i, detection_3d in enumerate(measurement_3d):
innovation_partial = detection_3d - mean[:7]
innovation_3d += innovation_partial * marginalization[i+1]
cov_uncertainty_3d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
partial_cov = cov_uncertainty_3d-np.outer(innovation_3d, innovation_3d)
out_cov *= 1 - marginalization[0]
out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
out_cov += marginalization[0]*covariance
else:
# NOTE(review): out_cov was already reduced by K S K^T above, so this
# subtracts a second correction term - confirm this is intended.
out_cov = out_cov - np.linalg.multi_dot((kalman_gain, H_3d, out_cov))
innovation_3d = measurement_3d - mean[:7]
mean = mean + np.dot(kalman_gain, innovation_3d)
post_3d_mean = mean
covariance_3d = deepcopy(out_cov)
if measurement_2d is not None:
corner_points, corner_points_3d = self.calculate_corners(mean)
H_2d = self.get_2d_measurement_matrix(mean, corner_points, corner_points_3d)
#update based on 2D
min_x, min_y = np.amin(corner_points, axis = 0)[:2]
max_x, max_y = np.amax(corner_points, axis = 0)[:2]
# Clamp the projected box vertically to the image.
# NOTE(review): self.calib is only assigned by __init__ when omni=True.
if min_y < 0:
min_y = 0
if max_y >= self.calib.img_shape[1]:
max_y = self.calib.img_shape[1] - 1
S_matrix = self.project_cov_2d(np.array([min_x, min_y, max_x - min_x, max_y - min_y]), out_cov, H_2d)
try:
chol_factor, lower = scipy.linalg.cho_factor(
S_matrix, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(out_cov, H_2d.T).T,
check_finite=False).T
except:
# in case cholesky factorization fails, revert to standard solver
kalman_gain = np.linalg.multi_dot((out_cov, H_2d.T, np.linalg.inv(S_matrix)))
# Covariance update (I - K H) P.
out_cov = np.dot(np.eye(*out_cov.shape)-np.dot(kalman_gain, H_2d), out_cov)
if JPDA:
innovation_2d = 0
cov_uncertainty_2d = 0
for i, detection_2d in enumerate(measurement_2d):
innovation_partial = detection_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])
innovation_2d += innovation_partial * marginalization[i+1] # +1 to account for dummy node
cov_uncertainty_2d += marginalization[i+1] * np.outer(innovation_partial, innovation_partial)
partial_cov = cov_uncertainty_2d-np.outer(innovation_2d, innovation_2d)
out_cov *= 1 - marginalization[0]
out_cov += np.linalg.multi_dot((kalman_gain, partial_cov, kalman_gain.T))
# The "no detection" share keeps the covariance from before the 2D step.
if covariance_3d is None:
out_cov += marginalization[0]*covariance
else:
out_cov += marginalization[0]*covariance_3d
else:
innovation_2d = measurement_2d[:4] - np.array([min_x, min_y, max_x - min_x, max_y - min_y])
mean = mean + np.dot(kalman_gain, innovation_2d)
# In debug mode the mean after the 3D step is returned as a third value.
if self.debug:
return mean, out_cov, post_3d_mean
return mean, out_cov
# @profile
def get_2d_measurement_matrix(self, mean, corner_points, corner_points_3d):
min_x = np.inf
min_x_idx = None
max_x = -np.inf
max_x_idx = None
min_y = np.inf
min_y_idx = None
max_y = -np.inf
max_y_idx = None
for idx, pt in enumerate(corner_points):
if pt[0] < min_x:
min_x_idx = idx
min_x = pt[0]
if pt[0] > max_x:
max_x_idx = idx
max_x = pt[0]
if pt[1] < min_y:
min_y_idx = idx
min_y = pt[1]
if pt[1] > max_y:
max_y_idx = idx
max_y = pt[1]
if self.omni:
jac_x = np.dot(self.jacobian_omni(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))
jac_y = np.dot(self.jacobian_omni(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))
jac_w = np.dot(self.jacobian_omni(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x
jac_h = np.dot(self.jacobian_omni(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y
else:
jac_x = np.dot(self.jacobian(corner_points_3d[min_x_idx])[0], self.corner_jacobian(mean, min_x_idx))
jac_y = np.dot(self.jacobian(corner_points_3d[min_y_idx])[1], self.corner_jacobian(mean, min_y_idx))
jac_w = np.dot(self.jacobian(corner_points_3d[max_x_idx])[0], self.corner_jacobian(mean, max_x_idx)) - jac_x
jac_h = np.dot(self.jacobian(corner_points_3d[max_y_idx])[1], self.corner_jacobian(mean, max_y_idx)) - jac_y
jac = np.vstack([jac_x, jac_y, jac_w, jac_h])
jac = np.hstack([jac, np.zeros((jac.shape[0], 2))])
return jac
# Jacobian for projective transformation
def jacobian(self, pt_3d):
    """Jacobian of the projected image point w.r.t. the 3D point.

    Parameters
    ----------
    pt_3d : ndarray
        Point multiplied element-wise with a row of ``self.projection_matrix``
        (so it has as many entries as the matrix has columns).

    Returns
    -------
    ndarray
        The first three columns of the 2-row Jacobian.
    """
    image_pt = self.project_2d(pt_3d[None, :])
    # Third projective coordinate (the divisor of the perspective division).
    divisor = np.sum(self.projection_matrix[2] * pt_3d)
    numerator = self.projection_matrix[0:2] - self.projection_matrix[2:3] * image_pt.T
    full_jac = numerator / divisor
    return full_jac[:, :3]
def jacobian_omni(self, pt_3d):
    """2x3 Jacobian used instead of ``jacobian`` when ``self.omni`` is set.

    Row 0 depends only on x and z (scaled by ``self.x_constant``); row 1 is
    scaled by ``self.y_constant`` and additionally divided by sqrt(x^2 + z^2).

    Parameters
    ----------
    pt_3d : sequence
        Point whose first three entries are read as (x, y, z).

    Returns
    -------
    ndarray
        Array of shape (2, 3).
    """
    x, y, z = pt_3d[0], pt_3d[1], pt_3d[2]
    radial_sq = (x**2 + z**2)
    jac = np.zeros((2, 3))
    jac[0] = (self.x_constant*(z/radial_sq),
              0.0,
              -self.x_constant*(x/radial_sq))
    jac[1] = (self.y_constant*x*y/radial_sq,
              -self.y_constant,
              self.y_constant*z*y/radial_sq)
    # The whole second row shares the 1/sqrt(x^2 + z^2) factor.
    jac[1] /= np.sqrt(radial_sq)
    return jac
def calculate_corners(self, box):
    """Compute the eight box corners in 3D and their 2D projections.

    Parameters
    ----------
    box : sequence
        First seven entries are read as (x, y, z, l, h, w, theta).

    Returns
    -------
    (ndarray, ndarray)
        ``self.project_2d`` of the corners, and the (8, 4) array of
        homogeneous corners.  Corners 0-3 lie at ``y + h/2`` and corners 4-7
        at ``y - h/2``, in the same footprint order.
    """
    x, y, z, l, h, w, theta = box[:7]
    x_delta_1 = np.cos(theta)*l/2+np.sin(theta)*w/2
    x_delta_2 = np.cos(theta)*l/2 - np.sin(theta)*w/2
    z_delta_1 = np.sin(theta)*l/2-np.cos(theta)*w/2
    z_delta_2 = np.sin(theta)*l/2+np.cos(theta)*w/2
    # Footprint of the rotated rectangle in the x-z plane.
    footprint = [
        (x+x_delta_1, z+z_delta_1),
        (x+x_delta_2, z+z_delta_2),
        (x-x_delta_2, z-z_delta_2),
        (x-x_delta_1, z-z_delta_1),
    ]
    corners = [(fx, level, fz, 1)
               for level in (y + h/2, y - h/2)
               for fx, fz in footprint]
    pts_3d = np.vstack(corners)
    return self.project_2d(pts_3d), pts_3d
def corner_jacobian(self, pt_3d, corner_idx):
    """Jacobian of one 3D box corner w.r.t. the state (x, y, z, l, h, w, theta).

    Corner ``corner_idx`` (ordering of ``calculate_corners``) is

        x_c = x + s_l*(l/2)*cos(theta) + s_w*(w/2)*sin(theta)
        y_c = y + s_h*(h/2)
        z_c = z + s_l*(l/2)*sin(theta) - s_w*(w/2)*cos(theta)

    with s_l = +1 for ``corner_idx % 4 < 2``, s_w = +1 for even indices and
    s_h = +1 for the first four corners.  The previous implementation had
    sin/cos swapped in the ``l`` and ``w`` columns (and the wrong sign on
    dz/dw), so those columns disagreed with the corners actually projected.

    Parameters
    ----------
    pt_3d : sequence
        State vector; entries 3, 5 and 6 are read as l, w and theta.
    corner_idx : int
        Corner index in ``range(8)``.

    Returns
    -------
    ndarray
        Array of shape (3, 7); rows are (x_c, y_c, z_c).
    """
    _, _, _, l, _, w, theta = pt_3d[:7]
    sin_t, cos_t = np.sin(theta), np.cos(theta)
    s_l = 1.0 if corner_idx % 4 < 2 else -1.0
    s_w = 1.0 if corner_idx % 2 == 0 else -1.0

    # Identity block: each corner coordinate moves 1:1 with the box centre.
    jac = np.eye(3, 7)
    jac[1, 4] = 0.5 if corner_idx < 4 else -0.5
    # d/dl and d/dw
    jac[0, 3] = s_l*0.5*cos_t
    jac[0, 5] = s_w*0.5*sin_t
    jac[2, 3] = s_l*0.5*sin_t
    jac[2, 5] = -s_w*0.5*cos_t
    # d/dtheta
    jac[0, 6] = -s_l*sin_t*l/2 + s_w*cos_t*w/2
    jac[2, 6] = s_l*cos_t*l/2 + s_w*sin_t*w/2
    return jac
def project_2d(self, pts_3d):
    """Project 3D points to the image and clamp the second coordinate.

    With ``self.omni`` set the projection is delegated to
    ``self.calib.project_ref_to_image_torch``; otherwise the points are
    multiplied by ``self.projection_matrix`` and divided by the third
    resulting coordinate.  The second image coordinate is then limited to
    ``[0, self.calib.img_shape[1]]``.  No horizontal wrap-around handling is
    applied.

    Parameters
    ----------
    pts_3d : ndarray
        One point per row.

    Returns
    -------
    ndarray
        First two columns of the projected points.
    """
    if self.omni:
        pts_2d = np.array(self.calib.project_ref_to_image_torch(torch.from_numpy(pts_3d)))
    else:
        pts_2d = np.dot(pts_3d, self.projection_matrix.T)
        pts_2d /= np.expand_dims(pts_2d[:, 2], 1)

    upper = self.calib.img_shape[1]
    second_coord = pts_2d[:, 1]
    # Both masks are taken before any write, mirroring the if/elif per point.
    too_large = second_coord > upper
    negative = ~too_large & (second_coord < 0)
    second_coord[too_large] = upper
    second_coord[negative] = 0
    return pts_2d[:, :2]
def swap(detections_3d, iou, idx, swap_prob = 0):
    """Randomly replace a detection by its most-overlapping neighbour.

    With probability ``swap_prob`` the detection with the highest IoU to
    detection ``idx`` (other than itself) is returned instead, provided that
    IoU exceeds 0.4.

    Parameters
    ----------
    detections_3d : sequence
        Detections, indexable by position.
    iou : array-like
        Pairwise IoU matrix; row ``idx`` is read.  It is not modified.
    idx : int
        Index of the detection under consideration.
    swap_prob : float
        Probability of attempting a swap.  ``0`` never swaps.

    Returns
    -------
    object
        ``detections_3d[idx]`` or the swapped-in neighbour.
    """
    # random.random() lies in [0, 1): ">=" makes swap_prob == 0 a strict no-op
    # while swap_prob == 1 always attempts the swap.
    if random.random() >= swap_prob:
        return detections_3d[idx]
    # Work on a copy: iou[idx] is a view for ndarrays and masking the diagonal
    # in place would corrupt the caller's matrix.
    overlaps = np.array(iou[idx])
    overlaps[idx] = -1
    best = np.argmax(overlaps)
    if overlaps[best] > 0.4:
        return detections_3d[best]
    return detections_3d[idx]
================================================
FILE: src/evaluation/__init__.py
================================================
================================================
FILE: src/evaluation/distances 2.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.
Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""
import numpy as np
import pdb
def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Pairwise squared Euclidean distances between objects and hypotheses.

    Params
    ------
    objs : NxM array
        Object points of dim M in rows
    hyps : KxM array
        Hypothesis points of dim M in rows

    Kwargs
    ------
    max_d2 : float
        Largest squared distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to +inf.

    Returns
    -------
    C : NxK array
        Squared distances or np.nan.  A (0, 0) array if either input is empty.
    """
    obj_pts = np.atleast_2d(objs).astype(float)
    hyp_pts = np.atleast_2d(hyps).astype(float)
    if 0 in (obj_pts.size, hyp_pts.size):
        return np.empty((0,0))
    assert hyp_pts.shape[1] == obj_pts.shape[1], "Dimension mismatch"

    dists = np.empty((obj_pts.shape[0], hyp_pts.shape[0]))
    for row, obj_pt in enumerate(obj_pts):
        for col, hyp_pt in enumerate(hyp_pts):
            diff = obj_pt - hyp_pt
            dists[row, col] = diff.dot(diff)
    dists[dists > max_d2] = np.nan
    return dists
def iou_matrix(objs, hyps, max_iou=1.):
    """Pairwise IoU *distance* between object and hypothesis rectangles.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    so it is 0 for perfectly overlapping rectangles and 1 for disjoint ones.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Largest distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to 1.0.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also np.nan when the union area is zero).
        A (0, 0) array if either input is empty.
    """
    obj_rects = np.atleast_2d(objs).astype(float)
    hyp_rects = np.atleast_2d(hyps).astype(float)
    if 0 in (obj_rects.size, hyp_rects.size):
        return np.empty((0,0))
    assert obj_rects.shape[1] == 4
    assert hyp_rects.shape[1] == 4

    # Bottom-right corners: top-left plus (w, h).
    obj_br = obj_rects[:, :2] + obj_rects[:, 2:]
    hyp_br = hyp_rects[:, :2] + hyp_rects[:, 2:]

    dists = np.empty((obj_rects.shape[0], hyp_rects.shape[0]))
    for row, (obj, obj_corner) in enumerate(zip(obj_rects, obj_br)):
        for col, (hyp, hyp_corner) in enumerate(zip(hyp_rects, hyp_br)):
            overlap_tl = np.maximum(obj[:2], hyp[:2])
            overlap_wh = np.maximum(np.minimum(obj_corner, hyp_corner) - overlap_tl, 0)
            overlap = overlap_wh[0]*overlap_wh[1]
            union = obj[2]*obj[3] + hyp[2]*hyp[3] - overlap
            dists[row, col] = 1. - overlap / union if union != 0 else np.nan
    dists[dists > max_iou] = np.nan
    return dists
def find_area(vertices):
    """Area of a polygon given its vertices in boundary order (shoelace formula).

    ``vertices`` is a sequence of (x, y) pairs; an empty sequence gives 0.0.
    """
    count = len(vertices)
    twice_area = 0
    for k, vertex in enumerate(vertices):
        next_y = vertices[(k+1)%count][1]
        prev_y = vertices[k-1][1]  # k - 1 == -1 wraps to the last vertex
        twice_area += vertex[0]*(next_y - prev_y)
    return 0.5*abs(twice_area)
def get_angle(p):
    """Polar angle of the 2D point ``p = (x, y)``, mapped into [0, 2*pi)."""
    x, y = p
    theta = np.arctan2(y,x)
    # arctan2 yields (-pi, pi]; shift the negative half up by a full turn.
    return theta + np.pi*2 if theta < 0 else theta
def clip_polygon(box1, box2):
    """Clip the x-z footprint of ``box1`` by the four edges of ``box2``.

    Both boxes are 7-tuples (x, y, z, l, h, w, theta); only x, z, l, w and
    theta are used and theta is negated before use.  The footprint of box1
    is clipped successively against each half-plane of box2, near-duplicate
    vertices (closer than 1e-6 in both coordinates) are dropped, and the
    remaining vertices are returned relative to box1's centre, sorted by
    polar angle.
    """
    # Half-planes of box2, each stored as (a, b, c) and tested as a*x + b*z < c.
    cx2, _, cz2, len2, _, wid2, yaw2 = box2
    yaw2 = -yaw2
    box2_edges = np.asarray([
        (-np.cos(yaw2), -np.sin(yaw2), len2/2-cx2*np.cos(yaw2)-cz2*np.sin(yaw2)),
        (-np.sin(yaw2), np.cos(yaw2), wid2/2-cx2*np.sin(yaw2)+cz2*np.cos(yaw2)),
        (np.cos(yaw2), np.sin(yaw2), len2/2+cx2*np.cos(yaw2)+cz2*np.sin(yaw2)),
        (np.sin(yaw2), -np.cos(yaw2), wid2/2+cx2*np.sin(yaw2)-cz2*np.cos(yaw2))])

    # Footprint corners of box1.
    cx1, _, cz1, len1, _, wid1, yaw1 = box1
    yaw1 = -yaw1
    box1_corners = [
        (cx1+len1/2*np.cos(yaw1)-wid1/2*np.sin(yaw1), cz1+len1/2*np.sin(yaw1)+wid1/2*np.cos(yaw1)),
        (cx1+len1/2*np.cos(yaw1)+wid1/2*np.sin(yaw1), cz1+len1/2*np.sin(yaw1)-wid1/2*np.cos(yaw1)),
        (cx1-len1/2*np.cos(yaw1)-wid1/2*np.sin(yaw1), cz1-len1/2*np.sin(yaw1)+wid1/2*np.cos(yaw1)),
        (cx1-len1/2*np.cos(yaw1)+wid1/2*np.sin(yaw1), cz1-len1/2*np.sin(yaw1)-wid1/2*np.cos(yaw1))]
    polygon = sort_points(box1_corners, (cx1, cz1))

    for edge in box2_edges:
        subject, polygon = polygon, []
        for k, vertex in enumerate(subject):
            previous = subject[k-1]
            vertex_in = point_inside_edge(vertex, edge)
            previous_in = point_inside_edge(previous, edge)
            # The polygon boundary crosses the edge between the two vertices.
            if vertex_in != previous_in:
                polygon.append(compute_intersection_point(previous, vertex, edge))
            if vertex_in:
                polygon.append(vertex)

    # Keep a vertex only if no earlier kept vertex coincides with it.
    unique = []
    for vertex in polygon:
        if not any(abs(kept[0] - vertex[0]) < 1e-6 and abs(kept[1] - vertex[1]) < 1e-6
                   for kept in unique):
            unique.append(vertex)

    return sorted([(v[0]-cx1, v[1]-cz1) for v in unique], key=get_angle)
def sort_points(pts, center):
    """Return ``pts`` ordered by polar angle around ``center``.

    Parameters
    ----------
    pts : sequence of (x, z) pairs
        Points to order; the original elements are returned unchanged.
    center : (x, z) pair
        Reference point the angles are measured from.

    Returns
    -------
    list
        The points sorted by ``get_angle`` of their offset from ``center``
        (ties keep input order).  An empty input yields an empty list
        instead of failing while unpacking ``zip(*[])``.
    """
    x, z = center
    order = sorted(range(len(pts)),
                   key=lambda i: get_angle((pts[i][0]-x, pts[i][1]-z)))
    return [pts[i] for i in order]
def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through ``pt1``/``pt2`` with ``line1``.

    Lines are triples (a, b, c) meaning a*x + b*y = c.  ``line1`` is expected
    to support element-wise arithmetic (e.g. a numpy row) for the general
    case.  Returns the intersection as an (x, y) tuple.
    """
    # Line through the two points; a vertical segment gets the form x = const.
    if pt1[0] == pt2[0]:
        slope = np.inf
    else:
        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
    segment = (1, 0, pt1[0]) if np.isinf(slope) else (slope, -1, pt1[0]*slope-pt1[1])

    if line1[1] == 0:
        # line1 is x = const
        x = line1[2]/line1[0]
        return (x, (segment[2] - segment[0]*x)/segment[1])
    if line1[0] == 0:
        # line1 is y = const
        y = line1[2]/line1[1]
        return ((segment[2] - segment[1]*y)/segment[0], y)
    if segment[1] == 0:
        # vertical segment
        x = pt1[0]
        return (x, (line1[2]-x*line1[0])/line1[1])
    # General case: eliminate y, solve for x, then back-substitute.
    reduced = (segment - line1*(segment[1]/line1[1]))
    x = reduced[2]/reduced[0]
    return (x, (segment[2] - segment[0]*x)/segment[1])
def point_inside_edge(pt, edge):
    """True if ``pt`` lies strictly inside the half-plane ``edge``.

    ``edge`` is (a, b, c); the point is inside when a*x + b*y is smaller
    than c by more than 1e-6.
    """
    projection = pt[0]*edge[0] + pt[1]*edge[1]
    return bool(projection < edge[2] - 1e-6)
def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Pairwise 3D IoU *distance* between object and hypothesis boxes.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    where isect is the intersection volume: the area of the clipped x-z
    footprint times the vertical overlap of the two boxes.

    Params
    ------
    objs : Nx7 array
        Object boxes (x, y, z, l, h, w, theta) in rows; a box covers the
        vertical interval [y - h, y].
    hyps : Kx7 array
        Hypothesis boxes in the same format.

    Kwargs
    ------
    max_iou : float
        Largest distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to 1.0.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also np.nan when the union volume is zero).
        A (0, 0) array if either input is empty.
    """
    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)
    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))
    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o in range(objs.shape[0]):
        for h in range(hyps.shape[0]):
            base_area = find_area(clip_polygon(objs[o], hyps[h]))
            # Overlap of [y - h, y] intervals: smallest upper bound minus
            # largest lower bound, clamped at zero for disjoint boxes.  (The
            # span max(upper) - min(lower) would be the union, not the overlap.)
            upper = min(objs[o][1], hyps[h][1])
            lower = max(objs[o][1] - objs[o][4], hyps[h][1] - hyps[h][4])
            height = max(upper - lower, 0.)
            intersect = base_area*height
            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect
            if union != 0:
                C[o, h] = 1. - intersect / union
            else:
                C[o, h] = np.nan
    C[C > max_iou] = np.nan
    return C
================================================
FILE: src/evaluation/distances.py
================================================
"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.
Christoph Heindl, 2017
https://github.com/cheind/py-motmetrics
"""
import numpy as np
import pdb
def norm2squared_matrix(objs, hyps, max_d2=float('inf')):
    """Pairwise squared Euclidean distances between objects and hypotheses.

    Params
    ------
    objs : NxM array
        Object points of dim M in rows
    hyps : KxM array
        Hypothesis points of dim M in rows

    Kwargs
    ------
    max_d2 : float
        Largest squared distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to +inf.

    Returns
    -------
    C : NxK array
        Squared distances or np.nan.  A (0, 0) array if either input is empty.
    """
    obj_pts = np.atleast_2d(objs).astype(float)
    hyp_pts = np.atleast_2d(hyps).astype(float)
    if 0 in (obj_pts.size, hyp_pts.size):
        return np.empty((0,0))
    assert hyp_pts.shape[1] == obj_pts.shape[1], "Dimension mismatch"

    dists = np.empty((obj_pts.shape[0], hyp_pts.shape[0]))
    for row, obj_pt in enumerate(obj_pts):
        for col, hyp_pt in enumerate(hyp_pts):
            diff = obj_pt - hyp_pt
            dists[row, col] = diff.dot(diff)
    dists[dists > max_d2] = np.nan
    return dists
def iou_matrix(objs, hyps, max_iou=1.):
    """Pairwise IoU *distance* between object and hypothesis rectangles.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    so it is 0 for perfectly overlapping rectangles and 1 for disjoint ones.

    Params
    ------
    objs : Nx4 array
        Object rectangles (x,y,w,h) in rows
    hyps : Kx4 array
        Hypothesis rectangles (x,y,w,h) in rows

    Kwargs
    ------
    max_iou : float
        Largest distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to 1.0.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also np.nan when the union area is zero).
        A (0, 0) array if either input is empty.
    """
    obj_rects = np.atleast_2d(objs).astype(float)
    hyp_rects = np.atleast_2d(hyps).astype(float)
    if 0 in (obj_rects.size, hyp_rects.size):
        return np.empty((0,0))
    assert obj_rects.shape[1] == 4
    assert hyp_rects.shape[1] == 4

    # Bottom-right corners: top-left plus (w, h).
    obj_br = obj_rects[:, :2] + obj_rects[:, 2:]
    hyp_br = hyp_rects[:, :2] + hyp_rects[:, 2:]

    dists = np.empty((obj_rects.shape[0], hyp_rects.shape[0]))
    for row, (obj, obj_corner) in enumerate(zip(obj_rects, obj_br)):
        for col, (hyp, hyp_corner) in enumerate(zip(hyp_rects, hyp_br)):
            overlap_tl = np.maximum(obj[:2], hyp[:2])
            overlap_wh = np.maximum(np.minimum(obj_corner, hyp_corner) - overlap_tl, 0)
            overlap = overlap_wh[0]*overlap_wh[1]
            union = obj[2]*obj[3] + hyp[2]*hyp[3] - overlap
            dists[row, col] = 1. - overlap / union if union != 0 else np.nan
    dists[dists > max_iou] = np.nan
    return dists
def find_area(vertices):
    """Area of a polygon given its vertices in boundary order (shoelace formula).

    ``vertices`` is a sequence of (x, y) pairs; an empty sequence gives 0.0.
    """
    count = len(vertices)
    twice_area = 0
    for k, vertex in enumerate(vertices):
        next_y = vertices[(k+1)%count][1]
        prev_y = vertices[k-1][1]  # k - 1 == -1 wraps to the last vertex
        twice_area += vertex[0]*(next_y - prev_y)
    return 0.5*abs(twice_area)
def get_angle(p):
    """Polar angle of the 2D point ``p = (x, y)``, mapped into [0, 2*pi)."""
    x, y = p
    theta = np.arctan2(y,x)
    # arctan2 yields (-pi, pi]; shift the negative half up by a full turn.
    return theta + np.pi*2 if theta < 0 else theta
def clip_polygon(box1, box2):
    """Clip the x-z footprint of ``box1`` by the four edges of ``box2``.

    Both boxes are 7-tuples (x, y, z, l, h, w, theta); only x, z, l, w and
    theta are used and theta is negated before use.  The footprint of box1
    is clipped successively against each half-plane of box2, near-duplicate
    vertices (closer than 1e-6 in both coordinates) are dropped, and the
    remaining vertices are returned relative to box1's centre, sorted by
    polar angle.
    """
    # Half-planes of box2, each stored as (a, b, c) and tested as a*x + b*z < c.
    cx2, _, cz2, len2, _, wid2, yaw2 = box2
    yaw2 = -yaw2
    box2_edges = np.asarray([
        (-np.cos(yaw2), -np.sin(yaw2), len2/2-cx2*np.cos(yaw2)-cz2*np.sin(yaw2)),
        (-np.sin(yaw2), np.cos(yaw2), wid2/2-cx2*np.sin(yaw2)+cz2*np.cos(yaw2)),
        (np.cos(yaw2), np.sin(yaw2), len2/2+cx2*np.cos(yaw2)+cz2*np.sin(yaw2)),
        (np.sin(yaw2), -np.cos(yaw2), wid2/2+cx2*np.sin(yaw2)-cz2*np.cos(yaw2))])

    # Footprint corners of box1.
    cx1, _, cz1, len1, _, wid1, yaw1 = box1
    yaw1 = -yaw1
    box1_corners = [
        (cx1+len1/2*np.cos(yaw1)-wid1/2*np.sin(yaw1), cz1+len1/2*np.sin(yaw1)+wid1/2*np.cos(yaw1)),
        (cx1+len1/2*np.cos(yaw1)+wid1/2*np.sin(yaw1), cz1+len1/2*np.sin(yaw1)-wid1/2*np.cos(yaw1)),
        (cx1-len1/2*np.cos(yaw1)-wid1/2*np.sin(yaw1), cz1-len1/2*np.sin(yaw1)+wid1/2*np.cos(yaw1)),
        (cx1-len1/2*np.cos(yaw1)+wid1/2*np.sin(yaw1), cz1-len1/2*np.sin(yaw1)-wid1/2*np.cos(yaw1))]
    polygon = sort_points(box1_corners, (cx1, cz1))

    for edge in box2_edges:
        subject, polygon = polygon, []
        for k, vertex in enumerate(subject):
            previous = subject[k-1]
            vertex_in = point_inside_edge(vertex, edge)
            previous_in = point_inside_edge(previous, edge)
            # The polygon boundary crosses the edge between the two vertices.
            if vertex_in != previous_in:
                polygon.append(compute_intersection_point(previous, vertex, edge))
            if vertex_in:
                polygon.append(vertex)

    # Keep a vertex only if no earlier kept vertex coincides with it.
    unique = []
    for vertex in polygon:
        if not any(abs(kept[0] - vertex[0]) < 1e-6 and abs(kept[1] - vertex[1]) < 1e-6
                   for kept in unique):
            unique.append(vertex)

    return sorted([(v[0]-cx1, v[1]-cz1) for v in unique], key=get_angle)
def sort_points(pts, center):
    """Return ``pts`` ordered by polar angle around ``center``.

    Parameters
    ----------
    pts : sequence of (x, z) pairs
        Points to order; the original elements are returned unchanged.
    center : (x, z) pair
        Reference point the angles are measured from.

    Returns
    -------
    list
        The points sorted by ``get_angle`` of their offset from ``center``
        (ties keep input order).  An empty input yields an empty list
        instead of failing while unpacking ``zip(*[])``.
    """
    x, z = center
    order = sorted(range(len(pts)),
                   key=lambda i: get_angle((pts[i][0]-x, pts[i][1]-z)))
    return [pts[i] for i in order]
def compute_intersection_point(pt1, pt2, line1):
    """Intersect the line through ``pt1``/``pt2`` with ``line1``.

    Lines are triples (a, b, c) meaning a*x + b*y = c.  ``line1`` is expected
    to support element-wise arithmetic (e.g. a numpy row) for the general
    case.  Returns the intersection as an (x, y) tuple.
    """
    # Line through the two points; a vertical segment gets the form x = const.
    if pt1[0] == pt2[0]:
        slope = np.inf
    else:
        slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0])
    segment = (1, 0, pt1[0]) if np.isinf(slope) else (slope, -1, pt1[0]*slope-pt1[1])

    if line1[1] == 0:
        # line1 is x = const
        x = line1[2]/line1[0]
        return (x, (segment[2] - segment[0]*x)/segment[1])
    if line1[0] == 0:
        # line1 is y = const
        y = line1[2]/line1[1]
        return ((segment[2] - segment[1]*y)/segment[0], y)
    if segment[1] == 0:
        # vertical segment
        x = pt1[0]
        return (x, (line1[2]-x*line1[0])/line1[1])
    # General case: eliminate y, solve for x, then back-substitute.
    reduced = (segment - line1*(segment[1]/line1[1]))
    x = reduced[2]/reduced[0]
    return (x, (segment[2] - segment[0]*x)/segment[1])
def point_inside_edge(pt, edge):
    """True if ``pt`` lies strictly inside the half-plane ``edge``.

    ``edge`` is (a, b, c); the point is inside when a*x + b*y is smaller
    than c by more than 1e-6.
    """
    projection = pt[0]*edge[0] + pt[1]*edge[1]
    return bool(projection < edge[2] - 1e-6)
def iou_matrix_3d(objs, hyps, max_iou=1.):
    """Pairwise 3D IoU *distance* between object and hypothesis boxes.

    The distance is

        d(a, b) = 1. - isect(a, b) / union(a, b)

    where isect is the intersection volume: the area of the clipped x-z
    footprint times the vertical overlap of the two boxes.

    Params
    ------
    objs : Nx7 array
        Object boxes (x, y, z, l, h, w, theta) in rows; a box covers the
        vertical interval [y - h, y].
    hyps : Kx7 array
        Hypothesis boxes in the same format.

    Kwargs
    ------
    max_iou : float
        Largest distance that is kept; larger entries become np.nan
        (do-not-pair).  Defaults to 1.0.

    Returns
    -------
    C : NxK array
        Distances or np.nan (also np.nan when the union volume is zero).
        A (0, 0) array if either input is empty.
    """
    objs = np.atleast_2d(objs).astype(float)
    hyps = np.atleast_2d(hyps).astype(float)
    if objs.size == 0 or hyps.size == 0:
        return np.empty((0,0))
    assert objs.shape[1] == 7
    assert hyps.shape[1] == 7

    C = np.empty((objs.shape[0], hyps.shape[0]))
    for o in range(objs.shape[0]):
        for h in range(hyps.shape[0]):
            base_area = find_area(clip_polygon(objs[o], hyps[h]))
            # Overlap of [y - h, y] intervals: smallest upper bound minus
            # largest lower bound, clamped at zero for disjoint boxes.  (The
            # span max(upper) - min(lower) would be the union, not the overlap.)
            upper = min(objs[o][1], hyps[h][1])
            lower = max(objs[o][1] - objs[o][4], hyps[h][1] - hyps[h][4])
            height = max(upper - lower, 0.)
            intersect = base_area*height
            union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect
            if union != 0:
                C[o, h] = 1. - intersect / union
            else:
                C[o, h] = np.nan
    C[C > max_iou] = np.nan
    return C
================================================
FILE: src/featurepointnet_model.py
================================================
import os, pdb
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
import featurepointnet_tf_util as tf_util
import featurepointnet_model_util as model_util
from calibration import Calibration, OmniCalibration
# Fixed batch dimension used when creating the network placeholders;
# FPointNet.__call__ zero-pads its input up to a multiple of this value and
# runs the session once per batch.
batch_size = 6 #TODO: Update if needed?
class FPointNet():
def __init__(self, config_path):
parser = configparser.SafeConfigParser()
parser.read(config_path)
self.num_point = parser.getint('general', 'num_point')
self.model_path = parser.get('general', 'model_path')
with tf.device('/gpu:'+str('0')):
pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \
heading_class_label_pl, heading_residual_label_pl, \
size_class_label_pl, size_residual_label_pl = model_util.placeholder_inputs(batch_size, self.num_point)
is_training_pl = tf.placeholder(tf.bool, shape=())
end_points, depth_feature = self.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl)
self.object_pointcloud = tf.placeholder(tf.float32, shape=(None, None, 3))
#depth_feature = self.get_depth_feature_op(is_training_pl)
loss = model_util.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points)
self.saver = tf.train.Saver()
# Create a session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
self.sess = tf.Session(config=config)
#Initialize variables
self.sess.run(tf.global_variables_initializer())
# Restore variables from disk.
self.saver.restore(self.sess, self.model_path)
self.ops = {'pointclouds_pl': pointclouds_pl,
'one_hot_vec_pl': one_hot_vec_pl,
'labels_pl': labels_pl,
'centers_pl': centers_pl,
'heading_class_label_pl': heading_class_label_pl,
'heading_residual_label_pl': heading_residual_label_pl,
'size_class_label_pl': size_class_label_pl,
'size_residual_label_pl': size_residual_label_pl,
'is_training_pl': is_training_pl,
'logits': end_points['mask_logits'],
'center': end_points['center'],
'end_points': end_points,
'depth_feature':depth_feature,
'loss': loss}
# @profile
def __call__(self, input_point_cloud, rot_angle, peds=False):
'''
one_hot_vec = np.zeros((batch_size, 3))
feed_dict = {self.pointclouds_pl: input_point_cloud,
self.one_hot_vec_pl: one_hot_vec,
self.is_training_pl: False}
features = self.sess.run(self.feature,feed_dict=feed_dict)
return features '''
''' Run inference for frustum pointnets in batch mode '''
one_hot_vec = np.zeros((batch_size,3))
if peds:
one_hot_vec[:, 1] = 1
num_batches = input_point_cloud.shape[0]//batch_size + 1
num_inputs = input_point_cloud.shape[0]
if input_point_cloud.shape[0]%batch_size !=0:
input_point_cloud = np.vstack([input_point_cloud, np.zeros((batch_size - input_point_cloud.shape[0]%batch_size, self.num_point, 4))])
else:
num_batches -= 1
logits = np.zeros((input_point_cloud.shape[0], input_point_cloud.shape[1], 2))
centers = np.zeros((input_point_cloud.shape[0], 3))
heading_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))
heading_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_HEADING_BIN))
size_logits = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER))
size_residuals = np.zeros((input_point_cloud.shape[0], model_util.NUM_SIZE_CLUSTER, 3))
scores = np.zeros((input_point_cloud.shape[0],)) # 3D box score
features = np.zeros((input_point_cloud.shape[0], 512))
for i in range(num_batches):
ep = self.ops['end_points']
feed_dict = {\
self.ops['pointclouds_pl']: input_point_cloud[i*batch_size: (i+1)*batch_size],
self.ops['one_hot_vec_pl']: one_hot_vec,
self.ops['is_training_pl']: False}
batch_centers, \
batch_heading_scores, batch_heading_residuals, \
batch_size_scores, batch_size_residuals, batch_features = \
self.sess.run([self.ops['center'],
ep['heading_scores'], ep['heading_residuals'],
ep['size_scores'], ep['size_residuals'], self.ops['depth_feature']],
feed_dict=feed_dict)
# logits[i*batch_size: (i+1)*batch_size] = batch_logits
centers[i*batch_size: (i+1)*batch_size] = batch_centers
heading_logits[i*batch_size: (i+1)*batch_size] = batch_heading_scores
heading_residuals[i*batch_size: (i+1)*batch_size] = batch_heading_residuals
size_logits[i*batch_size: (i+1)*batch_size] = batch_size_scores
size_residuals[i*batch_size: (i+1)*batch_size] = batch_size_residuals
features[i*batch_size: (i+1)*batch_size] = batch_features[:,0,:]
heading_cls = np.argmax(heading_logits, 1) # B
size_cls = np.argmax(size_logits, 1) # B
heading_res = np.vstack([heading_residuals[i, heading_cls[i]] for i in range(heading_cls.shape[0])])
size_res = np.vstack([size_residuals[i, size_cls[i], :] for i in range(size_cls.shape[0])])
#TODO: Make this accept batches if wanted
boxes = []
for i in range(num_inputs):
box = np.array(model_util.from_prediction_to_label_format(centers[i], heading_cls[i], heading_res[i], size_cls[i], size_res[i], rot_angle[i]))
box[6] = np.squeeze(box[6])
swp = box[5]
box[5] = box[4]
box[4] = swp
boxes.append(box)
boxes = np.vstack(boxes)
return boxes, scores[:num_inputs], features[:num_inputs]
def get_instance_seg_v1_net(self, point_cloud, one_hot_vec, is_training, bn_decay, end_points):
''' 3D instance segmentation PointNet v1 network.
Input:
point_cloud: TF tensor in shape (B,N,4)
frustum point clouds with XYZ and intensity in point channels
XYZs are in frustum coordinate
one_hot_vec: TF tensor in shape (B,3)
length-3 vectors indicating predicted object type
is_training: TF boolean scalar
bn_decay: TF float scalar
end_points: dict
Output:
logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object
end_points: dict
'''
num_point = point_cloud.get_shape()[1].value
net = tf.expand_dims(point_cloud, 2)
net = tf_util.conv2d(net, 64, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv1', bn_decay=bn_decay)
net = tf_util.conv2d(net, 64, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv2', bn_decay=bn_decay)
point_feat = tf_util.conv2d(net, 64, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv3', bn_decay=bn_decay)
net = tf_util.conv2d(point_feat, 128, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv4', bn_decay=bn_decay)
net = tf_util.conv2d(net, 1024, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv5', bn_decay=bn_decay)
global_feat = tf_util.max_pool2d(net, [num_point,1],
padding='VALID', scope='maxpool')
global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3)
global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])
net = tf_util.conv2d(concat_feat, 512, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv6', bn_decay=bn_decay)
net = tf_util.conv2d(net, 256, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv7', bn_decay=bn_decay)
net = tf_util.conv2d(net, 128, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv8', bn_decay=bn_decay)
net = tf_util.conv2d(net, 128, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv9', bn_decay=bn_decay)
net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)
logits = tf_util.conv2d(net, 2, [1,1],
padding='VALID', stride=[1,1], activation_fn=None,
scope='conv10')
logits = tf.squeeze(logits, [2]) # BxNxC
return logits, end_points
def get_3d_box_estimation_v1_net(self, object_point_cloud, one_hot_vec,is_training, bn_decay, end_points):
''' 3D Box Estimation PointNet v1 network.
Input:
object_point_cloud: TF tensor in shape (B,M,C)
point clouds in object coordinate
one_hot_vec: TF tensor in shape (B,3)
length-3 vectors indicating predicted object type
Output:
output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
including box centers, heading bin class scores and residuals,
and size cluster scores and residuals
'''
num_point = object_point_cloud.get_shape()[1].value
net = tf.expand_dims(object_point_cloud, 2)
net = tf_util.conv2d(net, 128, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv-reg1', bn_decay=bn_decay)
net = tf_util.conv2d(net, 128, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv-reg2', bn_decay=bn_decay)
net = tf_util.conv2d(net, 256, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv-reg3', bn_decay=bn_decay)
net = tf_util.conv2d(net, 512, [1,1],
padding='VALID', stride=[1,1],
bn=True, is_training=is_training,
scope='conv-reg4', bn_decay=bn_decay)
features = tf.reduce_max(net, axis = 1)
net = tf_util.max_pool2d(net, [num_point,1],
padding='VALID', scope='maxpool2')
net = tf.squeeze(net, axis=[1,2])
net = tf.concat([net, one_hot_vec], axis=1)
net = tf_util.fully_connected(net, 512, scope='fc1', bn=True,
is_training=is_training, bn_decay=bn_decay)
net = tf_util.fully_connected(net, 256, scope='fc2', bn=True,
is_training=is_training, bn_decay=bn_decay)
# The first 3 numbers: box center coordinates (cx,cy,cz),
# the next NUM_HEADING_BIN*2: heading bin class scores and bin residuals
# next NUM_SIZE_CLUSTER*4: box cluster scores and residuals
output = tf_util.fully_connected(net,
3+model_util.NUM_HEADING_BIN*2+model_util.NUM_SIZE_CLUSTER*4, activation_fn=None, scope='fc3')
return output, end_points, features
def get_model(self, point_cloud, one_hot_vec, is_training, bn_decay=None):
''' Frustum PointNets model. The model predict 3D object masks and
amodel bounding boxes for objects in frustum point clouds.
Input:
point_cloud: TF tensor in shape (B,N,4)
frustum point clouds with XYZ and intensity in point channels
XYZs are in frustum coordinate
one_hot_vec: TF tensor in shape (B,3)
length-3 vectors indicating predicted object type
is_training: TF boolean scalar
bn_decay: TF float scalar
Output:
end_points: dict (map from name strings to TF tensors)
'''
end_points = {}
# 3D Instance Segmentation PointNet
logits, end_points = self.get_instance_seg_v1_net(\
point_cloud, one_hot_vec,
is_training, bn_decay, end_points)
end_points['mask_logits'] = logits
# Masking
# select masked points and translate to masked points' centroid
object_point_cloud_xyz, mask_xyz_mean, end_points = \
model_util.point_cloud_masking(point_cloud, logits, end_points)
# T-Net and coordinate translation
center_delta, end_points = model_util.get_center_regression_net(\
object_point_cloud_xyz, one_hot_vec,
is_training, bn_decay, end_points)
stage1_center = center_delta + mask_xyz_mean # Bx3
end_points['stage1_center'] = stage1_center
# Get object point cloud in object coordinate
object_point_cloud_xyz_new = \
object_point_cloud_xyz - tf.expand_dims(center_delta, 1)
# Amodel Box Estimation PointNet
output, end_points, features = self.get_3d_box_estimation_v1_net(\
object_point_cloud_xyz_new, one_hot_vec,
is_training, bn_decay, end_points)
# Parse output to 3D box parameters
end_points = model_util.parse_output_to_tensors(output, end_points)
end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3
return end_points, features
def get_depth_feature_op(self, is_training):
    ''' Build the op that turns self.object_pointcloud into a pooled feature.

    Four 1x1 convolutions with batch norm (scopes 'conv-reg1'..'conv-reg4',
    output widths 128, 128, 256, 512) are applied, followed by a max
    reduction over axis 1.

    Input:
        is_training: forwarded to every conv2d layer
    Output:
        TF tensor, the max over axis 1 of the last conv output
    '''
    # Insert a singleton axis at position 2 so conv2d can be used per point.
    net = tf.expand_dims(self.object_pointcloud, 2)
    layer_widths = (128, 128, 256, 512)
    for layer_idx, width in enumerate(layer_widths, 1):
        net = tf_util.conv2d(net, width, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope='conv-reg%d' % layer_idx, bn_decay=None)
    return tf.reduce_max(net, axis=1)
def get_depth_feature(self, object_pointcloud):
    ''' Run the 'depth_feature' op on the given object point cloud.

    Feeds `object_pointcloud` into self.object_pointcloud with the
    'is_training_pl' entry of self.ops set to False.

    Output:
        whatever self.sess.run returns for a one-element fetch list
        (i.e. the result is wrapped in a list, not unwrapped)
    '''
    feeds = {
        self.object_pointcloud: object_pointcloud,
        self.ops['is_training_pl']: False,
    }
    fetches = [self.ops['depth_feature']]
    return self.sess.run(fetches, feed_dict=feeds)
def softmax(self, x):
    ''' Numpy softmax over the last axis of x.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow.
    '''
    last_axis = len(x.shape) - 1
    shifted = x - np.max(x, axis=last_axis, keepdims=True)
    exps = np.exp(shifted)
    exps /= np.sum(exps, axis=last_axis, keepdims=True)
    return exps
def create_depth_model(model, config_path):
    ''' Instantiate a depth model by name.

    Input:
        model: 'FPointNet' or 'PointNet'
        config_path: forwarded to the model constructor. Per the original
            note, it must be the folder containing the config.yaml file if
            omni_camera is True.
    Output:
        the constructed model, or None when `model` is neither name
    '''
    if model == 'FPointNet':
        depth_model = FPointNet(config_path)
    elif model == 'PointNet':
        depth_model = PointNet(config_path)
    else:
        # Unknown names fall through silently.
        depth_model = None
    return depth_model
================================================
FILE: src/featurepointnet_model_util.py
================================================
# import open3d as o3d
import numpy as np
import tensorflow as tf
import os
import sys
import torch
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
import featurepointnet_tf_util as tf_util
# -----------------
# Global Constants
# -----------------
NUM_HEADING_BIN = 12       # number of discrete heading bins
NUM_SIZE_CLUSTER = 8       # one cluster for each type
NUM_OBJECT_POINT = 512     # points kept per object after masking

# Object type name -> size-cluster class id.
g_type2class = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
}
# Inverse mapping: class id -> object type name.
g_class2type = {class_id: type_name
                for type_name, class_id in g_type2class.items()}
# Object type name -> index in the length-3 one-hot vector.
g_type2onehotclass = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}

# Mean (l, w, h) box size per object type.
# Original note: "Added 0.5m and 0.2m for car and pedestrian to make boxes
# slightly bigger".
# NOTE(review): this file does not show which values were changed - confirm.
g_type_mean_size = {
    'Car': np.array([3.88311640418, 1.62856739989, 1.52563191462]),
    'Van': np.array([5.06763659, 1.9007158, 2.20532825]),
    'Truck': np.array([10.13586957, 2.58549199, 3.2520595]),
    'Pedestrian': np.array([0.84422524, 0.66068622, 1.76255119]),
    'Person_sitting': np.array([0.80057803, 0.5983815, 1.27450867]),
    'Cyclist': np.array([1.76282397, 0.59706367, 1.73698127]),
    'Tram': np.array([16.17150617, 2.53246914, 3.53079012]),
    'Misc': np.array([3.64300781, 1.54298177, 1.92320313]),
}

# Mean sizes stacked by class id: row i holds the size of g_class2type[i].
g_mean_size_arr = np.zeros((NUM_SIZE_CLUSTER, 3))  # size clusters
for i in range(NUM_SIZE_CLUSTER):
    g_mean_size_arr[i, :] = g_type_mean_size[g_class2type[i]]
# -----------------
# TF Functions Helpers
# -----------------
def tf_gather_object_pc(point_cloud, mask, npoints=512):
    ''' Gather a fixed number of object points per batch element.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)
        npoints: int scalar, number of points gathered per batch element
            (default: 512)
    Output:
        object_pc: TF tensor in shape (B,npoint,C)
        indices: TF int tensor in shape (B,npoint,2); each entry is a
            (batch index, point index) pair for tf.gather_nd
    '''
    def mask_to_indices(mask_values):
        # Runs in numpy through tf.py_func.
        batch = mask_values.shape[0]
        gather_idx = np.zeros((batch, npoints, 2), dtype=np.int32)
        for b in range(batch):
            gather_idx[b, :, 0] = b
            picked = np.where(mask_values[b, :] > 0.5)[0]
            n_picked = len(picked)
            if n_picked == 0:
                # Nothing selected: point indices stay 0.
                continue
            if n_picked > npoints:
                # Too many points: subsample without replacement.
                choice = np.random.choice(n_picked, npoints, replace=False)
            else:
                # Too few points: keep all, pad by resampling with replacement.
                padding = np.random.choice(n_picked, npoints - n_picked,
                                           replace=True)
                choice = np.concatenate((np.arange(n_picked), padding))
            np.random.shuffle(choice)
            gather_idx[b, :, 1] = picked[choice]
        return gather_idx

    indices = tf.py_func(mask_to_indices, [mask], tf.int32)
    object_pc = tf.gather_nd(point_cloud, indices)
    return object_pc, indices
def get_box3d_corners_helper(centers, headings, sizes):
    """ TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3)

    Builds the 8 corners of each box from its (l,w,h) size, rotates them
    about the y axis by the heading and translates them to the center.
    """
    N = centers.get_shape()[0].value
    half_l = tf.slice(sizes, [0, 0], [-1, 1]) / 2  # (N,1)
    half_w = tf.slice(sizes, [0, 1], [-1, 1]) / 2  # (N,1)
    half_h = tf.slice(sizes, [0, 2], [-1, 1]) / 2  # (N,1)

    # Corner offsets in the box frame: x spans l, y spans h, z spans w.
    x_corners = tf.concat([half_l, half_l, -half_l, -half_l,
                           half_l, half_l, -half_l, -half_l], axis=1)  # (N,8)
    y_corners = tf.concat([half_h, half_h, half_h, half_h,
                           -half_h, -half_h, -half_h, -half_h], axis=1)  # (N,8)
    z_corners = tf.concat([half_w, -half_w, -half_w, half_w,
                           half_w, -half_w, -half_w, half_w], axis=1)  # (N,8)
    corners = tf.stack([x_corners, y_corners, z_corners], axis=1)  # (N,3,8)

    # Per-box rotation matrix about the y axis.
    cos_h = tf.cos(headings)
    sin_h = tf.sin(headings)
    ones = tf.ones([N], dtype=tf.float32)
    zeros = tf.zeros([N], dtype=tf.float32)
    rotation = tf.stack([
        tf.stack([cos_h, zeros, sin_h], axis=1),
        tf.stack([zeros, ones, zeros], axis=1),
        tf.stack([-sin_h, zeros, cos_h], axis=1),
    ], axis=1)  # (N,3,3)

    corners_3d = tf.matmul(rotation, corners)  # (N,3,8)
    corners_3d += tf.tile(tf.expand_dims(centers, 2), [1, 1, 8])  # (N,3,8)
    return tf.transpose(corners_3d, perm=[0, 2, 1])  # (N,8,3)
def get_box3d_corners(center, heading_residuals, size_residuals):
    """ TF layer.

    Builds box corners for every (heading bin, size cluster) combination.

    Inputs:
        center: (B,3)
        heading_residuals: (B,NH)
        size_residuals: (B,NS,3)
    Outputs:
        box3d_corners: (B,NH,NS,8,3) tensor
    """
    batch_size = center.get_shape()[0].value
    heading_bin_centers = tf.constant(
        np.arange(0, 2*np.pi, 2*np.pi/NUM_HEADING_BIN), dtype=tf.float32)  # (NH,)
    headings = heading_residuals + tf.expand_dims(heading_bin_centers, 0)  # (B,NH)

    # Box size = cluster mean size + predicted residual. The residual must be
    # added exactly once: get_loss builds the ground-truth size as
    # mean + residual label, so adding it twice here (as the previous code did)
    # makes the predicted corners inconsistent with the ground-truth corners.
    mean_sizes = tf.expand_dims(
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0)  # (1,NS,3)
    sizes = mean_sizes + size_residuals  # (B,NS,3)

    # Broadcast everything to the full (B,NH,NS,...) grid.
    sizes = tf.tile(tf.expand_dims(sizes, 1), [1, NUM_HEADING_BIN, 1, 1])  # (B,NH,NS,3)
    headings = tf.tile(tf.expand_dims(headings, -1), [1, 1, NUM_SIZE_CLUSTER])  # (B,NH,NS)
    centers = tf.tile(tf.expand_dims(tf.expand_dims(center, 1), 1),
                      [1, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 1])  # (B,NH,NS,3)

    # Flatten the grid so the helper can treat every candidate as one box.
    N = batch_size*NUM_HEADING_BIN*NUM_SIZE_CLUSTER
    corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N, 3]),
                                          tf.reshape(headings, [N]),
                                          tf.reshape(sizes, [N, 3]))
    return tf.reshape(corners_3d,
                      [batch_size, NUM_HEADING_BIN, NUM_SIZE_CLUSTER, 8, 3])
def huber_loss(error, delta):
    ''' Mean Huber loss: quadratic up to |error| = delta, linear beyond it. '''
    magnitude = tf.abs(error)
    # Part of the magnitude inside the quadratic region.
    clipped = tf.minimum(magnitude, delta)
    # Remainder, penalised linearly.
    excess = magnitude - clipped
    return tf.reduce_mean(0.5 * clipped**2 + delta * excess)
def parse_output_to_tensors(output, end_points):
    ''' Split the flat box-net output into named tensors in end_points.

    Column layout of `output`: 3 center values, NUM_HEADING_BIN heading
    scores, NUM_HEADING_BIN normalized heading residuals, NUM_SIZE_CLUSTER
    size scores, NUM_SIZE_CLUSTER*3 normalized size residuals.

    Input:
        output: TF tensor in shape (B,3+2*NUM_HEADING_BIN+4*NUM_SIZE_CLUSTER)
        end_points: dict
    Output:
        end_points: dict (updated)
    '''
    batch_size = output.get_shape()[0].value

    # Column offsets of each field.
    heading_score_start = 3
    heading_res_start = heading_score_start + NUM_HEADING_BIN
    size_score_start = heading_res_start + NUM_HEADING_BIN
    size_res_start = size_score_start + NUM_SIZE_CLUSTER

    end_points['center_boxnet'] = tf.slice(output, [0, 0], [-1, 3])

    heading_scores = tf.slice(
        output, [0, heading_score_start], [-1, NUM_HEADING_BIN])
    heading_residuals_normalized = tf.slice(
        output, [0, heading_res_start], [-1, NUM_HEADING_BIN])
    end_points['heading_scores'] = heading_scores  # BxNUM_HEADING_BIN
    end_points['heading_residuals_normalized'] = \
        heading_residuals_normalized  # BxNUM_HEADING_BIN (-1 to 1)
    # De-normalize by half a bin width.
    end_points['heading_residuals'] = \
        heading_residuals_normalized * (np.pi/NUM_HEADING_BIN)  # BxNUM_HEADING_BIN

    size_scores = tf.slice(
        output, [0, size_score_start], [-1, NUM_SIZE_CLUSTER])  # BxNUM_SIZE_CLUSTER
    size_residuals_flat = tf.slice(
        output, [0, size_res_start], [-1, NUM_SIZE_CLUSTER*3])
    size_residuals_normalized = tf.reshape(
        size_residuals_flat,
        [batch_size, NUM_SIZE_CLUSTER, 3])  # BxNUM_SIZE_CLUSTERx3
    end_points['size_scores'] = size_scores
    end_points['size_residuals_normalized'] = size_residuals_normalized
    # De-normalize by the mean size of each cluster.
    mean_sizes = tf.expand_dims(
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0)
    end_points['size_residuals'] = size_residuals_normalized * mean_sizes
    return end_points
# -----------------
# Box Parsing Helpers
# -----------------
def from_prediction_to_label_format(center, angle_class, angle_res,
                                    size_class, size_res, rot_angle):
    ''' Convert predicted box parameters to label format.

    Output:
        tx,ty,tz,l,w,h,ry where (tx,ty,tz) is the center rotated by
        -rot_angle about the y axis with h/2 added to ty, and ry is the
        decoded heading plus rot_angle.

    NOTE(review): rotate_pc_along_y writes into the array it is given; if
    `center` is a numpy array, np.expand_dims may hand it a view, so
    `center` itself could be modified - confirm callers do not reuse it.
    '''
    box_size = class2size(size_class, size_res)
    l, w, h = box_size
    ry = class2angle(angle_class, angle_res, NUM_HEADING_BIN) + rot_angle
    rotated_center = rotate_pc_along_y(np.expand_dims(center, 0), -rot_angle)
    tx, ty, tz = rotated_center.squeeze()
    ty += h/2.0
    return tx, ty, tz, l, w, h, ry
def size2class(size, type_name):
    ''' Encode a 3D box size as (template class, residual to its mean size).

    todo (rqi): support multiple size clusters per type.
    Input:
        size: numpy array of shape (3,) for (l,w,h)
        type_name: string
    Output:
        size_class: int scalar
        size_residual: numpy array of shape (3,)
    '''
    return g_type2class[type_name], size - g_type_mean_size[type_name]
def class2size(pred_cls, residual):
    ''' Inverse function to size2class: class mean size plus residual. '''
    type_name = g_class2type[pred_cls]
    return g_type_mean_size[type_name] + residual
def angle2class(angle, num_class):
    ''' Convert continuous angle to discrete class and residual.

    Input:
        angle: rad scalar, from 0-2pi (or -pi~pi), class center at
            0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N)
        num_class: int scalar, number of classes N
    Output:
        class_id, int, among 0,1,...,N-1
        residual_angle: float, a number such that
            class*(2pi/N) + residual_angle = angle
    '''
    full_turn = 2*np.pi
    angle = angle % full_turn
    assert(angle >= 0 and angle <= full_turn)
    bin_width = full_turn/float(num_class)
    # Shift by half a bin so each bin is centered on its class angle.
    shifted = (angle + bin_width/2) % full_turn
    class_id = int(shifted/bin_width)
    residual_angle = shifted - (class_id * bin_width + bin_width/2)
    return class_id, residual_angle
def class2angle(pred_cls, residual, num_class, to_label_format=True):
    ''' Inverse function to angle2class.

    If to_label_format, angles above pi are shifted down by 2*pi.
    '''
    bin_width = 2*np.pi/float(num_class)
    angle = pred_cls * bin_width + residual
    if not to_label_format:
        return angle
    if angle > np.pi:
        angle = angle - 2*np.pi
    return angle
def rotate_pc_along_y(pc, rot_angle):
    ''' Rotate the X and Z channels of a point cloud in place.

    Input:
        pc: numpy array (N,C), first 3 channels are XYZ
            z is facing forward, x is left ward, y is downward
        rot_angle: rad scalar
    Output:
        pc: the same array object, with columns 0 and 2 overwritten
    '''
    cos_a, sin_a = np.cos(rot_angle), np.sin(rot_angle)
    rotation = np.array([[cos_a, -sin_a],
                         [sin_a, cos_a]])
    xz = pc[:, [0, 2]]
    pc[:, [0, 2]] = np.dot(xz, rotation.T)
    return pc
def rotate_pc_along_y_torch(pc, rot_angle):
    ''' Batched torch counterpart of rotate_pc_along_y (modifies pc in place).

    Input:
        pc: torch tensor indexed as (batch, point, channel); channels 0 and 2
            are rotated
        rot_angle: iterable with one rotation angle (rad) per batch element
    Output:
        pc: the same tensor, with channels 0 and 2 overwritten
    '''
    def rotation_for(angle):
        # 2x2 matrix applied on the right of the (x, z) row vectors.
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)
        return torch.Tensor([[cos_a, sin_a], [-sin_a, cos_a]]).type(pc.type())

    rotations = torch.stack([rotation_for(angle) for angle in rot_angle], dim=0)
    pc[:, :, [0, 2]] = torch.bmm(pc[:, :, [0, 2]], rotations)
    return pc
# --------------------------------------
# Shared subgraphs for v1 and v2 models
# --------------------------------------
def placeholder_inputs(batch_size, num_point):
    ''' Create the input and ground-truth placeholders.

    Input:
        batch_size: scalar int
        num_point: scalar int
    Output:
        tuple of TF placeholders, in order: point clouds (B,N,4), one-hot
        type vector (B,3), segmentation labels (B,N), box centers (B,3),
        heading class (B,), heading residual (B,), size class (B,),
        size residual (B,3)
    '''
    def float_pl(*trailing_dims):
        return tf.placeholder(tf.float32, shape=(batch_size,) + trailing_dims)

    def int_pl(*trailing_dims):
        return tf.placeholder(tf.int32, shape=(batch_size,) + trailing_dims)

    # Creation order is kept so auto-generated placeholder names are unchanged.
    pointclouds_pl = float_pl(num_point, 4)
    one_hot_vec_pl = float_pl(3)
    labels_pl = int_pl(num_point)  # segmentation label
    centers_pl = float_pl(3)
    heading_class_label_pl = int_pl()
    heading_residual_label_pl = float_pl()
    size_class_label_pl = int_pl()
    size_residual_label_pl = float_pl(3)

    return (pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl,
            heading_class_label_pl, heading_residual_label_pl,
            size_class_label_pl, size_residual_label_pl)
def point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):
    ''' Select the points predicted as object and re-center them.

    A point is selected when its second logit exceeds its first. Coordinates
    are translated to the centroid of the selected points, then a fixed
    number of points is gathered per batch element.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        logits: TF tensor in shape (B,N,2)
        end_points: dict
        xyz_only: boolean, if True only return XYZ channels
    Output:
        object_point_cloud: TF tensor in shape (B,M,3) when xyz_only
            (extra feature channels are appended otherwise);
            M = NUM_OBJECT_POINT as a hyper-parameter
        mask_xyz_mean: TF tensor in shape (B,3)
        end_points: dict, with 'mask' (BxN) added
    '''
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    first_logit = tf.slice(logits, [0, 0, 0], [-1, -1, 1])
    second_logit = tf.slice(logits, [0, 0, 1], [-1, -1, 1])
    mask = tf.to_float(first_logit < second_logit)  # BxNx1

    mask_count = tf.tile(tf.reduce_sum(mask, axis=1, keep_dims=True),
                         [1, 1, 3])  # Bx1x3
    point_cloud_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])  # BxNx3
    masked_xyz_sum = tf.reduce_sum(tf.tile(mask, [1, 1, 3])*point_cloud_xyz,
                                   axis=1, keep_dims=True)  # Bx1x3
    mask = tf.squeeze(mask, axis=[2])  # BxN
    end_points['mask'] = mask
    # Clamp the count to 1 so an empty mask does not divide by zero.
    mask_xyz_mean = masked_xyz_sum/tf.maximum(mask_count, 1)  # Bx1x3

    # Translate to masked points' centroid.
    centered_xyz = point_cloud_xyz - tf.tile(mask_xyz_mean, [1, num_point, 1])

    if xyz_only:
        stage1_cloud = centered_xyz
    else:
        extra_channels = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
        stage1_cloud = tf.concat([centered_xyz, extra_channels], axis=-1)
    num_channels = stage1_cloud.get_shape()[2].value

    object_point_cloud, _ = tf_gather_object_pc(stage1_cloud,
                                                mask, NUM_OBJECT_POINT)
    # Restore the static shape after the gather.
    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])

    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points
def get_center_regression_net(object_point_cloud, one_hot_vec,
                              is_training, bn_decay, end_points):
    ''' Regression network for center delta. a.k.a. T-Net.

    Three 1x1 conv layers (128, 128, 256), a max pool over all points,
    concatenation with the one-hot type vector, then fully connected
    layers of 256, 128 and 3 units (the last without activation).

    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in 3D mask coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
    Output:
        predicted_center: TF tensor in shape (B,3)
        end_points: the dict passed in, returned unchanged
    '''
    num_point = object_point_cloud.get_shape()[1].value
    net = tf.expand_dims(object_point_cloud, 2)

    # Per-point feature extraction; scopes are conv-reg{1,2,3}-stage1.
    for layer_idx, channels in enumerate((128, 128, 256), 1):
        net = tf_util.conv2d(net, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope='conv-reg%d-stage1' % layer_idx,
                             bn_decay=bn_decay)

    # Pool over the point axis.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool-stage1')
    net = tf.squeeze(net, axis=[1, 2])
    net = tf.concat([net, one_hot_vec], axis=1)

    # Fully connected head; scopes are fc{1,2}-stage1.
    for layer_idx, units in enumerate((256, 128), 1):
        net = tf_util.fully_connected(net, units,
                                      scope='fc%d-stage1' % layer_idx, bn=True,
                                      is_training=is_training,
                                      bn_decay=bn_decay)
    predicted_center = tf_util.fully_connected(net, 3, activation_fn=None,
                                               scope='fc3-stage1')
    return predicted_center, end_points
def softmax(x):
    ''' Numpy softmax over the last axis of x (max-shifted for stability). '''
    axis = len(x.shape) - 1
    row_max = np.max(x, axis=axis, keepdims=True)
    weights = np.exp(x - row_max)
    weights /= np.sum(weights, axis=axis, keepdims=True)
    return weights
def get_loss(mask_label, center_label, \
             heading_class_label, heading_residual_label, \
             size_class_label, size_residual_label, \
             end_points, \
             corner_loss_weight=10.0, \
             box_loss_weight=1.0):
    ''' Loss functions for 3D object detection.

    Combines the segmentation loss with center, heading, size and corner
    losses, and registers a scalar summary for each term.

    Input:
        mask_label: TF int32 tensor in shape (B,N)
        center_label: TF tensor in shape (B,3)
        heading_class_label: TF int32 tensor in shape (B,)
        heading_residual_label: TF tensor in shape (B,)
        size_class_label: TF tensor int32 in shape (B,)
        size_residual_label: TF tensor in shape (B,3)
        end_points: dict, outputs from our model
        corner_loss_weight: float scalar
        box_loss_weight: float scalar
    Output:
        total_loss: TF scalar tensor
            the total_loss is also added to the losses collection
    '''
    # 3D Segmentation loss
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
       logits=end_points['mask_logits'], labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)
    # Center regression losses
    # (Huber loss on the Euclidean distance, for the final and stage-1 centers)
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - \
        end_points['stage1_center'], axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)
    # Heading loss
    heading_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)
    hcls_onehot = tf.one_hot(heading_class_label,
        depth=NUM_HEADING_BIN,
        on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN
    # Normalize the residual label by half a bin width, matching the
    # de-normalization applied in parse_output_to_tensors.
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi/NUM_HEADING_BIN)
    # Only the residual predicted for the ground-truth bin is penalised.
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
        end_points['heading_residuals_normalized']*tf.to_float(hcls_onehot), axis=1) - \
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
        heading_residual_normalized_loss)
    # Size loss
    size_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)
    scls_onehot = tf.one_hot(size_class_label,
        depth=NUM_SIZE_CLUSTER,
        on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims( \
        tf.to_float(scls_onehot), -1), [1,1,3]) # BxNUM_SIZE_CLUSTERx3
    # Residual predicted for the ground-truth size cluster.
    predicted_size_residual_normalized = tf.reduce_sum( \
        end_points['size_residuals_normalized']*scls_onehot_tiled, axis=[1]) # Bx3
    mean_size_arr_expand = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32),0) # 1xNUM_SIZE_CLUSTERx3
    # Mean size of the ground-truth cluster, used to normalize the label.
    mean_size_label = tf.reduce_sum( \
        scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm( \
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
        size_residual_normalized_loss)
    # Corner loss
    # We select the predicted corners corresponding to the
    # GT heading bin and size cluster.
    corners_3d = get_box3d_corners(end_points['center'],
        end_points['heading_residuals'],
        end_points['size_residuals']) # (B,NH,NS,8,3)
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1,1,NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(scls_onehot,1), [1,NUM_HEADING_BIN,1]) # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum( \
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask,-1),-1)) * corners_3d,
        axis=[1,2]) # (B,8,3)
    # Rebuild the ground-truth heading and size from class + residual labels.
    heading_bin_centers = tf.constant( \
        np.arange(0,2*np.pi,2*np.pi/NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
    heading_label = tf.expand_dims(heading_residual_label,1) + \
        tf.expand_dims(heading_bin_centers, 0) # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot)*heading_label, 1)
    mean_sizes = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum( \
        tf.expand_dims(tf.to_float(scls_onehot),-1)*size_label, axis=[1]) # (B,3)
    corners_3d_gt = get_box3d_corners_helper( \
        center_label, heading_label, size_label) # (B,8,3)
    # Also compare against the box with heading flipped by pi and keep the
    # smaller per-corner distance.
    corners_3d_gt_flip = get_box3d_corners_helper( \
        center_label, heading_label+np.pi, size_label) # (B,8,3)
    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0)
    tf.summary.scalar('corners loss', corners_loss)
    # Weighted sum of all losses
    # (both residual losses are scaled by a fixed factor of 20)
    total_loss = mask_loss + box_loss_weight * (center_loss + \
        heading_class_loss + size_class_loss + \
        heading_residual_normalized_loss*20 + \
        size_residual_normalized_loss*20 + \
        stage1_center_loss + \
        corner_loss_weight*corners_loss)
    tf.add_to_collection('losses', total_loss)
    return total_loss
def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
clip_distance=40.0):
''' Filter lidar points, keep those in image FOV '''
#pts_2d = calib.project_rect_to_image(calib.project_ref_to_rect(pc_velo))
#pts_2d = calib.project_rect_to_image_torch(calib.project_ref_to_rect_torch(torch.from_numpy(pc_velo).cuda()))
pts_2d = calib.project_ref_to_image_torch(pc_velo)
fov_inds = (pts_2d[:,0]=xmin) & \
(pts_2d[:,1]=ymin)
# fov_inds = fov_inds & (pc_velo[:,2]=xmin) & \
(pc_image_coord[:,1]=ymin)
pc_in_box_fov = point_cloud[box_fov_inds,:]
if omni:
frustum_angle = ((xmin+xmax)/2.0) /calib.img_shape[2] * (2 * np.pi) - np.pi/2
else:
box_center = np.array([xmax+xmin, ymin+ymax])/2
uvdepth = np.zeros((1,3))
uvdepth[0,0:2] = box_center
uvdepth[0,2] = 20 # some random depth
box2d_center_rect = calib.project_image_to_rect(uvdepth)
frustum_angle = np.pi/2 - np.arctan2(box2d_center_rect[0,2],
box2d_center_rect[0,0])
rot_angles.append(frustum_angle)
if len(pc_in_box_fov) num_point:
idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point), replace=False)
pc_in_box_fov = pc_in_box_fov[idx].unsqueeze(0)
else:
idx = np.random.choice(range(pc_in_box_fov.shape[0]), size = (num_point-pc_in_box_fov.shape[0]), replace=True)
pc_in_box_fov = torch.cat([pc_in_box_fov, pc_in_box_fov[idx]], dim=0).unsqueeze(0)
point_clouds.append(pc_in_box_fov)
point_clouds = torch.cat(point_clouds, dim=0)
point_clouds = rotate_pc_along_y_torch(point_clouds, rot_angles)
return point_clouds, rot_angles, ids_3d
# @profile
def generate_detections_3d(detector, detections_2d, point_cloud, calib, img_shape, peds=False, omni=False):
    ''' Run the 3D detector on the frustum point cloud of each 2D detection.

    Steps: keep the points that project inside the image, build one frustum
    per 2D detection with preprocess_pointcloud, run `detector` on them, and
    invalidate entries whose id is -1 or whose score is NaN.

    Input:
        detector: callable taking (frustums, rot_angles, peds); must expose
            `num_point`
        detections_2d, calib, omni: forwarded to preprocess_pointcloud
        point_cloud: indexable as point_cloud[:, :3]
        img_shape: 3-tuple whose last two entries are (height, width)
        peds: forwarded to `detector`
    Output:
        boxes_3d, ids_3d, rot_angles, scores_3d, depth_features,
        point_cloud_frustrums (the frustums as a numpy array)
    '''
    _, img_height, img_width = img_shape

    # Restrict the cloud to points falling inside the image bounds.
    pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
        point_cloud[:, :3], calib, 0, 0, img_width, img_height)
    pc_image_coord = pc_image_coord[img_fov_inds, :]
    point_cloud = point_cloud[img_fov_inds, :]

    point_cloud_frustrums, rot_angles, ids_3d = preprocess_pointcloud(
        detections_2d, point_cloud, pc_image_coord, calib,
        num_point=detector.num_point, omni=omni)
    point_cloud_frustrums = point_cloud_frustrums.cpu().numpy()

    boxes_3d, scores_3d, depth_features = detector(
        point_cloud_frustrums, np.asarray(rot_angles), peds)

    # Drop the box and mark the id invalid for unusable detections.
    for idx in range(len(ids_3d)):
        unusable = ids_3d[idx] == -1 or np.isnan(scores_3d[idx])
        if unusable:
            boxes_3d[idx] = None
            ids_3d[idx] = -1

    return boxes_3d, ids_3d, rot_angles, scores_3d, depth_features, point_cloud_frustrums
def convert_depth_features(depth_features_orig, ids_3d):
    ''' Replace the features of invalid detections by None.

    An entry is kept only if it is not None and its id in ids_3d is not -1.

    Output:
        list with one entry per input feature
    '''
    return [
        None if (feature is None or ids_3d[idx] == -1) else feature
        for idx, feature in enumerate(depth_features_orig)
    ]
================================================
FILE: src/featurepointnet_tf_util.py
================================================
""" Wrapper functions for TensorFlow layers.
Author: Charles R. Qi
Date: November 2017
"""
import numpy as np
import tensorflow as tf
def _variable_on_cpu(name, shape, initializer, use_fp16=False):
    """Helper to create a Variable stored on CPU memory.

    Args:
      name: name of the variable
      shape: list of ints
      initializer: initializer for Variable
      use_fp16: bool, create the variable as float16 instead of float32
    Returns:
      Variable Tensor
    """
    dtype = tf.float16 if use_fp16 else tf.float32
    with tf.device("/cpu:0"):
        return tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
    """Helper to create an initialized Variable with weight decay.

    The Variable uses a Xavier initializer when use_xavier is set, otherwise
    a truncated normal distribution. A weight decay term is added to the
    'losses' collection only if wd is specified.

    Args:
      name: name of the variable
      shape: list of ints
      stddev: standard deviation of a truncated Gaussian
      wd: add L2Loss weight decay multiplied by this float. If None, weight
          decay is not added for this Variable.
      use_xavier: bool, whether to use xavier initializer
    Returns:
      Variable Tensor
    """
    initializer = (tf.contrib.layers.xavier_initializer() if use_xavier
                   else tf.truncated_normal_initializer(stddev=stddev))
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None:
        return var
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
    return var
def conv1d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=1,
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 1D convolution with non-linear operation.

    Applies, in order: convolution, bias, optional batch norm, optional
    activation.

    Args:
      inputs: 3-D tensor variable BxLxC
      num_output_channels: int
      kernel_size: int
      scope: string
      stride: int
      padding: 'SAME' or 'VALID'
      data_format: 'NHWC' or 'NCHW'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        assert(data_format == 'NHWC' or data_format == 'NCHW')
        # Channels are last for NHWC and at axis 1 for NCHW.
        channel_axis = -1 if data_format == 'NHWC' else 1
        num_in_channels = inputs.get_shape()[channel_axis].value

        kernel = _variable_with_weight_decay(
            'weights',
            shape=[kernel_size, num_in_channels, num_output_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        outputs = tf.nn.conv1d(inputs, kernel,
                               stride=stride,
                               padding=padding,
                               data_format=data_format)

        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

        if bn:
            outputs = batch_norm_for_conv1d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format=data_format)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 2D convolution with non-linear operation.

    Applies, in order: convolution, bias, optional batch norm, optional
    activation.

    Args:
      inputs: 4-D tensor variable BxHxWxC (BxCxHxW for 'NCHW')
      num_output_channels: int
      kernel_size: a list of 2 ints
      scope: string
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'
      data_format: 'NHWC' or 'NCHW'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        kernel_h, kernel_w = kernel_size
        assert(data_format=='NHWC' or data_format=='NCHW')
        if data_format == 'NHWC':
            num_in_channels = inputs.get_shape()[-1].value
        elif data_format=='NCHW':
            num_in_channels = inputs.get_shape()[1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride
        # The strides vector follows the layout of the input: the spatial
        # strides sit at positions 1,2 for NHWC and 2,3 for NCHW. Using the
        # NHWC layout for NCHW inputs would stride over the channel axis.
        if data_format == 'NHWC':
            strides = [1, stride_h, stride_w, 1]
        else:
            strides = [1, 1, stride_h, stride_w]
        outputs = tf.nn.conv2d(inputs, kernel,
                               strides,
                               padding=padding,
                               data_format=data_format)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=True,
                     stddev=1e-3,
                     weight_decay=None,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
    """ 2D convolution transpose with non-linear operation.

    Applies, in order: transposed convolution, bias, optional batch norm,
    optional activation. Batch, height and width may be statically unknown;
    in that case the output shape is computed at run time.

    Args:
      inputs: 4-D tensor variable BxHxWxC
      num_output_channels: int
      kernel_size: a list of 2 ints
      scope: string
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor

    Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        kernel_h, kernel_w = kernel_size
        num_in_channels = inputs.get_shape()[-1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_output_channels, num_in_channels] # reversed to conv2d
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride

        # from slim.convolution2d_transpose
        def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
            # dim_size is either a Python int or a scalar int tensor; both
            # support the arithmetic below.
            dim_size = dim_size * stride_size
            if padding == 'VALID':
                dim_size = dim_size + max(kernel_size - stride_size, 0)
            return dim_size

        # calculate output shape
        static_dims = [inputs.get_shape()[axis].value for axis in range(3)]
        has_unknown_dim = None in static_dims
        if has_unknown_dim:
            # Fall back to the run-time shape for dimensions that are not
            # known statically (previously this raised a TypeError because
            # None was multiplied by the stride).
            runtime_shape = tf.shape(inputs)
            dims = [runtime_shape[axis] if dim is None else dim
                    for axis, dim in enumerate(static_dims)]
        else:
            dims = static_dims
        batch_size, height, width = dims
        out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
        out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
        output_shape = [batch_size, out_height, out_width, num_output_channels]
        if has_unknown_dim:
            # Mixed ints/tensors must be packed into a single 1-D tensor.
            output_shape = tf.stack(output_shape)

        outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                                         [1, stride_h, stride_w, 1],
                                         padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn')

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def conv3d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 3D convolution with non-linear operation.

    Applies, in order: convolution, bias, optional batch norm, optional
    activation.

    Args:
      inputs: 5-D tensor variable BxDxHxWxC
      num_output_channels: int
      kernel_size: a list of 3 ints
      scope: string
      stride: a list of 3 ints
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        kernel_d, kernel_h, kernel_w = kernel_size
        stride_d, stride_h, stride_w = stride
        num_in_channels = inputs.get_shape()[-1].value

        kernel = _variable_with_weight_decay(
            'weights',
            shape=[kernel_d, kernel_h, kernel_w,
                   num_in_channels, num_output_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        strides = [1, stride_d, stride_h, stride_w, 1]
        outputs = tf.nn.conv3d(inputs, kernel, strides, padding=padding)

        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            outputs = batch_norm_for_conv3d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=None,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
    """ Fully connected layer with non-linear operation.

    Applies, in order: matmul, bias, optional batch norm, optional
    activation.

    Args:
      inputs: 2-D tensor BxN
      num_outputs: int

    Returns:
      Variable tensor of size B x num_outputs.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
        num_input_units = inputs.get_shape()[-1].value
        weights = _variable_with_weight_decay(
            'weights',
            shape=[num_input_units, num_outputs],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        biases = _variable_on_cpu('biases', [num_outputs],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(tf.matmul(inputs, weights), biases)

        if bn:
            outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D max pooling over the two middle dimensions of a 4-D tensor.

    Args:
      inputs: 4-D tensor BxHxWxC
      kernel_size: a list of 2 ints (height, width of the pooling window)
      scope: string, variable scope; its name is also used as the op name
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        win_h, win_w = kernel_size
        step_h, step_w = stride
        # Batch and channel dimensions are never pooled (window/stride of 1).
        window = [1, win_h, win_w, 1]
        step = [1, step_h, step_w, 1]
        return tf.nn.max_pool(inputs,
                              ksize=window,
                              strides=step,
                              padding=padding,
                              name=pool_scope.name)
def avg_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
    """ 2D average pooling over the two middle dimensions of a 4-D tensor.

    Args:
      inputs: 4-D tensor BxHxWxC
      kernel_size: a list of 2 ints (height, width of the pooling window)
      scope: string, variable scope; its name is also used as the op name
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        win_h, win_w = kernel_size
        step_h, step_w = stride
        # Batch and channel dimensions are never pooled (window/stride of 1).
        window = [1, win_h, win_w, 1]
        step = [1, step_h, step_w, 1]
        return tf.nn.avg_pool(inputs,
                              ksize=window,
                              strides=step,
                              padding=padding,
                              name=pool_scope.name)
def max_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D max pooling over the three middle dimensions of a 5-D tensor.

    Args:
      inputs: 5-D tensor BxDxHxWxC
      kernel_size: a list of 3 ints (depth, height, width of the window)
      scope: string, variable scope; its name is also used as the op name
      stride: a list of 3 ints
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        win_d, win_h, win_w = kernel_size
        step_d, step_h, step_w = stride
        # Batch and channel dimensions are never pooled (window/stride of 1).
        window = [1, win_d, win_h, win_w, 1]
        step = [1, step_d, step_h, step_w, 1]
        return tf.nn.max_pool3d(inputs,
                                ksize=window,
                                strides=step,
                                padding=padding,
                                name=pool_scope.name)
def avg_pool3d(inputs,
               kernel_size,
               scope,
               stride=[2, 2, 2],
               padding='VALID'):
    """ 3D average pooling over the three middle dimensions of a 5-D tensor.

    Args:
      inputs: 5-D tensor BxDxHxWxC
      kernel_size: a list of 3 ints (depth, height, width of the window)
      scope: string, variable scope; its name is also used as the op name
      stride: a list of 3 ints
      padding: 'SAME' or 'VALID'

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as pool_scope:
        win_d, win_h, win_w = kernel_size
        step_d, step_h, step_w = stride
        # Batch and channel dimensions are never pooled (window/stride of 1).
        window = [1, win_d, win_h, win_w, 1]
        step = [1, step_d, step_h, step_w, 1]
        return tf.nn.avg_pool3d(inputs,
                                ksize=window,
                                strides=step,
                                padding=padding,
                                name=pool_scope.name)
def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
""" NOTE: this is an older version of the util func. It is deprecated.
Batch normalization on convolutional maps and beyond...
Normalizes with the current batch statistics while training and with their
exponential moving averages otherwise.
Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
Args:
inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
is_training: boolean tf.Variable, true indicates training phase
scope: string, variable scope
moments_dims: a list of ints, indicating dimensions for moments calculation
bn_decay: float or float tensor variable, controlling moving average weight
(0.9 is used when None)
Return:
normed: batch-normalized maps
"""
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as sc:
# One offset (beta) and one scale (gamma) per channel, i.e. per entry of the last input dimension.
num_channels = inputs.get_shape()[-1].value
beta = _variable_on_cpu(name='beta',shape=[num_channels],
initializer=tf.constant_initializer(0))
gamma = _variable_on_cpu(name='gamma',shape=[num_channels],
initializer=tf.constant_initializer(1.0))
# Statistics of the current batch, reduced over moments_dims.
batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
decay = bn_decay if bn_decay is not None else 0.9
ema = tf.train.ExponentialMovingAverage(decay=decay)
# Operator that maintains moving averages of variables.
# Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist
# https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
with tf.variable_scope(tf.get_variable_scope(), reuse=False):
# The moving averages are only updated in the training branch; the other branch is a no-op.
ema_apply_op = tf.cond(is_training,
lambda: ema.apply([batch_mean, batch_var]),
lambda: tf.no_op())
# Update moving average and return current batch's avg and var.
def mean_var_with_update():
# The control dependency forces the moving-average update to run before the batch statistics are returned.
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
# ema.average returns the Variable holding the average of var.
# Training: batch statistics (plus the update above); otherwise: the stored moving averages.
mean, var = tf.cond(is_training,
mean_var_with_update,
lambda: (ema.average(batch_mean), ema.average(batch_var)))
# 1e-3 is the variance epsilon passed to tf.nn.batch_normalization.
normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
return normed
def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
    """ Batch normalization on convolutional maps and beyond, via
    tf.contrib.layers.batch_norm.

    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

    Args:
      inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training: boolean tf.Variable, true indicates training phase
      scope: string, variable scope
      moments_dims_unused: accepted for signature compatibility; not used
      bn_decay: float or float tensor variable, controlling moving average
        weight (0.9 is used when None)
      data_format: 'NHWC' or 'NCHW'

    Return:
      normed: batch-normalized maps
    """
    decay = 0.9 if bn_decay is None else bn_decay
    bn_options = dict(center=True,
                      scale=True,
                      is_training=is_training,
                      decay=decay,
                      # None makes the moving-average updates run in place
                      # instead of being collected for a separate update op.
                      updates_collections=None,
                      scope=scope,
                      data_format=data_format)
    return tf.contrib.layers.batch_norm(inputs, **bn_options)
def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
    """ Batch normalization for fully connected outputs.

    Args:
      inputs: Tensor, 2D BxC input
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay: float or float tensor variable, controlling moving average weight
      scope: string, variable scope

    Return:
      normed: batch-normalized maps
    """
    # Moment axes for BxC data; batch_norm_template accepts but ignores them.
    batch_axes = [0, ]
    return batch_norm_template(inputs, is_training, scope, batch_axes, bn_decay)
def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):
    """ Batch normalization for 1D convolutional maps.

    Args:
      inputs: Tensor, 3D BLC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay: float or float tensor variable, controlling moving average weight
      scope: string, variable scope
      data_format: 'NHWC' or 'NCHW'

    Return:
      normed: batch-normalized maps
    """
    # Moment axes for BLC data; batch_norm_template accepts but ignores them.
    batch_axes = [0, 1]
    return batch_norm_template(inputs, is_training, scope, batch_axes, bn_decay, data_format)
def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format):
    """ Batch normalization for 2D convolutional maps.

    Args:
      inputs: Tensor, 4D BHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay: float or float tensor variable, controlling moving average weight
      scope: string, variable scope
      data_format: 'NHWC' or 'NCHW'

    Return:
      normed: batch-normalized maps
    """
    # Moment axes for BHWC data; batch_norm_template accepts but ignores them.
    batch_axes = [0, 1, 2]
    return batch_norm_template(inputs, is_training, scope, batch_axes, bn_decay, data_format)
def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
    """ Batch normalization for 3D convolutional maps.

    Args:
      inputs: Tensor, 5D BDHWC input maps
      is_training: boolean tf.Variable, true indicates training phase
      bn_decay: float or float tensor variable, controlling moving average weight
      scope: string, variable scope

    Return:
      normed: batch-normalized maps
    """
    # Moment axes for BDHWC data; batch_norm_template accepts but ignores them.
    batch_axes = [0, 1, 2, 3]
    return batch_norm_template(inputs, is_training, scope, batch_axes, bn_decay)
def dropout(inputs,
            is_training,
            scope,
            keep_prob=0.5,
            noise_shape=None):
    """ Dropout layer that is only active while training.

    Args:
      inputs: tensor
      is_training: boolean tf.Variable selecting the branch at run time
      scope: string
      keep_prob: float in [0,1]
      noise_shape: list of ints

    Returns:
      tensor variable: the dropped-out inputs when is_training is true,
      otherwise the inputs unchanged
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        def _with_dropout():
            return tf.nn.dropout(inputs, keep_prob, noise_shape)

        def _identity():
            return inputs

        return tf.cond(is_training, _with_dropout, _identity)
================================================
FILE: src/iou_matching.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import linear_assignment
import pdb
def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. Pairs whose union area is not positive (e.g. two zero-area
        boxes) get a score of 0 instead of NaN/inf, so the value can safely
        be turned into an assignment cost.

    """
    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates[:, :2] + candidates[:, 2:]

    # Corners of the intersection rectangle; `bbox` broadcasts over the rows.
    tl = np.maximum(bbox_tl, candidates_tl)
    br = np.minimum(bbox_br, candidates_br)
    # Negative extents mean the boxes do not overlap along that axis.
    wh = np.maximum(0., br - tl)

    area_intersection = wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    area_union = area_bbox + area_candidates - area_intersection

    # Divide only where the union is positive; everything else stays at 0.
    scores = np.zeros(area_union.shape, dtype=float)
    np.divide(area_intersection, area_union, out=scores, where=area_union > 0)
    return scores
def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None, use3d=False, kf=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.
    use3d : bool
        If True, boxes are read from `detection.box_3d` and
        `track.to_tlwh3d()`: the first two entries are shifted by half of
        entries 3:5 and entries [0, 2, 3, 5] are then used as the 4-element
        box. Otherwise `detection.tlwh` and `track.to_tlwh(kf)` are used.
    kf : Optional
        Forwarded to `track.to_tlwh` in the 2-D case.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    if 0 in cost_matrix.shape:
        # No tracks or no detections: nothing to compare.
        return cost_matrix

    # Entries of a 3-D box that make up the 4-element box handed to `iou`.
    kept_entries = [0, 2, 3, 5]

    if use3d:
        # @TODO: Should use a Detection3D class to do this
        candidates = np.array([detections[i].box_3d for i in detection_indices])
        candidates[:, :2] -= candidates[:, 3:5] / 2
        candidates = candidates[:, kept_entries]
    else:
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])

    def _track_box(track):
        # Same conversion as for the detections, applied to one track.
        if not use3d:
            return track.to_tlwh(kf)
        box = track.to_tlwh3d()
        box[:2] -= box[3:5] / 2
        return box[kept_entries]

    for row, track_idx in enumerate(track_indices):
        cost_matrix[row, :] = 1. - iou(_track_box(tracks[track_idx]), candidates)
    return cost_matrix
================================================
FILE: src/kf_2d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
import EKF
import pdb
np.set_printoptions(precision=4, suppress=True)
class KalmanFilter2D(EKF.EKF):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, w, h, vx, vy, vw, vh

    contains the bounding box position (x, y), width w, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box
    (x, y, w, h) is taken as direct observation of the state space (linear
    observation model).

    All noise standard deviations are proportional to the box width/height.
    The initial and process noise are additionally inflated by
    `1 + gain * |x / img_center - 1|`.

    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement,
                 initial_uncertainty, gate_limit, img_center=1242):
        """
        Parameters
        ----------
        pos_weight : float
            Scale of the position/size standard deviations relative to the
            box width/height.
        velocity_weight : float
            Scale of the velocity standard deviations relative to the box
            width/height.
        std_process : float
            Global multiplier of the process noise standard deviation.
        std_measurement : float
            Global multiplier of the measurement noise standard deviation.
        initial_uncertainty : float
            Extra multiplier applied to the covariance of a new track.
        gate_limit : float
            Upper bound on the largest entry of the projected covariance used
            for gating (see `gating_distance`).
        img_center : float
            Reference x coordinate used to inflate the noise (see class
            docstring).
            NOTE(review): the default 1242 is the value that used to be
            hard-coded here - confirm whether it is meant to be an image
            centre or an image width.
        """
        ndim, dt = 4, 1.
        self.ndim = ndim
        self.img_center = img_center

        # Create Kalman filter model matrices.
        # Motion model is constant velocity, i.e. x = x + Vx*dt
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        self.LIMIT = gate_limit

    def _offcenter_scale(self, x, gain=1.):
        """Noise inflation factor `1 + gain * |x / img_center - 1|`."""
        return 1 + gain * abs(x / float(self.img_center) - 1)

    @staticmethod
    def _extent_pattern(box):
        """Return `[w, h, w, h]` taken from entries 2 and 3 of `box`."""
        return np.array([box[2], box[3], box[2], box[3]])

    def initiate(self, measurement, flow):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box (x, y, w, h).
        flow : Optional[ndarray]
            Array indexed as `flow[row, col, :]` whose values are reshaped to
            2-vectors; when given, its mean over the window
            `[y, y + h) x [x, x + w)` initialises (vx, vy). The window is
            clamped at index 0 and an empty window leaves the velocity at 0.
            NOTE(review): the window treats (x, y) as the top-left corner of
            the box - confirm against the caller's box convention.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Velocities that are not taken from
            `flow` are initialized to 0 mean.

        """
        # Work in float so that flow velocities are not truncated when the
        # measurement happens to be an integer array.
        mean_pos = np.asarray(measurement, dtype=float)
        mean_vel = np.zeros_like(mean_pos)
        if flow is not None:
            # Clamp at 0: a negative slice bound would wrap around to the far
            # side of the array instead of clipping at the border.
            row_lo = max(int(mean_pos[1]), 0)
            row_hi = max(int(mean_pos[1] + mean_pos[3]), 0)
            col_lo = max(int(mean_pos[0]), 0)
            col_hi = max(int(mean_pos[0] + mean_pos[2]), 0)
            window = flow[row_lo:row_hi, col_lo:col_hi, :]
            # The mean of an empty window would be NaN and poison the state.
            if window.size > 0:
                mean_vel[:2] = np.mean(np.reshape(window, (-1, 2)), axis=0)
        mean = np.r_[mean_pos, mean_vel]

        # Initialize covariance based on w, h and configured std
        extents = self._extent_pattern(mean_pos)
        std = np.r_[
            self._offcenter_scale(mean_pos[0]) * self._std_weight_pos * extents,
            self._offcenter_scale(mean_pos[0], 1.5) * self._std_weight_vel * extents]
        covariance = (np.diag(np.square(std))
                      * (self._initial_uncertainty * self._std_weight_process) ** 2)
        return mean, covariance

    def predict_mean(self, mean):
        """Propagate the state mean one step with the linear motion model."""
        return np.dot(self._motion_mat, mean)

    def predict_covariance(self, mean, covariance):
        """Propagate the state covariance one step: `F P F^T + R(mean)`."""
        process_noise = self.get_process_noise(mean)
        return (np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T))
                + process_noise)

    def get_process_noise(self, mean):
        """Return the diagonal process noise covariance for state `mean`.

        Standard deviations are proportional to the estimated width/height
        and inflated by the off-centre factor of `mean[0]`.
        """
        extents = self._extent_pattern(mean)
        scale = self._offcenter_scale(mean[0])
        std_pos = scale * self._std_weight_pos * extents
        std_vel = scale * self._std_weight_vel * extents
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) * self._std_weight_process ** 2
        return motion_cov

    def project_mean(self, mean):
        """Project the state mean into measurement space (x, y, w, h)."""
        return np.dot(self._observation_mat, mean)

    def get_measurement_noise(self, measurement):
        """Return the diagonal measurement noise covariance.

        Standard deviations are proportional to entries 2 and 3 (width and
        height) of `measurement`.
        """
        std = self._std_weight_pos * self._extent_pattern(measurement)
        innovation_cov = np.diag(np.square(std)) * self._std_weight_measurement ** 2
        return innovation_cov

    def project_cov(self, mean, covariance):
        """Return the innovation covariance `H P H^T + Q`.

        The measurement noise Q is evaluated at the state `mean`, i.e. it
        uses the estimated width and height.
        """
        measurement_noise = self.get_measurement_noise(mean)
        innovation_cov = (np.linalg.multi_dot((self._observation_mat, covariance,
                                               self._observation_mat.T))
                          + measurement_noise)
        return innovation_cov

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, use_3d=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from a chi-square
        quantile table. If `only_position` is False, the chi-square
        distribution has 4 degrees of freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, w, h).
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box position (x, y) only.
        use_3d : Optional[bool]
            Accepted for interface compatibility; not used by this filter.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        projected_mean, projected_covariance = self.project(mean, covariance)
        if only_position:
            projected_mean, projected_covariance = projected_mean[:2], projected_covariance[:2, :2]
            measurements = measurements[:, :2]
        # Rescale the covariance so that its largest entry never exceeds
        # LIMIT; this keeps the gate from growing without bound.
        max_val = np.amax(projected_covariance)
        if max_val > self.LIMIT:
            projected_covariance *= self.LIMIT / max_val
        return EKF.squared_mahalanobis_distance(projected_mean, projected_covariance, measurements)
class RandomWalkKalmanFilter2D(KalmanFilter2D):
    """
    Random-walk variant of `KalmanFilter2D` for boxes in image space.

    The state keeps the parent's 8-dimensional layout

        x, y, w, h, vx, vy, vw, vh

    but the transition matrix leaves (x, y, w, h) unchanged and zeroes the
    velocity entries, so no velocity is carried from one step to the next.
    The box (x, y, w, h) is observed directly (linear observation model).
    Noise models and gating are inherited from `KalmanFilter2D`.
    """

    def __init__(self, pos_weight, velocity_weight, std_process, std_measurement,
                 initial_uncertainty, img_center=1242, gate_limit=float('inf')):
        """
        Parameters
        ----------
        pos_weight, velocity_weight, std_process, std_measurement,
        initial_uncertainty, img_center
            Same meaning as in `KalmanFilter2D`.
        gate_limit : float
            Upper bound on the largest projected-covariance entry used by the
            inherited `gating_distance`. The default (infinity) disables the
            rescaling; without this attribute `gating_distance` would fail
            with an AttributeError.
        """
        ndim = 4
        self.ndim = ndim
        self.img_center = img_center

        # Create Kalman filter model matrices.
        # Motion model is a random walk: identity on (x, y, w, h), and the
        # velocity block is zeroed so velocities do not persist.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        self._motion_mat[ndim:, ndim:] = 0
        # Sensor model is direct observation, i.e. x = x
        self._observation_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_process = std_process
        self._std_weight_measurement = std_measurement
        self._std_weight_pos = pos_weight
        self._std_weight_vel = velocity_weight
        self._initial_uncertainty = initial_uncertainty
        # Read by the inherited gating_distance().
        self.LIMIT = gate_limit
================================================
FILE: src/linear_assignment.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment
import EKF
import pdb
from mbest_ilp import new_m_best_sol
from multiprocessing import Pool
from functools import partial
#from mbest_ilp import m_best_sol as new_m_best_sol
INFTY_COST = 1e+5
APP_COUNT = 0
IOU_COUNT = 0
def min_marg_matching(marginalizations, track_indices=None, max_distance=1):
    """Turn marginal association scores into hard track/detection matches.

    Parameters
    ----------
    marginalizations : ndarray
        (num_tracks, num_detections + 1) matrix of scores; column 0 is the
        "no detection" (dummy) column and column j + 1 belongs to detection j.
        The assignment cost is `1 - marginalizations`.
    track_indices : Optional[List[int]]
        Maps rows to track indices. Defaults to `0..num_tracks-1`.
    max_distance : float
        Matches whose cost exceeds this value are rejected.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Matches as (track index, detection index), unmatched track indices
        and unmatched detection indices.
    """
    cost_matrix = 1 - marginalizations
    num_tracks, num_detections = cost_matrix.shape

    if track_indices is None:
        track_indices = np.arange(num_tracks)
    detection_indices = np.arange(num_detections - 1)

    if num_tracks == 0 or num_detections == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    # Replicate the dummy column so that every track can pick "no detection"
    # at the same time: columns [0, num_tracks) are dummies, real detection j
    # sits at column num_tracks + j.
    dummy_column = cost_matrix[:, 0, np.newaxis]
    expanded_cost_matrix = np.hstack(
        (np.tile(dummy_column, (1, num_tracks - 1)), cost_matrix))
    indices = linear_assignment(expanded_cost_matrix)
    assigned_rows, assigned_cols = indices[:, 0], indices[:, 1]

    matches = []
    # gather unmatched detections (new track)
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col + num_tracks not in assigned_cols]
    # gather unmatched tracks (no detection)
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    # thresholding and matches
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = col - num_tracks
        if detection_idx < 0:
            # Assigned to one of the dummy columns.
            unmatched_tracks.append(track_idx)
        elif expanded_cost_matrix[row, col] > max_distance:
            # apply thresholding
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            # associate matches
            matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections
def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None, compare_2d = False, detections_3d=None):
    """Solve linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d, detections_3d)`. It should return the
        NxM dimensional cost matrix, where element (i, j) is the association
        cost between the i-th track in the given track indices and the j-th
        detection in the given detection_indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above). Defaults to all tracks.
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above). Defaults to all
        detections.
    compare_2d : bool
        Forwarded unchanged to `distance_metric`.
    detections_3d : Optional
        Forwarded unchanged to `distance_metric`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d, detections_3d)
    # Flatten everything beyond the gate to one value just above it, so that
    # infeasible pairs are all equally unattractive to the solver.
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    indices = linear_assignment(cost_matrix)
    assigned_rows, assigned_cols = indices[:, 0], indices[:, 1]

    matches = []
    # gather unmatched detections (new track)
    unmatched_detections = [
        detection_idx
        for col, detection_idx in enumerate(detection_indices)
        if col not in assigned_cols]
    # gather unmatched tracks (no detection)
    unmatched_tracks = [
        track_idx
        for row, track_idx in enumerate(track_indices)
        if row not in assigned_rows]

    # thresholding and matches
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] <= max_distance:
            # associate matches
            matches.append((track_idx, detection_idx))
        else:
            # apply thresholding
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
    return matches, unmatched_tracks, unmatched_detections
# @profile
def JPDA(
        distance_metric, dummy_node_cost_app, dummy_node_cost_iou, tracks, detections, track_indices=None,
        detection_indices=None, m=1, compare_2d = False, windowing = False):
    """Compute joint probabilistic data association weights.

    Parameters
    ----------
    distance_metric : Callable
        Called as `distance_metric(tracks, detections, track_indices,
        detection_indices, compare_2d)`. It should return a pair
        `(cost_matrix, gate_mask)`: a cost array indexed as
        [track, detection, channel] and a mask of gated (infeasible) pairs.
    dummy_node_cost_app : float
        Cost of the "no detection" node, forwarded to `get_JPDA_output`.
    dummy_node_cost_iou : float
        Cost of the "no detection" node, forwarded to `get_JPDA_output`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Track indices to associate. Defaults to all tracks.
    detection_indices : List[int]
        Detection indices to associate. Defaults to all detections.
    m : int
        Forwarded to `get_JPDA_output` (number of best solutions requested).
    compare_2d : bool
        Forwarded unchanged to `distance_metric`.
    windowing : bool
        Not used by this function.

    Returns
    -------
    ndarray
        A (num_tracks, num_detections + 1) matrix; column 0 is the "no
        detection" column. When there are no tracks or no detections to
        associate, an array of shape (0, len(detections) + 1) is returned.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return np.zeros((0, len(detections) + 1))  # Nothing to match.

    cost_matrix, gate_mask = distance_metric(
        tracks, detections, track_indices, detection_indices, compare_2d)
    num_tracks, num_detections = cost_matrix.shape[:2]
    cost_matrix[gate_mask] = INFTY_COST

    # Anything at (or within 1e-4 of) INFTY_COST counts as gated.
    gated_cutoff = INFTY_COST - 0.0001
    # Independent groups of tracks/detections, found on channel 0.
    clusters = find_clusters(cost_matrix[:, :, 0], gated_cutoff)
    jpda_output = [
        get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app,
                        dummy_node_cost_iou, gated_cutoff, m)
        for cluster in clusters]

    if not jpda_output:
        # No cluster at all: every track goes to the dummy column.
        fallback = np.zeros((num_tracks, num_detections + 1))
        fallback[:, 0] = 1
        return fallback

    # Flatten the per-cluster hypothesis lists into one stack.
    assignments = np.vstack(
        [hypothesis for per_cluster, _ in jpda_output for hypothesis in per_cluster])
    assignment_cost = np.array(
        [cost for _, per_cluster in jpda_output for cost in per_cluster])

    # Weight every hypothesis by exp(-cost) and sum them up.
    weights = np.exp(-np.expand_dims(assignment_cost, 1))
    marginalised_cost = np.sum(assignments * weights, axis=0)
    return np.reshape(marginalised_cost, (num_tracks, num_detections + 1))
def calculate_entropy(matrix, idx, idy):
    """Mean row entropy of `matrix` with the entries (idx, idy) excluded.

    Every row is normalised by the sum of its non-excluded entries, the
    Shannon entropy `-sum(p * log(p))` of the non-excluded entries is computed
    per row, and the mean over rows is returned.

    Parameters
    ----------
    matrix : ndarray
        2-D array of non-negative values.
    idx, idy : array-like of int
        Row and column indices of the entries to exclude.

    Returns
    -------
    float
        The mean entropy over rows. Entries that are exactly 0 contribute 0
        (the usual `0 * log(0) = 0` convention) instead of turning the whole
        result into NaN.
    """
    mask = np.ones(matrix.shape)
    mask[idx, idy] = 0
    probs = matrix / np.sum(mask * matrix, axis=1, keepdims=True)
    # log(0) is -inf and 0 * -inf is NaN; compute quietly and patch the
    # zero entries afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        p_log_p = probs * np.log(probs)
    p_log_p = np.where(probs == 0, 0.0, p_log_p)
    entropy = -p_log_p * mask
    return np.mean(np.sum(entropy, axis=1))
def get_JPDA_output(cluster, cost_matrix, dummy_node_cost_app, dummy_node_cost_iou, cutoff, m):
"""Build the association hypotheses of one cluster and their costs.
`cluster` is a pair (track indices, detection indices). The detection
indices are offset by one: index j refers to column j - 1 of `cost_matrix`,
and column 0 of every returned assignment is the "no detection" column.
`cost_matrix` is indexed as [track, detection, channel]; channel 0 is used as
the appearance cost and channel 1 as the IoU cost.
Returns a pair (assignments, costs): a list of arrays of shape
(1, num_tracks * (num_detections + 1)) and an array with one cost each.
"""
# A cluster without detections: each of its tracks gets weight 1 in the
# dummy column, as a single hypothesis with cost 0.
if len(cluster[1]) == 0:
assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
assignment[cluster[0], 0] = 1
assignment = assignment.reshape(1,-1)
return [assignment], np.array([0])
# Cut the cluster's (tracks x detections) sub-matrix out of channel 0 ...
new_cost_matrix_appearance = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])),
np.tile(cluster[1] - 1, len(cluster[0])),
[0]*(len(cluster[1])*len(cluster[0]))],
(len(cluster[0]), len(cluster[1])))
# ... and out of channel 1.
new_cost_matrix_iou = np.reshape(cost_matrix[np.repeat(cluster[0], len(cluster[1])), np.tile(cluster[1] - 1, len(cluster[0])), 1],
(len(cluster[0]), len(cluster[1])))
# Entries above the cutoff (taken from the appearance channel) are left out
# of both entropy computations and are reset to `cutoff` further down.
idx_x, idx_y = np.where(new_cost_matrix_appearance > cutoff)
appearance_entropy = calculate_entropy(new_cost_matrix_appearance, idx_x, idx_y)
iou_entropy = calculate_entropy(new_cost_matrix_iou, idx_x, idx_y)
# Use the cue with the lower mean row entropy; the module-level counters
# record how often each cue was chosen.
if appearance_entropy < iou_entropy:
new_cost_matrix = new_cost_matrix_appearance
# new_cost_matrix = 2*np.ones(new_cost_matrix.shape)/(new_cost_matrix+1) - 1
global APP_COUNT
APP_COUNT += 1
dummy_node_cost = dummy_node_cost_app
else:
global IOU_COUNT
IOU_COUNT += 1
new_cost_matrix = new_cost_matrix_iou
# Nudge costs of exactly 1 so that 1 - cost stays strictly positive.
new_cost_matrix[new_cost_matrix==1] -= 1e-3
new_cost_matrix = 1 - new_cost_matrix
dummy_node_cost = -np.log(1-dummy_node_cost_iou)
new_cost_matrix = -np.log(new_cost_matrix)
new_cost_matrix[idx_x, idx_y] = cutoff
# Single-track cluster: the hypotheses can be enumerated directly, so the
# weights are exp(-cost) over [dummy, detections], normalised to sum to 1.
if len(cluster[0]) == 1:
new_cost_matrix = np.concatenate([np.ones((new_cost_matrix.shape[0], 1))*dummy_node_cost, new_cost_matrix], axis = 1)
total_cost = np.sum(np.exp(-new_cost_matrix))
new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = np.exp(-new_cost_matrix)/total_cost
new_assignment = new_assignment.reshape(1, -1)
return [new_assignment], np.array([0])
if new_cost_matrix.ndim <= 1:
new_cost_matrix = np.expand_dims(new_cost_matrix, 1)
# print(new_cost_matrix)
# Several tracks: ask the solver for the m best joint assignments.
assignments, assignment_cost = new_m_best_sol(new_cost_matrix, m, dummy_node_cost)
# Shift the costs so that the best solution has cost 0 before exponentiating.
offset = np.amin(assignment_cost)
assignment_cost -= offset
new_assignments = []
total_cost = np.sum(np.exp(-assignment_cost))
# Scatter every solution into a full-size (tracks x detections + 1) matrix,
# divided by the sum of exp(-cost) over all returned solutions.
for assignment in assignments:
new_assignment = np.zeros((cost_matrix.shape[0], cost_matrix.shape[1]+1))
new_assignment[np.repeat(cluster[0], len(cluster[1])+1), np.tile(
np.concatenate([np.zeros(1, dtype = np.int32), cluster[1]]), len(cluster[0]))] = \
assignment/total_cost
new_assignments.append(new_assignment.reshape(1, -1))
return new_assignments, assignment_cost
def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None, compare_2d = False, detections_3d=None):
    """Run matching cascade.

    Tracks are matched in order of increasing `time_since_update`: level 0
    handles tracks last updated one step ago, level 1 those updated two steps
    ago, and so on. Each level only sees the detections left over by the
    previous ones.

    Parameters
    ----------
    distance_metric : Callable
        Forwarded to `min_cost_matching`, which calls it to obtain the cost
        matrix for the tracks and detections of one level.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    cascade_depth: int
        The cascade depth, should be set to the maximum track age.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        Indices of the tracks to match. Defaults to all tracks.
    detection_indices : Optional[List[int]]
        Indices of the detections to match. Defaults to all detections.
    compare_2d : bool
        Forwarded unchanged to `min_cost_matching`.
    detections_3d : Optional
        Forwarded unchanged to `min_cost_matching`.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    matches = []
    remaining_detections = detection_indices
    for level in range(cascade_depth):
        if len(remaining_detections) == 0:  # No detections left
            break

        age = 1 + level
        level_tracks = [k for k in track_indices
                        if tracks[k].time_since_update == age]
        if len(level_tracks) == 0:  # Nothing to match at this level
            continue

        level_matches, _, remaining_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections,
            level_tracks, remaining_detections, compare_2d,
            detections_3d=detections_3d)
        matches += level_matches

    matched_tracks = set(k for k, _ in matches)
    unmatched_tracks = list(set(track_indices) - matched_tracks)
    return matches, unmatched_tracks, remaining_detections
# @profile
def gate_cost_matrix(
        kf, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False, use3d=False, windowing = False):
    """Compute which track/detection pairs are infeasible according to the
    state distributions obtained by Kalman filtering.

    Parameters
    ----------
    kf : The Kalman filter.
        Must provide `gating_distance(mean, covariance, measurements,
        only_position, use3d)`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices; row i of the result belongs to
        `tracks[track_indices[i]]`.
    detection_indices : List[int]
        List of detection indices; column j of the result belongs to
        `detections[detection_indices[j]]`.
    gated_cost : Optional[float]
        Not used by this function; kept for interface compatibility.
    only_position : Optional[bool]
        If True, only the x, y position of the state distribution is considered
        during gating. Defaults to False.
    use3d : Optional[bool]
        If True, measurements are taken from `detection.box_3d`, otherwise
        from `detection.to_xywh()`. Also forwarded to `kf.gating_distance`.
    windowing : Optional[bool]
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    Optional[ndarray]
        Boolean mask of shape (len(track_indices), len(detection_indices))
        that is True where the gating distance exceeds the chi-square
        threshold `EKF.chi2inv975[dim]`. Returns None when there are no
        tracks or no detections.

    """
    if len(track_indices) == 0 or len(detection_indices) == 0:
        return None

    # Gather measurements in `detection_indices` order in both branches, so
    # that mask column j always corresponds to detection_indices[j].
    if use3d:
        measurements = np.array(
            [detections[i].box_3d for i in detection_indices])
    else:
        measurements = np.asarray(
            [detections[i].to_xywh() for i in detection_indices])

    # Degrees of freedom of the chi-square test.
    gating_dim = 2 if only_position else measurements.shape[1]
    gating_threshold = EKF.chi2inv975[gating_dim]

    gate_mask = []
    for track_idx in track_indices:
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, use3d)
        gate_mask.append(gating_distance > gating_threshold)
    return np.vstack(gate_mask)
def find_clusters(cost_matrix, cutoff):
    """Split tracks and detections into independent association clusters.

    A track (row) and a detection (column) are considered linked when their
    cost is strictly below `cutoff`. Tracks that are connected through a chain
    of such links end up in the same cluster, so each cluster can be solved
    separately from the others.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D cost matrix with one row per track and one column per detection.
    cutoff : float
        Costs greater than or equal to this value do not link a track to a
        detection.

    Returns
    -------
    List[Tuple[ndarray, ndarray]]
        One `(track_rows, detection_ids)` pair per cluster, both `int32` and
        sorted ascending. `track_rows` are 0-based row indices; every track
        appears in exactly one cluster. `detection_ids` are column indices
        shifted by +1 (column j is reported as j + 1), presumably so that 0
        stays free for a "missed detection" slot -- confirm with callers. A
        track without any feasible detection forms a cluster with an empty
        detection array.
    """
    num_tracks, _ = cost_matrix.shape
    feasible = cost_matrix < cutoff
    clusters = []
    assigned_tracks = set()
    for seed_track in range(num_tracks):
        if seed_track in assigned_tracks:
            continue
        cluster_tracks = set()
        cluster_columns = set()
        pending = [seed_track]
        while pending:
            track = pending.pop()
            if track in cluster_tracks:
                continue
            cluster_tracks.add(track)
            for column in np.flatnonzero(feasible[track]):
                column = int(column)
                # Each detection column only needs to be expanded once.
                if column in cluster_columns:
                    continue
                cluster_columns.add(column)
                for other in np.flatnonzero(feasible[:, column]):
                    other = int(other)
                    if other not in cluster_tracks:
                        pending.append(other)
        assigned_tracks.update(cluster_tracks)
        track_rows = np.array(sorted(cluster_tracks), dtype=np.int32)
        detection_ids = np.array(
            [column + 1 for column in sorted(cluster_columns)], dtype=np.int32)
        clusters.append((track_rows, detection_ids))
    return clusters
================================================
FILE: src/mbest_ilp.py
================================================
from gurobipy import Model, quicksum, LinExpr, GRB
import numpy as np
import copy
import time
from sklearn.utils.linear_assignment_ import linear_assignment
import pickle
import itertools
import pdb
from copy import deepcopy
import math
"""
Fn: ilp_assignment
------------------
Solves ILP problem using gurobi
"""
def ilp_assignment(model):
    """Run the solver on `model` in place and flag an infeasible problem.

    Parameters
    ----------
    model : object
        A model exposing `optimize()` and a `status` attribute.

    Returns
    -------
    Optional[int]
        -1 when `model.status` equals 3 after optimizing, otherwise None.
    """
    model.optimize()
    # NOTE(review): 3 presumably is Gurobi's INFEASIBLE status code -- confirm
    # against GRB.Status.
    infeasible = (model.status == 3)
    return -1 if infeasible else None
"""
Fn: initialize_model
--------------------
Initializes gurobi ILP model by setting the base objective
"""
# @profile
def initialize_model(cost_matrix, cutoff, model = None):
    """Build (or rebuild) the base assignment ILP for a cost matrix.

    A dummy detection column with cost `cutoff` is inserted at index 0, then
    one binary variable is created per (track, detection) entry. Each track
    must pick exactly one column; each real detection (columns 1..N-1) may be
    picked at most once, while the dummy column is unconstrained.

    Parameters
    ----------
    cost_matrix : ndarray
        Track-by-detection cost matrix (without the dummy column).
    cutoff : float
        Cost charged for assigning a track to the dummy detection.
    model : Optional[gurobipy.Model]
        Existing model to reuse; its variables and constraints are removed
        first. A fresh model is created when None.

    Returns
    -------
    Tuple[Model, int, int, list]
        The model, the number of rows M, the number of columns N (including
        the dummy column) and the decision variables as a flat list.
    """
    # Add dummy detection as column 0.
    dummy_column = np.ones(cost_matrix.shape[0]) * cutoff
    cost_matrix = np.insert(cost_matrix, 0, dummy_column, axis=1)
    M, N = cost_matrix.shape
    if model is not None:
        # Reuse the caller's model: clear whatever the previous problem left.
        model.remove(model.getVars())
        model.remove(model.getConstrs())
    else:
        model = Model()
    model.setParam('OutputFlag', False)
    y = model.addVars(M, N, vtype=GRB.BINARY, name = 'y')
    row_costs = (
        quicksum([y[i, j] * cost_matrix[i][j] for j in range(N)])
        for i in range(M)
    )
    model.setObjective(quicksum(row_costs), GRB.MINIMIZE)
    # Every track is assigned to exactly one column (possibly the dummy one).
    model.addConstrs(
        (quicksum(y[i, j] for j in range(N)) == 1 for i in range(M)),
        name='constraint for track')
    # Every real detection is used by at most one track.
    model.addConstrs(
        (quicksum(y[i, j] for i in range(M)) <= 1 for j in range(1, N)),
        name='constraint for detection')
    flat_vars = list(y.values())
    return model, M, N, flat_vars
"""
Fn: m_best_sol
--------------
Finds m_best solutions for object/track association given the
input cost matrix. Solves constrained ILP problems using gurobi solver.
"""
def cache(func):
cache = {}
def cached_function(*args):
cost_matrix = args[0]
cost_matrix = np.hstack((np.ones((cost_matrix.shape[0], 1))*args[1], cost_matrix))
if (cost_matrix.shape[0], cost_matrix.shape[1]) in cache:
solution_list = cache[(cost_matrix.shape[0], cost_matrix.shape[1])]
solution_vals = np.sum(solution_list*cost_matrix.reshape(1, -1), axis = 1)
return solution_list, solution_vals
else:
solution_list, solution_vals = func(*args)
cache[(cost_matrix.shape[0], cost_matrix.shape[1])] = solution_list
return solution_list, solution_vals
return cached_function
# @profile
def num_solutions(cost_matrix):
    """Count the feasible track/detection assignments for a cost matrix.

    With M tracks (rows) and D detections (columns), an assignment matches
    `k` tracks to `k` distinct detections and leaves the remaining tracks
    unmatched, which gives C(M, k) * D! / (D - k)! possibilities for each k.
    The terms are accumulated for k = 0, 1, ... and counting stops as soon as
    the running total exceeds 2000, so any result above 2000 is only a lower
    bound.

    Parameters
    ----------
    cost_matrix : ndarray
        2-D cost matrix; only its shape is used.

    Returns
    -------
    int
        The exact number of assignments, or a partial total above 2000.
    """
    num_rows, num_cols = cost_matrix.shape
    total = 0
    term = 1
    for matched in range(min(num_rows, num_cols) + 1):
        if matched > 0:
            # Move from the (matched - 1) term to the next one; the product
            # is always divisible by `matched`.
            term = term * (num_rows - matched + 1) * (num_cols - matched + 1) // matched
        total += term
        if total > 2000:
            break
    return int(total)
@cache
def enumerate_solutions(cost_matrix, cutoff, num_solutions):
    """Enumerate every feasible track/detection assignment and its cost.

    A column filled with `cutoff` is prepended as the "unmatched" option.
    Each solution is an M x N 0/1 matrix with exactly one 1 per row: in
    column 0 for an unmatched track, otherwise in a detection column that no
    other row uses.

    Parameters
    ----------
    cost_matrix : ndarray
        Track-by-detection cost matrix (without the cutoff column).
    cutoff : float
        Cost charged for leaving a track unmatched.
    num_solutions : int
        Number of solutions to allocate; must equal the number of feasible
        assignments for this matrix shape.

    Returns
    -------
    Tuple[ndarray, ndarray]
        `solution_list` of shape (num_solutions, M * N) holding the flattened
        int32 assignment matrices, and `solution_vals` with the total cost of
        each solution.
    """
    unmatched_column = np.ones((cost_matrix.shape[0], 1)) * cutoff
    cost_matrix = np.hstack((unmatched_column, cost_matrix))
    M, N = cost_matrix.shape
    # Start with every track unmatched in every solution.
    solution_list = np.zeros((num_solutions, M, N), dtype = np.int32)
    solution_list[:, :, 0] = 1
    count = 0
    for num_matched in range(min(M + 1, N)):
        for chosen in itertools.combinations(range(M), num_matched):
            for perm in itertools.permutations(range(1, N), num_matched):
                if chosen:
                    rows = list(chosen)
                    slots = [count] * len(rows)
                    # Move the chosen tracks from "unmatched" to their detection.
                    solution_list[slots, rows, list(perm)] = 1
                    solution_list[slots, rows, [0] * len(rows)] = 0
                count += 1
    weighted = solution_list * np.expand_dims(cost_matrix, 0)
    solution_vals = np.sum(np.sum(weighted, axis = 1), axis = 1)
    solution_list = np.reshape(solution_list, (num_solutions, -1))
    return solution_list, solution_vals
def new_m_best_sol(cost_matrix, m_sol, cutoff, model = None):
"""Return candidate track/detection assignments and their costs.

Parameters
----------
cost_matrix : ndarray
    Track-by-detection cost matrix (without the dummy column).
m_sol : int
    Number of solutions requested from the ILP path.
cutoff : float
    Cost of assigning a track to the dummy detection.
model : Optional[gurobipy.Model]
    Forwarded to `initialize_model`.

Returns
-------
Tuple[ndarray, ndarray]
    `(X, xv)`: flattened assignment matrices (one per row) and their
    objective values. When `num_solutions(cost_matrix)` is at most 2000 the
    result of `enumerate_solutions` is returned directly, i.e. all
    enumerated solutions rather than `m_sol` of them.
"""
# Small problems: enumerate every assignment instead of solving ILPs.
sols = num_solutions(cost_matrix)
if sols <= 2000:
return enumerate_solutions(cost_matrix, cutoff, sols)
model, M, N, y = initialize_model(cost_matrix, cutoff, model)
# One row per requested solution, each a flattened M x N assignment.
X = np.zeros((m_sol, M*N))
xv = []
# Best solution: plain solve of the base model.
if (ilp_assignment(model) == -1):
xv.append(0)
else:
x = model.getAttr("X", y)
X[0] = x
xv.append(model.objVal)
# NOTE(review): indentation is lost in this view. If the first solve was
# infeasible, `x` was never assigned; check that the use below cannot be
# reached in that case.
if m_sol > 1:
# Presumably excludes the previous solution: with one 1 per track row, the
# overlap with `x` can only reach M when y equals x.
model.addConstr(LinExpr(x,y) <= M-1, name = 'constraint_0')
if (ilp_assignment(model) == -1):
xv.append(0)
else:
x = model.getAttr("X", y)
X[1] = x
xv.append(model.objVal)
if m_sol > 2:
model.remove(model.getConstrByName('constraint_0'))
# Candidate pool: one constrained "next best" solution per partition.
second_best_solutions = []
second_best_solution_vals = []
partitions = []
# First variable index on which the two best solutions differ.
j = np.argmax(np.logical_xor(X[0], X[1]))
partitions.append([j])
partitions.append([j])
# Candidate that agrees with X[0] on variable j but is not X[0] itself.
model.addConstr(y[j]==X[0][j], name = 'partition_constraint')
model.addConstr(LinExpr(X[0], y) <= M-1, name = 'non_equality_constraint')
ilp_assignment(model)
second_best_solutions.append(model.getAttr("X", y))
second_best_solution_vals.append(model.objVal)
model.remove(model.getConstrByName('non_equality_constraint'))
model.remove(model.getConstrByName('partition_constraint'))
# Candidate that agrees with X[1] on variable j but is not X[1] itself.
model.addConstr(y[j]==X[1][j], name = 'partition_constraint')
model.addConstr(LinExpr(X[1], y) <= M-1, name = 'non_equality_constraint')
ilp_assignment(model)
second_best_solution_vals.append(model.objVal)
second_best_solutions.append(model.getAttr("X", y))
model.remove(model.getConstrByName('non_equality_constraint'))
model.remove(model.getConstrByName('partition_constraint'))
for m in range(2, m_sol):
# The cheapest candidate in the pool becomes the m-th solution.
l_k = np.argmin(second_best_solution_vals)
X[m] = second_best_solutions[l_k]
xv.append(second_best_solution_vals[l_k])
if m==m_sol-1:
break
# NOTE(review): `l_k` indexes the candidate pool but is also used to index
# X here and below -- confirm this is the intended row.
j = np.argmax(np.logical_xor(X[m], X[l_k]))
parent_partition = partitions[l_k]
# Temporarily pin the variables fixed by the parent partition.
constrs = []
for idx in parent_partition:
constrs.append(model.addConstr(y[idx]==X[l_k, idx]))
model.addConstr(y[j]==X[m][j], name = 'partition_constraint_new')
model.addConstr(LinExpr(X[m], y) <= M-1, name = 'non_equality_constraint')
# NOTE(review): the infeasible placeholders below have shape (M, N), while
# rows of X have length M*N.
if(ilp_assignment(model) == -1):
second_best_solutions.append(np.ones((M,N)))
second_best_solution_vals.append(np.inf)
else:
second_best_solutions.append(model.getAttr("X", y))
second_best_solution_vals.append(model.objVal)
model.remove(model.getConstrByName('partition_constraint_new'))
model.remove(model.getConstrByName('non_equality_constraint'))
# Re-solve for the other side of the split and overwrite pool entry l_k.
model.addConstr(LinExpr(X[l_k], y) <= M-1, name = 'non_equality_constraint')
model.addConstr(y[j]==X[l_k][j], name = 'partition_constraint_new')
if(ilp_assignment(model) == -1):
second_best_solution_vals[l_k] = np.inf
second_best_solutions[l_k] = np.ones((M,N))
else:
second_best_solution_vals[l_k] = model.objVal
second_best_solutions[l_k] = model.getAttr("X", y)
model.remove(model.getConstrByName('partition_constraint_new'))
model.remove(model.getConstrByName('non_equality_constraint'))
# Both child partitions inherit the parent's fixed variables plus j.
partitions[l_k].append(j)
partitions.append(copy.deepcopy(partitions[l_k]))
for constr in constrs:
model.remove(constr)
# X = np.asarray(X)
xv = np.asarray(xv)
return X, xv
def linear_assignment_wrapper(a):
    """Forward `a` to the imported `linear_assignment` and return its result."""
    assignment = linear_assignment(a)
    return assignment
if __name__ == '__main__':
    # Manual smoke test: list the assignments found for a fixed 4x4 cost
    # matrix together with their total cost.
    np.random.seed(14295)
    a = np.array([
        [0.1, 0.6, 0.2, 0.3],
        [0.4, 0.1, 0.9, 0.4],
        [0.3, 0.5, 0.1, 0.7],
        [0.8, 0.2, 0.2, 0.1],
    ])
    num_solutions(a)
    sols, vals = new_m_best_sol(a, 100, 10)
    for idx, val in enumerate(vals):
        # Each solution is a flattened 4 x 5 matrix (dummy column + 4 detections).
        assignment = np.reshape(sols[idx], (4, 5))
        print(assignment, val)
================================================
FILE: src/nn_matching.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
def _pdist(a, b):
"""Compute pair-wise squared distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
a, b = np.asarray(a), np.asarray(b)
if len(a) == 0 or len(b) == 0:
return np.zeros((len(a), len(b)))
a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
r2 = np.clip(r2, 0., float(np.inf))
return r2
def _cosine_distance(a, b, data_is_normalized=False):
"""Compute pair-wise cosine distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
data_is_normalized : Optional[bool]
If True, assumes rows in a and b are unit length vectors.
Otherwise, a and b are explicitly normalized to lenght 1.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
if not data_is_normalized:
a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
return 1. - np.dot(a, b.T)
def _cosine_distance_torch(a, b, data_is_normalized=False):
'''
_cosine_distance but torched
'''
if not data_is_normalized:
a = a / torch.norm(a, dim=1, keepdim=True)
b = b / torch.norm(b, dim=1, keepdim=True)
return 1. - torch.matmul(a, torch.transpose(b,0,1))
def _nn_euclidean_distance(x, y):
    """Nearest-neighbor helper for the Euclidean metric.

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        value `_pdist` reports against any sample in `x` (a squared
        Euclidean distance), clamped to be non-negative.
    """
    nearest = _pdist(x, y).min(axis=0)
    return np.maximum(0.0, nearest)
def _nn_euclidean_distance_torch(x, y):
""" Helper function for nearest neighbor distance metric (Euclidean).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest Euclidean distance to a sample in `x`.
"""
# x = x/((x*x).sum(1, keepdim = True)).sqrt()
# y = y/((y*y).sum(1, keepdim = True)).sqrt()
sim = (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(2).sqrt()
# sim = sim.exp()
# sim = (sim - 1)/(sim + 1)
sim = torch.min(sim, 0)[0]
return sim
def _nn_cosine_distance(x, y):
    """Nearest-neighbor helper for the cosine metric.

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M holding, for each entry in `y`, the smallest
        cosine distance to any sample in `x`.
    """
    pairwise = _cosine_distance(x, y)
    return pairwise.min(axis=0)
def _nn_cosine_distance_torch(x,y):
    '''
    Torch counterpart of _nn_cosine_distance: for every row of `y`, the
    smallest cosine distance to any row of `x`.
    '''
    pairwise = _cosine_distance_torch(x, y)
    nearest, _ = torch.min(pairwise, 0)
    return nearest
class NearestNeighborDistanceMetric(object):
    """
    Nearest neighbor distance metric: for each target, the distance to a
    query feature is the smallest distance to any sample stored for that
    target.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    budget : Optional[int]
        If not None, keep at most this many samples per target; the oldest
        samples are dropped first.

    Attributes
    ----------
    samples : Dict[int -> List[ndarray]]
        Maps target identities to the features stored for them.
    samples_2d : Dict[int -> List[ndarray]]
        Same as `samples`, for the 2D features.
    """

    def __init__(self, metric, budget=None):
        if metric != "euclidean" and metric != "cosine":
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
            self._metric_torch = _nn_euclidean_distance_torch
        else:
            self._metric = _nn_cosine_distance
            self._metric_torch = _nn_cosine_distance_torch
        self.budget = budget
        self.samples = {}
        self.samples_2d = {}

    def partial_fit(self, features, features_2d, targets, targets_2d, active_targets):
        """Update the stored samples with new data.

        Parameters
        ----------
        features : sequence
            Features paired element-wise with `targets`; a None entry only
            registers the target without storing a sample.
        features_2d : sequence
            2D features paired element-wise with `targets_2d`.
        targets : sequence
            Target identities for `features`.
        targets_2d : sequence
            Target identities for `features_2d`.
        active_targets : List[int]
            Targets currently present in the scene. Afterwards `samples`
            and `samples_2d` contain exactly these keys; an active target
            that does not occur in `targets` restarts with an empty sample
            list.
        """
        for feature, target in zip(features, targets):
            history = self.samples.setdefault(target, [])
            if feature is not None:
                history.append(feature)
            if self.budget is not None:
                self.samples[target] = self.samples[target][-self.budget:]
        retained = {}
        for target in active_targets:
            if target in targets:
                retained[target] = self.samples[target]
        self.samples = retained
        for target in active_targets:
            self.samples.setdefault(target, [])

        for feature_2d, target in zip(features_2d, targets_2d):
            self.samples_2d.setdefault(target, []).append(feature_2d)
            if self.budget is not None:
                self.samples_2d[target] = self.samples_2d[target][-self.budget:]
        retained_2d = {}
        for target in active_targets:
            retained_2d[target] = self.samples_2d[target]
        self.samples_2d = retained_2d

    def distance(self, features, targets, compare_2d=False):
        """Compute the distance between features and targets.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            Targets to match the given `features` against.
        compare_2d : bool
            Use the 2D sample store instead of the default one.

        Returns
        -------
        ndarray
            Cost matrix of shape len(targets), len(features); element (i, j)
            is the value the configured metric reports between the samples
            of `targets[i]` and `features[j]`.
        """
        store = self.samples_2d if compare_2d else self.samples
        cost_matrix = np.zeros((len(targets), len(features)))
        for row, target in enumerate(targets):
            cost_matrix[row, :] = self._metric(store[target], features)
        return cost_matrix

    def distance_torch(self, features, targets, compare_2d=False):
        '''
        Torch variant of `distance`: the stored samples of each target are
        stacked into one tensor, the cost matrix is filled on 'cuda:0' and
        returned as a numpy array.
        '''
        store = self.samples_2d if compare_2d else self.samples
        cost_matrix = torch.zeros(len(targets), len(features)).to('cuda:0')
        for row, target in enumerate(targets):
            stacked = torch.stack(store[target], dim=0)
            cost_matrix[row, :] = self._metric_torch(stacked, features)
        return cost_matrix.cpu().numpy()

    def check_samples(self, targets):
        """Return True if any of `targets` has no stored sample."""
        return any(len(self.samples[target]) == 0 for target in targets)
================================================
FILE: src/pointnet_model.py
================================================
import os, pdb
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import configparser
from utils.pointnet_transform_nets import input_transform_net, feature_transform_net
import utils.pointnet_tf_util as pointnet_tf_util
class PointNet():
    """PointNet feature extractor restored from a TensorFlow checkpoint.

    Builds the network graph for a batch of one point cloud, opens a
    session and restores the weights from the checkpoint named in the
    config file. Calling the instance runs the network in inference mode.
    """

    def __init__(self, config_path):
        """Build the graph, create the session and restore the weights.

        Parameters
        ----------
        config_path : str
            Path to a config file whose `general` section provides
            `num_point` and `depth_model_path` (the checkpoint to restore).
        """
        # ConfigParser replaces the deprecated SafeConfigParser alias.
        parser = configparser.ConfigParser()
        parser.read(config_path)
        num_points = parser.getint('general', 'num_point')
        depth_model_path = parser.get('general', 'depth_model_path')
        # NOTE(review): the extent of this device scope was reconstructed
        # (indentation was lost in the reviewed copy) -- confirm.
        with tf.device('/gpu:'+str(0)):
            self.pointclouds_pl, _ = self.placeholder_inputs(1, num_points)
            self.is_training_pl = tf.placeholder(tf.bool, shape=())

            # simple model
            feature = self.get_model(self.pointclouds_pl, self.is_training_pl)
            self.feature = feature
            # Add ops to save and restore all the variables.
            self.saver = tf.train.Saver()
        # Create session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.sess = tf.Session(config=config)
        # Initialize variables
        self.sess.run(tf.global_variables_initializer())
        # Restore model weights
        self.saver.restore(self.sess, depth_model_path)

    def __call__(self, input_point_cloud):
        """Run the network in inference mode.

        Parameters
        ----------
        input_point_cloud : array_like
            Fed to the point cloud placeholder of shape (1, None, 3).

        Returns
        -------
        ndarray
            The evaluated feature tensor.
        """
        feed_dict = {self.pointclouds_pl: input_point_cloud,
                     self.is_training_pl: False}
        features = self.sess.run(self.feature, feed_dict=feed_dict)
        return features

    def placeholder_inputs(self, batch_size, num_point):
        """Create the input placeholders.

        `num_point` is accepted but not used: the point dimension of the
        point cloud placeholder is left unspecified.

        Returns
        -------
        Tuple[Tensor, Tensor]
            A float32 placeholder of shape (batch_size, None, 3) and an int32
            label placeholder of shape (batch_size).
        """
        pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, None, 3))
        labels_pl = tf.placeholder(tf.int32, shape=(batch_size))
        return pointclouds_pl, labels_pl

    def get_model(self, point_cloud, is_training, bn_decay=None):
        """Build the PointNet feature network.

        Input is BxNx3. The output is the max-pooled result of the last
        (1024-channel) convolution reshaped to [B, -1]; presumably Bx1024 --
        confirm against `pointnet_tf_util.conv2d`.
        """
        batch_size = point_cloud.get_shape()[0].value
        end_points = {}
        # NOTE(review): only the transform-net calls are placed inside the
        # variable scopes, as in the reference PointNet layout; indentation
        # was lost in the reviewed copy, so confirm against the checkpoint's
        # variable names.
        with tf.variable_scope('transform_net1', reuse=tf.AUTO_REUSE) as sc:
            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
        point_cloud_transformed = tf.matmul(point_cloud, transform)
        input_image = tf.expand_dims(point_cloud_transformed, -1)
        net = pointnet_tf_util.conv2d(input_image, 64, [1,3],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope='conv1', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 64, [1,1],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope='conv2', bn_decay=bn_decay)
        with tf.variable_scope('transform_net2', reuse=tf.AUTO_REUSE) as sc:
            transform = feature_transform_net(net, is_training, bn_decay, K=64)
        end_points['transform'] = transform
        net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
        net_transformed = tf.expand_dims(net_transformed, [2])
        net = pointnet_tf_util.conv2d(net_transformed, 64, [1,1],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope='conv3', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 128, [1,1],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope='conv4', bn_decay=bn_decay)
        net = pointnet_tf_util.conv2d(net, 1024, [1,1],
                                      padding='VALID', stride=[1,1],
                                      bn=True, is_training=is_training,
                                      scope='conv5', bn_decay=bn_decay)
        # Symmetric function: max pooling
        net = tf.reduce_max(net, axis = 1)
        net = tf.reshape(net, [batch_size, -1])
        feature = net
        return feature

    def get_loss(self, pred, label, end_points, reg_weight=0.001):
        """Classification loss plus an orthogonality penalty on the transform.

        pred: B*NUM_CLASSES,
        label: B,
        end_points: dict providing the BxKxK feature transform under
        'transform'.
        Returns the mean cross-entropy plus `reg_weight` times the L2 loss of
        (transform * transform^T - I).
        """
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
        classify_loss = tf.reduce_mean(loss)
        tf.summary.scalar('classify loss', classify_loss)
        # Enforce the transformation as orthogonal matrix
        transform = end_points['transform'] # BxKxK
        K = transform.get_shape()[1].value
        mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
        # tf.eye avoids relying on numpy, which this module never imports.
        mat_diff = mat_diff - tf.eye(K, dtype=tf.float32)
        mat_diff_loss = tf.nn.l2_loss(mat_diff)
        tf.summary.scalar('mat loss', mat_diff_loss)
        return classify_loss + mat_diff_loss * reg_weight
================================================
FILE: src/template 2.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification
"""
import time
import rospy
import sys
import torch
import numpy as np
import os
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from cv_bridge import CvBridge, CvBridgeError
from aligned_reid_utils import get_image_patches, generate_features, create_appearance_model
from jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature
class Appearance_Features:
    """ROS node that attaches 2D appearance features to person detections.

    Bounding boxes from "/omni_yolo_bboxes" are time-synchronized with images
    from "/ros_indigosdk_node/stitched_image0". For every box of class
    'person' an appearance feature is generated and the result is published
    on "detection2d_with_feature".
    """

    def __init__(self):
        """Start the node, load the appearance model and wire up the topics."""
        self.node_name = "aligned_reid_feature_generator"
        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)
        apperance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')
        self.appearance_model = create_appearance_model(apperance_model_ckpt)
        self.image_sub = message_filters.Subscriber("/ros_indigosdk_node/stitched_image0", Image, queue_size=2)
        self.yolo_bbox_sub = message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)
        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)
        self.time_sync.registerCallback(self.get_2d_feature)
        self.cv_bridge = CvBridge()
        self.feature_2d_pub = rospy.Publisher("detection2d_with_feature", detection2d_with_feature_array, queue_size=1)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def get_2d_feature(self, y1_bboxes, ros_image):
        """Publish appearance features for the person boxes of one frame.

        Parameters
        ----------
        y1_bboxes : BoundingBoxes
            Detector output synchronized with `ros_image`.
        ros_image : Image
            The image the boxes refer to.

        An empty array is published when the frame has no person box.
        Nothing is published when the image cannot be converted.
        """
        try:
            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            # Without a decoded image there is nothing to crop features
            # from; skip this frame instead of failing on an unbound name.
            print(e)
            return
        input_img = torch.from_numpy(input_image).float()
        input_img = input_img.to('cuda:1')
        # HWC -> CHW, scaled to [0, 1].
        input_img = input_img.permute(2, 0, 1)/255
        # Generate 2D image features for each bounding box
        detections = []
        for y1_bbox in y1_bboxes.bounding_boxes:
            if y1_bbox.Class == 'person':
                detections.append([int(y1_bbox.xmin), int(y1_bbox.ymin),
                                   int(y1_bbox.xmax), int(y1_bbox.ymax),
                                   y1_bbox.probability, -1, -1])
        features_2d = detection2d_with_feature_array()
        features_2d.header.stamp = y1_bboxes.header.stamp
        features_2d.header.frame_id = 'occam'
        if not detections:
            self.feature_2d_pub.publish(features_2d)
            return
        image_patches = get_image_patches(input_img, detections)
        features = generate_features(self.appearance_model, image_patches)
        # frame_det_id is the index of the detection among this frame's
        # person boxes.
        for i, (det, feature) in enumerate(zip(detections, features)):
            det_msg = detection2d_with_feature()
            det_msg.header.stamp = features_2d.header.stamp
            det_msg.x1 = det[0]
            det_msg.y1 = det[1]
            det_msg.x2 = det[2]
            det_msg.y2 = det[3]
            det_msg.feature = feature
            det_msg.valid = True
            det_msg.frame_det_id = i
            features_2d.detection2d_with_features.append(det_msg)
        self.feature_2d_pub.publish(features_2d)

    def cleanup(self):
        """Shutdown hook registered with rospy."""
        print("Shutting down 2D-Appearance node.")
def main(args):
    """Create the appearance-feature node and spin until interrupted.

    `args` is accepted for the script entry point but not used.
    """
    try:
        node = Appearance_Features()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 2D-Appearance node.")


if __name__ == '__main__':
    # Script entry point: hand the command line to main().
    main(sys.argv)
================================================
FILE: src/template.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
Subscribe to the Yolo 2 bboxes, and publish the detections with a 2d appearance feature used for reidentification
"""
import time
import rospy
import sys
import torch
import numpy as np
import os
from std_msgs.msg import Int8
import message_filters
from sensor_msgs.msg import Image
from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
from cv_bridge import CvBridge, CvBridgeError
from aligned_reid_utils import get_image_patches, generate_features, create_appearance_model
from jpda_rospack.msg import detection2d_with_feature_array, detection2d_with_feature
class Appearance_Features:
    """ROS node that attaches 2D appearance features to person detections.

    Bounding boxes from "/omni_yolo_bboxes" are time-synchronized with images
    from "/ros_indigosdk_node/stitched_image0". For every box of class
    'person' an appearance feature is generated and the result is published
    on "detection2d_with_feature".
    """

    def __init__(self):
        """Start the node, load the appearance model and wire up the topics."""
        self.node_name = "aligned_reid_feature_generator"
        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)
        apperance_model_ckpt = rospy.get_param('~aligned_reid_model', 'src/jpda_rospack/src/aligned_reid_MOT_weights.pth')
        self.appearance_model = create_appearance_model(apperance_model_ckpt)
        self.image_sub = message_filters.Subscriber("/ros_indigosdk_node/stitched_image0", Image, queue_size=2)
        self.yolo_bbox_sub = message_filters.Subscriber("/omni_yolo_bboxes", BoundingBoxes, queue_size=2)
        self.time_sync = message_filters.ApproximateTimeSynchronizer([self.yolo_bbox_sub, self.image_sub], 5, 0.1)
        self.time_sync.registerCallback(self.get_2d_feature)
        self.cv_bridge = CvBridge()
        self.feature_2d_pub = rospy.Publisher("detection2d_with_feature", detection2d_with_feature_array, queue_size=1)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def get_2d_feature(self, y1_bboxes, ros_image):
        """Publish appearance features for the person boxes of one frame.

        Parameters
        ----------
        y1_bboxes : BoundingBoxes
            Detector output synchronized with `ros_image`.
        ros_image : Image
            The image the boxes refer to.

        An empty array is published when the frame has no person box.
        Nothing is published when the image cannot be converted.
        """
        try:
            input_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            # Without a decoded image there is nothing to crop features
            # from; skip this frame instead of failing on an unbound name.
            print(e)
            return
        input_img = torch.from_numpy(input_image).float()
        input_img = input_img.to('cuda:1')
        # HWC -> CHW, scaled to [0, 1].
        input_img = input_img.permute(2, 0, 1)/255
        # Generate 2D image features for each bounding box
        detections = []
        for y1_bbox in y1_bboxes.bounding_boxes:
            if y1_bbox.Class == 'person':
                detections.append([int(y1_bbox.xmin), int(y1_bbox.ymin),
                                   int(y1_bbox.xmax), int(y1_bbox.ymax),
                                   y1_bbox.probability, -1, -1])
        features_2d = detection2d_with_feature_array()
        features_2d.header.stamp = y1_bboxes.header.stamp
        features_2d.header.frame_id = 'occam'
        if not detections:
            self.feature_2d_pub.publish(features_2d)
            return
        image_patches = get_image_patches(input_img, detections)
        features = generate_features(self.appearance_model, image_patches)
        # frame_det_id is the index of the detection among this frame's
        # person boxes.
        for i, (det, feature) in enumerate(zip(detections, features)):
            det_msg = detection2d_with_feature()
            det_msg.header.stamp = features_2d.header.stamp
            det_msg.x1 = det[0]
            det_msg.y1 = det[1]
            det_msg.x2 = det[2]
            det_msg.y2 = det[3]
            det_msg.feature = feature
            det_msg.valid = True
            det_msg.frame_det_id = i
            features_2d.detection2d_with_features.append(det_msg)
        self.feature_2d_pub.publish(features_2d)

    def cleanup(self):
        """Shutdown hook registered with rospy."""
        print("Shutting down 2D-Appearance node.")
def main(args):
    """Create the appearance-feature node and spin until interrupted.

    `args` is accepted for the script entry point but not used.
    """
    try:
        node = Appearance_Features()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 2D-Appearance node.")


if __name__ == '__main__':
    # Script entry point: hand the command line to main().
    main(sys.argv)
================================================
FILE: src/track_3d 2.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
class TrackState:
    """
    Enumeration of the states a single target track can be in. A new track
    starts out `Tentative` and becomes `Confirmed` once enough evidence has
    been collected. A track that is no longer alive is marked `Deleted` so
    it can be removed from the set of active tracks.
    """

    Tentative, Confirmed, Deleted = 1, 2, 3
class Track_3d:
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Parameters
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age : int
The maximum number of consecutive misses before the track state is
set to `Deleted`.
feature : Optional[ndarray]
Feature vector of the detection this track originates from. If not None,
this feature is added to the `features` cache.
Attributes
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
hits : int
Total number of measurement updates.
age : int
Total number of frames since first occurance.
time_since_update : int
Total number of frames since last measurement update.
state : TrackState
The current track state.
features : List[ndarray]
A cache of features. On each measurement update, the associated feature
vector is added to this list.
"""
def __init__(self, mean, covariance, track_id, n_init, max_age,
feature=None, appearance_feature = None, cuda = False, lstm = None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
self.cuda = cuda
self.state = TrackState.Tentative
self.features = []
self.features_2d = []
self.hidden = None
if lstm is None:
self.features.append(feature)
self.features_2d.append(appearance_feature)
else:
self.feature_update(feature, appearance_feature, lstm)
self.first_detection = mean[:7]
self._n_init = n_init
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
self._max_age = max_age
self.matched = True
self.exiting = False
self.last_box = None
def to_tlwh3d(self):
"""Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.
Returns
-------
ndarray
The bounding box.
"""
if self.last_box is not None:
return self.last_box.box_3d
else:
return self.mean[[0,1,2,3,4,5,6]].copy()
def to_tlwh(self, kf):
"""Get current position in bounding box format `(box center of bottom face [x, y, z], l, w, h)`.
Returns
-------
ndarray
The bounding box.
"""
corner_points, _ = kf.calculate_corners(self.mean)
min_x, min_y = np.amin(corner_points, axis = 0)[:2]
max_x, max_y = np.amax(corner_points, axis = 0)[:2]
ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
return ret
def predict(self, kf):
"""Propagate the state distribution to the current time step using a
Kalman filter prediction step.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
"""
self.mean, self.covariance = kf.predict(self.mean, self.covariance)
self.age += 1
self.time_since_update += 1
# @profile
def update(self, kf, detection, compare_2d=False,
marginalization=None, detection_idx=None, JPDA=False, lstm = None):
"""Perform Kalman filter measurement update step and update the feature
cache.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
detection : Detection
The associated detection.
"""
if JPDA:
detections_2d = [det.tlwh for det in detection]
if compare_2d:
detections_3d = None
else:
detections_3d = [np.copy(det.box_3d) for det in detection]
for det in detections_3d:
if det[6] - self.mean[6] > np.pi:
det[6] -= 2 * np.pi
elif det[6] - self.mean[6] < -np.pi:
det[6] += 2*np.pi
self.mean, self.covariance, self.mean_post_3d = kf.update(
self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)
self.mean[6] = self.mean[6] % (2 * np.pi)
self.feature_update(detection, detection_idx, lstm)
if np.argmax(marginalization) != 0:
self.matched=True
else:
self.matched=False
if detection_idx < 0:
self.last_box = None
return
self.hits += 1
self.time_since_update = 0
detection = detection[detection_idx]
self.last_box = detection
else:
detection = detection[detection_idx]
detections_3d = detections_3d[detection_idx]
self.mean, self.covariance = kf.update(
self.mean, self.covariance, detection.tlwh, detections_3d)
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
    """Register that no detection was associated with this track in the
    current frame.

    A tentative track is deleted immediately; any other track is deleted
    once `time_since_update` exceeds the configured maximum age.
    """
    never_confirmed = self.state == TrackState.Tentative
    if never_confirmed or self.time_since_update > self._max_age:
        self.state = TrackState.Deleted
def is_tentative(self):
    """Tell whether the track is still in its tentative (unconfirmed) state."""
    current = self.state
    return current == TrackState.Tentative
def is_confirmed(self):
    """Tell whether the track has reached the confirmed state."""
    current = self.state
    return current == TrackState.Confirmed
def is_deleted(self):
    """Tell whether the track is marked as deleted and may be dropped."""
    current = self.state
    return current == TrackState.Deleted
def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
    """Append the features of the current frame to the track's caches.

    Parameters
    ----------
    detections : list
        Detections exposing ``feature`` and ``appearance_feature``.
    detection_idx : int
        Index of the detection to use when `JPDA` is False.
    lstm : callable, optional
        When given (and `JPDA` is False) the feature is passed through it,
        carrying `self.hidden` between calls, before being cached.
    JPDA : bool
        When True, cache the probability-weighted sum over all detections
        instead of a single detection's features.
    marginalization : ndarray, optional
        Association probabilities; entries ``[1:]`` weight the detections
        in JPDA mode.
    """
    if not JPDA:
        feature = detections[detection_idx].feature
        appearance_feature = detections[detection_idx].appearance_feature
        if feature is not None:
            if lstm is None:
                output_feature = feature
            else:
                # Add a leading axis before handing the feature to the LSTM.
                input_feature = torch.Tensor(feature).type(self.tensor).unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)
        return

    def _blend(vectors):
        # One row per detection, scaled by that detection's probability.
        stacked = np.array(vectors).reshape(len(vectors), -1)
        weights = marginalization[1:].reshape(-1, 1)
        return (stacked * weights).sum(axis=0).astype(np.float32)

    features = [d.feature for d in detections]
    appearance_features = [d.appearance_feature for d in detections]
    # A cache is only extended when every detection supplies that feature.
    if all(f is not None for f in features):
        self.features.append(_blend(features))
    if all(f is not None for f in appearance_features):
        self.features_2d.append(_blend(appearance_features))
def get_cov(self):
    """Build a 6x6 covariance from selected entries of the state covariance.

    The top-left 3x3 block is copied from the first three state entries;
    row/column 5 is filled from row 7 of the state covariance. All other
    entries are zero.

    Returns
    -------
    ndarray
        The 6x6 covariance matrix.
    """
    # NOTE(review): `update` treats mean[6] as the angle, yet index 7 is
    # read here for the "theta" terms - confirm against the filter's layout.
    cov = self.covariance
    cross_terms = cov[7, :3]
    out_cov = np.zeros((6, 6))
    out_cov[:3, :3] = cov[:3, :3]
    out_cov[5, :3] = cross_terms
    out_cov[:3, 5] = cross_terms
    out_cov[5, 5] = cov[7, 7]
    return out_cov
================================================
FILE: src/track_3d.py
================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import pdb
import torch
class TrackState:
    """
    Life-cycle states of a single track.

    A new track starts out `Tentative`, becomes `Confirmed` once enough
    measurement updates have been collected, and is marked `Deleted` when it
    should be removed from the set of active tracks.
    """
    Tentative, Confirmed, Deleted = 1, 2, 3
class Track_3d:
    """
    A single tracked target whose state is estimated by a 3D Kalman filter.

    The first seven entries of `mean` are read as the target's 3D box (see
    `to_tlwh3d`); entry 6 is treated as an angle and kept in ``[0, 2*pi)``
    by `update`. The meaning of the remaining entries is defined by the
    filter that produced `mean` and is not interpreted by this class.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of hits required before the track is confirmed. Creation
        counts as the first hit.
    max_age : int
        Once `time_since_update` exceeds this value a missed track is
        deleted.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from.
    appearance_feature : Optional[ndarray]
        2D appearance feature of the originating detection.
    cuda : bool
        Select the CUDA float tensor type for LSTM inputs.
    lstm : Optional[callable]
        When given, the initial feature is passed through it before caching.

    Attributes
    ----------
    mean : ndarray
        Current state mean.
    covariance : ndarray
        Current state covariance.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        Cache of features collected since it was last emptied by the owner.
    features_2d : List[ndarray]
        Cache of 2D appearance features, handled like `features`.
    hidden : object
        Hidden state carried between LSTM calls (None until first use).
    matched : bool
        Whether the last JPDA update favoured a real detection.
    last_box : object
        Detection assigned in the last JPDA update, or None.
    """
    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, appearance_feature = None, cuda = False, lstm = None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
        self.cuda = cuda

        self.state = TrackState.Tentative
        self.features = []
        self.features_2d = []
        self.hidden = None
        if lstm is None:
            self.features.append(feature)
            self.features_2d.append(appearance_feature)
        else:
            # feature_update() expects (detections, index, lstm); the raw
            # feature vectors have to go through the shared helper instead.
            self._store_features(feature, appearance_feature, lstm)
        self.first_detection = mean[:7]
        self._n_init = n_init
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
        self._max_age = max_age
        self.matched = True
        self.exiting = False
        self.last_box = None

    def to_tlwh3d(self):
        """Get the current 3D box of the track.

        Returns
        -------
        ndarray
            The ``box_3d`` of the detection assigned in the last update when
            there is one, otherwise a copy of the first seven entries of
            the state mean.
        """
        if self.last_box is not None:
            return self.last_box.box_3d
        return self.mean[[0, 1, 2, 3, 4, 5, 6]].copy()

    def to_tlwh(self, kf):
        """Get the axis-aligned 2D box enclosing the track's corner points.

        Parameters
        ----------
        kf : object
            Filter exposing ``calculate_corners(mean)``; the first element
            of its return value is used as the corner points.

        Returns
        -------
        ndarray
            ``[min_x, min_y, width, height]`` over the first two coordinates
            of the corner points.
        """
        corner_points, _ = kf.calculate_corners(self.mean)
        min_x, min_y = np.amin(corner_points, axis = 0)[:2]
        max_x, max_y = np.amax(corner_points, axis = 0)[:2]
        ret = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : object
            Filter exposing ``predict(mean, covariance)``.
        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection, compare_2d=False,
               marginalization=None, detection_idx=None, JPDA=False, lstm = None):
        """Perform the Kalman filter measurement update and refresh the
        feature cache.

        Parameters
        ----------
        kf : object
            Filter exposing ``update``. In JPDA mode it receives every
            candidate measurement plus the association probabilities and
            must return ``(mean, covariance, mean_post_3d)``; otherwise it
            receives one measurement and must return ``(mean, covariance)``.
        detection : list
            Candidate detections of the current frame. Each must expose
            ``tlwh`` and, unless `compare_2d` is set, ``box_3d``.
        compare_2d : bool
            When True no 3D boxes are passed to the filter.
        marginalization : ndarray
            Association probabilities of this track (JPDA mode only). The
            track is flagged as matched when the arg-max is not entry 0.
        detection_idx : int
            Index into `detection` of the detection assigned to this track.
            A negative value (or None) means no detection was assigned.
        JPDA : bool
            Select the JPDA update (True) or the single-measurement update.
        lstm : callable, optional
            Forwarded to `feature_update`.
        """
        if JPDA:
            detections_2d = [det.tlwh for det in detection]
            if compare_2d:
                detections_3d = None
            else:
                detections_3d = [np.copy(det.box_3d) for det in detection]
                for box in detections_3d:
                    # Move the measured angle onto the same 2*pi branch as
                    # the state so the innovation never exceeds half a turn.
                    if box[6] - self.mean[6] > np.pi:
                        box[6] -= 2 * np.pi
                    elif box[6] - self.mean[6] < -np.pi:
                        box[6] += 2 * np.pi
            self.mean, self.covariance, self.mean_post_3d = kf.update(
                self.mean, self.covariance, detections_2d, detections_3d, marginalization, JPDA)
            self.mean[6] = self.mean[6] % (2 * np.pi)
            has_detection = detection_idx is not None and detection_idx >= 0
            if has_detection:
                # Only cache features of a detection that was really
                # assigned. Indexing with -1 would silently pick the last
                # detection of the frame (or raise on an empty list).
                self.feature_update(detection, detection_idx, lstm)
            self.matched = bool(np.argmax(marginalization) != 0)
            if not has_detection:
                self.last_box = None
                return
            self.hits += 1
            self.time_since_update = 0
            detection = detection[detection_idx]
            self.last_box = detection
        else:
            detection = detection[detection_idx]
            # The 3D measurement has to come from the selected detection;
            # the name was previously read before it was ever assigned.
            if compare_2d or detection.box_3d is None:
                detections_3d = None
            else:
                detections_3d = np.copy(detection.box_3d)
            # NOTE(review): this path does not touch hits/time_since_update;
            # confirm whether the caller is expected to maintain them.
            self.mean, self.covariance = kf.update(
                self.mean, self.covariance, detection.tlwh, detections_3d)
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).

        Tentative tracks are deleted immediately; other tracks are deleted
        once `time_since_update` exceeds the maximum age.
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed)."""
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted

    def feature_update(self, detections, detection_idx, lstm, JPDA=False, marginalization=None):
        """Append the features of the current frame to the track's caches.

        Parameters
        ----------
        detections : list
            Detections exposing ``feature`` and ``appearance_feature``.
        detection_idx : int
            Index of the detection to use when `JPDA` is False.
        lstm : callable, optional
            When given (and `JPDA` is False) the feature is passed through
            it before being cached.
        JPDA : bool
            When True, cache the probability-weighted sum over all
            detections instead of a single detection's features.
        marginalization : ndarray, optional
            Association probabilities; entries ``[1:]`` weight the
            detections in JPDA mode.
        """
        if JPDA:
            features = [d.feature for d in detections]
            appearance_features = [d.appearance_feature for d in detections]
            # A cache is only extended when every detection has the feature.
            if all(f is not None for f in features):
                combined_feature = np.sum(
                    np.array(features).reshape(len(features), -1)
                    * marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features.append(combined_feature)
            if all(f is not None for f in appearance_features):
                combined_feature = np.sum(
                    np.array(appearance_features).reshape(len(appearance_features), -1)
                    * marginalization[1:].reshape(-1, 1), axis=0).astype(np.float32)
                self.features_2d.append(combined_feature)
        else:
            chosen = detections[detection_idx]
            self._store_features(chosen.feature, chosen.appearance_feature, lstm)

    def _store_features(self, feature, appearance_feature, lstm):
        """Cache one feature pair, running `feature` through `lstm` if given."""
        if feature is not None:
            if lstm is not None:
                # Add a leading axis before handing the feature to the LSTM.
                input_feature = torch.Tensor(feature).type(self.tensor)
                input_feature = input_feature.unsqueeze(0)
                with torch.no_grad():
                    if self.hidden is None:
                        output_feature, self.hidden = lstm(input_feature)
                    else:
                        output_feature, self.hidden = lstm(input_feature, self.hidden)
                output_feature = output_feature.cpu().numpy().squeeze(0)
            else:
                output_feature = feature
            self.features.append(output_feature)
        if appearance_feature is not None:
            self.features_2d.append(appearance_feature)

    def get_cov(self):
        """Build a 6x6 covariance from selected entries of the state covariance.

        The top-left 3x3 block is copied from the first three state entries;
        row/column 5 is filled from row 7 of the state covariance. All other
        entries are zero.
        """
        # NOTE(review): `update` treats mean[6] as the angle, yet index 7 is
        # read here for the "theta" terms - confirm against the filter layout.
        xyz_cov = self.covariance[:3, :3]
        theta_cov_1 = self.covariance[7, :3]
        theta_cov_2 = self.covariance[7, 7]
        out_cov = np.zeros((6, 6))
        out_cov[:3,:3] = xyz_cov
        out_cov[5, :3] = theta_cov_1
        out_cov[:3, 5] = theta_cov_1
        out_cov[5, 5] = theta_cov_2
        return out_cov
================================================
FILE: src/tracker_3d 2.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
import double_measurement_kf
import linear_assignment
import iou_matching
from track_3d import Track_3d
import JPDA_matching
import tracking_utils
import math
import torch
from nn_matching import NearestNeighborDistanceMetric
class Tracker_3d:
    """
    Multi-target tracker that associates the detections of each frame with
    `Track_3d` instances and manages their life cycle.

    Only JPDA association (``JPDA=True``) is implemented; see `_match`.

    Parameters
    ----------
    max_age : int
        Passed to every new track as its maximum age.
    n_init : int
        Passed to every new track as the number of hits needed before it is
        confirmed.
    JPDA : bool
        Must be True for `update` to work.
    m_best_sol : int
        Stored on the instance; not read elsewhere in this class.
    assn_thresh : float
        Passed to `JPDA_matching.Matcher` as ``assignment_threshold``.
    matching_strategy : str or None
        Passed to `JPDA_matching.Matcher`.
    appearance_model : object, optional
        Stored on the instance; not read elsewhere in this class.
    gate_full_state : bool
        When False (default) gating is requested with ``only_position=True``.
    lstm, cuda :
        Forwarded to every new `Track_3d`.
    calib, omni, kf_vel_params :
        Forwarded to ``double_measurement_kf.KF_3D`` (`kf_vel_params` is
        unpacked positionally after `calib`).
    dummy_node_cost_iou, dummy_node_cost_app, dummy_node_cost_iou_2d : float
        Dummy-node costs handed to ``linear_assignment.JPDA`` (the ``_2d``
        variant is used for detections without a 3D box).
    nn_budget : int or None
        Passed to `NearestNeighborDistanceMetric`.
    use_imm, markov, uncertainty_limit, optical_flow, gate_limit :
        Accepted for interface compatibility; ignored by this class.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        See Parameters.
    n_init : int
        See Parameters.
    kf : double_measurement_kf.KF_3D
        The filter shared by all tracks.
    tracks : List[Track_3d]
        The list of active tracks at the current time step.
    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model = None,
                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.max_age = max_age
        self.n_init = n_init
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d
        self.appearance_model = appearance_model

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        """Compute the association costs and the gate for a set of pairs.

        Parameters
        ----------
        tracks, dets : list
            All tracks / detections.
        track_indices, detection_indices : list of int
            Rows / columns to evaluate.
        compare_2d : bool, optional
            Use 2D appearance features and 2D IoU. Forced to True when
            ``self.metric.check_samples`` returns a truthy value.

        Returns
        -------
        (ndarray, object)
            The appearance and IoU cost matrices stacked along a third axis,
            and the mask returned by ``linear_assignment.gate_cost_matrix``.
        """
        targets = np.array([tracks[i].track_id for i in track_indices])
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)
        else:
            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        use_3d = not compare_2d
        if use_3d:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            # The 2D comparison additionally needs the filter.
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)
        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))
        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        input_img : object
            Not used by this method.
        detections : list
            A list of detections at the current time step.
        """
        # The filter states of the tracks are updated inside _match().
        _, unmatched_tracks, unmatched_detections = self._match(detections)

        # Update track state for unmatched tracks, then drop deleted ones.
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        self.prune_tracks()

        # Create new tracks.
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

        # Hand the features gathered by the tracks to the distance metric
        # and empty the per-track caches.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []
        self.metric.partial_fit(
            features, features_2d, targets, targets_2d, active_targets)

    # @profile
    def _match(self, detections):
        """Associate detections with tracks and update the track filters.

        Detections with a 3D box are matched against all tracks first;
        detections without one are then matched against the tracks left
        unmatched by the first pass.

        Returns
        -------
        (matches, unmatched_tracks, unmatched_detections)
            `unmatched_tracks` always refers to the first (3D) pass.

        Raises
        ------
        NotImplementedError
            When the tracker was built with ``JPDA=False``. That branch
            used names that were never defined and could only fail with a
            NameError.
        """
        if not self.JPDA:
            raise NotImplementedError(
                "Tracker_3d only implements JPDA association; "
                "construct it with JPDA=True.")

        # Split the detections by availability of a 3D box.
        det_3d_idx = []
        det_2d_idx = []
        for idx, det in enumerate(detections):
            if det.box_3d is not None:
                det_3d_idx.append(idx)
            else:
                det_2d_idx.append(idx)
        marginalizations = \
            linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,
                                   self.dummy_node_cost_iou, self.tracks,
                                   detections, compare_2d=False,
                                   detection_indices=det_3d_idx)
        dets_matching_3d = [detections[i] for i in det_3d_idx]
        jpda_matcher = JPDA_matching.Matcher(
            detections, marginalizations, range(len(self.tracks)),
            self.matching_strategy, assignment_threshold=self.assn_thresh)
        matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

        # Map matched tracks to detections and unmatched tracks to -1.
        track_detection_map = {t: d for (t, d) in matches_a}
        for t in unmatched_tracks_a:
            track_detection_map[t] = -1

        if det_2d_idx:
            marginalizations_2d = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks,
                                       detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)
            dets_matching_2d = [detections[i] for i in det_2d_idx]
            jpda_matcher = JPDA_matching.Matcher(
                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            # NOTE(review): this overwrites matches_a / unmatched_detections
            # of the 3D pass with results that appear to be indexed relative
            # to dets_matching_2d, while update() indexes the full detection
            # list with them - verify against JPDA_matching.Matcher.
            matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()
            track_detection_map_2d = {unmatched_tracks_a[t]: d for (t, d) in matches_a}
            for t in unmatched_tracks_2d:
                track_detection_map_2d[unmatched_tracks_a[t]] = -1

        # Update the Kalman state of every track.
        if marginalizations.shape[0] > 0:
            for i in range(len(self.tracks)):
                if det_2d_idx and i in unmatched_tracks_a:
                    self.tracks[i].update(self.kf, dets_matching_2d,
                        marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i],
                        JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)
                else:
                    self.tracks[i].update(self.kf, dets_matching_3d,
                        marginalization=marginalizations[i,:], detection_idx=track_detection_map[i],
                        JPDA=self.JPDA, lstm = self.lstm)
        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        """Start a new track from `detection`; detections without a 3D box
        are ignored."""
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm))
        self._next_id += 1

    def prune_tracks(self):
        """Drop every track that has been marked as deleted."""
        self.tracks = [t for t in self.tracks if not t.is_deleted()]
================================================
FILE: src/tracker_3d.py
================================================
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
import pdb
import double_measurement_kf
import linear_assignment
import iou_matching
from track_3d import Track_3d
import JPDA_matching
import tracking_utils
import math
import torch
from nn_matching import NearestNeighborDistanceMetric
class Tracker_3d:
    """
    Multi-target tracker that associates the detections of each frame with
    `Track_3d` instances and manages their life cycle.

    Only JPDA association (``JPDA=True``) is implemented; see `_match`.

    Parameters
    ----------
    max_age : int
        Passed to every new track as its maximum age.
    n_init : int
        Passed to every new track as the number of hits needed before it is
        confirmed.
    JPDA : bool
        Must be True for `update` to work.
    m_best_sol : int
        Stored on the instance; not read elsewhere in this class.
    assn_thresh : float
        Passed to `JPDA_matching.Matcher` as ``assignment_threshold``.
    matching_strategy : str or None
        Passed to `JPDA_matching.Matcher`.
    appearance_model : object, optional
        Stored on the instance; not read elsewhere in this class.
    gate_full_state : bool
        When False (default) gating is requested with ``only_position=True``.
    lstm, cuda :
        Forwarded to every new `Track_3d`.
    calib, omni, kf_vel_params :
        Forwarded to ``double_measurement_kf.KF_3D`` (`kf_vel_params` is
        unpacked positionally after `calib`).
    dummy_node_cost_iou, dummy_node_cost_app, dummy_node_cost_iou_2d : float
        Dummy-node costs handed to ``linear_assignment.JPDA`` (the ``_2d``
        variant is used for detections without a 3D box).
    nn_budget : int or None
        Passed to `NearestNeighborDistanceMetric`.
    use_imm, markov, uncertainty_limit, optical_flow, gate_limit :
        Accepted for interface compatibility; ignored by this class.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        See Parameters.
    n_init : int
        See Parameters.
    kf : double_measurement_kf.KF_3D
        The filter shared by all tracks.
    tracks : List[Track_3d]
        The list of active tracks at the current time step.
    """

    def __init__(self, max_age=30, n_init=3,
                 JPDA=False, m_best_sol=1, assn_thresh=0.0,
                 matching_strategy=None, appearance_model = None,
                 gate_full_state=False, lstm = None, cuda = False, calib=None, omni=False,
                 kf_vel_params=(1./20, 1./160, 1, 1, 2), dummy_node_cost_iou=0.4, dummy_node_cost_app=0.2, nn_budget = None, use_imm=False,
                 markov=(0.9, 0.7), uncertainty_limit=1.8, optical_flow=False, gate_limit=400, dummy_node_cost_iou_2d=0.5):
        self.metric = NearestNeighborDistanceMetric("euclidean", nn_budget)
        self.max_age = max_age
        self.n_init = n_init
        self.kf = double_measurement_kf.KF_3D(calib, *kf_vel_params, omni=omni)
        self.tracks = []
        self._next_id = 1
        self.JPDA = JPDA
        self.m_best_sol = m_best_sol
        self.assn_thresh = assn_thresh
        self.matching_strategy = matching_strategy
        self.gate_only_position = not gate_full_state
        self.lstm = lstm
        self.cuda = cuda
        self.dummy_node_cost_app = dummy_node_cost_app
        self.dummy_node_cost_iou = dummy_node_cost_iou
        self.dummy_node_cost_iou_2d = dummy_node_cost_iou_2d
        self.appearance_model = appearance_model

    # @profile
    def gated_metric(self, tracks, dets, track_indices, detection_indices, compare_2d=None):
        """Compute the association costs and the gate for a set of pairs.

        Parameters
        ----------
        tracks, dets : list
            All tracks / detections.
        track_indices, detection_indices : list of int
            Rows / columns to evaluate.
        compare_2d : bool, optional
            Use 2D appearance features and 2D IoU. Forced to True when
            ``self.metric.check_samples`` returns a truthy value.

        Returns
        -------
        (ndarray, object)
            The appearance and IoU cost matrices stacked along a third axis,
            and the mask returned by ``linear_assignment.gate_cost_matrix``.
        """
        targets = np.array([tracks[i].track_id for i in track_indices])
        if not compare_2d and self.metric.check_samples(targets):
            compare_2d = True
        if compare_2d:
            features = torch.stack([dets[i].appearance_feature for i in detection_indices], dim=0)
        else:
            features = torch.stack([dets[i].feature for i in detection_indices], dim=0)
        cost_matrix_appearance = self.metric.distance_torch(features, targets, compare_2d)
        use_3d = not compare_2d
        if use_3d:
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d)
        else:
            # The 2D comparison additionally needs the filter.
            cost_matrix_iou = iou_matching.iou_cost(tracks, dets, track_indices, detection_indices, use3d=use_3d, kf=self.kf)
        gate_mask = linear_assignment.gate_cost_matrix(
            self.kf, tracks, dets, track_indices,
            detection_indices, only_position=self.gate_only_position, use3d=use_3d)
        cost_matrix = np.dstack((cost_matrix_appearance, cost_matrix_iou))
        return cost_matrix, gate_mask

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    # @profile
    def update(self, input_img, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        input_img : object
            Not used by this method.
        detections : list
            A list of detections at the current time step.
        """
        # The filter states of the tracks are updated inside _match().
        _, unmatched_tracks, unmatched_detections = self._match(detections)

        # Update track state for unmatched tracks, then drop deleted ones.
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        self.prune_tracks()

        # Create new tracks.
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

        # Hand the features gathered by the tracks to the distance metric
        # and empty the per-track caches.
        active_targets = [t.track_id for t in self.tracks]
        features, features_2d, targets, targets_2d = [], [], [], []
        for track in self.tracks:
            features += track.features
            features_2d += track.features_2d
            targets += [track.track_id for _ in track.features]
            targets_2d += [track.track_id for _ in track.features_2d]
            track.features = []
            track.features_2d = []
        self.metric.partial_fit(
            features, features_2d, targets, targets_2d, active_targets)

    # @profile
    def _match(self, detections):
        """Associate detections with tracks and update the track filters.

        Detections with a 3D box are matched against all tracks first;
        detections without one are then matched against the tracks left
        unmatched by the first pass.

        Returns
        -------
        (matches, unmatched_tracks, unmatched_detections)
            `unmatched_tracks` always refers to the first (3D) pass.

        Raises
        ------
        NotImplementedError
            When the tracker was built with ``JPDA=False``. That branch
            used names that were never defined and could only fail with a
            NameError.
        """
        if not self.JPDA:
            raise NotImplementedError(
                "Tracker_3d only implements JPDA association; "
                "construct it with JPDA=True.")

        # Split the detections by availability of a 3D box.
        det_3d_idx = []
        det_2d_idx = []
        for idx, det in enumerate(detections):
            if det.box_3d is not None:
                det_3d_idx.append(idx)
            else:
                det_2d_idx.append(idx)
        marginalizations = \
            linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app,
                                   self.dummy_node_cost_iou, self.tracks,
                                   detections, compare_2d=False,
                                   detection_indices=det_3d_idx)
        dets_matching_3d = [detections[i] for i in det_3d_idx]
        jpda_matcher = JPDA_matching.Matcher(
            detections, marginalizations, range(len(self.tracks)),
            self.matching_strategy, assignment_threshold=self.assn_thresh)
        matches_a, unmatched_tracks_a, unmatched_detections = jpda_matcher.match()

        # Map matched tracks to detections and unmatched tracks to -1.
        track_detection_map = {t: d for (t, d) in matches_a}
        for t in unmatched_tracks_a:
            track_detection_map[t] = -1

        if det_2d_idx:
            marginalizations_2d = \
                linear_assignment.JPDA(self.gated_metric, self.dummy_node_cost_app, self.dummy_node_cost_iou_2d, self.tracks,
                                       detections, compare_2d=True, detection_indices=det_2d_idx, track_indices=unmatched_tracks_a)
            dets_matching_2d = [detections[i] for i in det_2d_idx]
            jpda_matcher = JPDA_matching.Matcher(
                dets_matching_2d, marginalizations_2d, range(len(unmatched_tracks_a)),
                self.matching_strategy, assignment_threshold=self.assn_thresh)
            # NOTE(review): this overwrites matches_a / unmatched_detections
            # of the 3D pass with results that appear to be indexed relative
            # to dets_matching_2d, while update() indexes the full detection
            # list with them - verify against JPDA_matching.Matcher.
            matches_a, unmatched_tracks_2d, unmatched_detections = jpda_matcher.match()
            track_detection_map_2d = {unmatched_tracks_a[t]: d for (t, d) in matches_a}
            for t in unmatched_tracks_2d:
                track_detection_map_2d[unmatched_tracks_a[t]] = -1

        # Update the Kalman state of every track.
        if marginalizations.shape[0] > 0:
            for i in range(len(self.tracks)):
                if det_2d_idx and i in unmatched_tracks_a:
                    self.tracks[i].update(self.kf, dets_matching_2d,
                        marginalization=marginalizations_2d[unmatched_tracks_a.index(i),:], detection_idx=track_detection_map_2d[i],
                        JPDA=self.JPDA, lstm = self.lstm, compare_2d=True)
                else:
                    self.tracks[i].update(self.kf, dets_matching_3d,
                        marginalization=marginalizations[i,:], detection_idx=track_detection_map[i],
                        JPDA=self.JPDA, lstm = self.lstm)
        return matches_a, unmatched_tracks_a, unmatched_detections

    def _initiate_track(self, detection):
        """Start a new track from `detection`; detections without a 3D box
        are ignored."""
        if detection.box_3d is None:
            return
        mean, covariance = self.kf.initiate(detection.box_3d)
        self.tracks.append(Track_3d(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            feature=detection.feature, appearance_feature = detection.appearance_feature,
            cuda = self.cuda, lstm = self.lstm))
        self._next_id += 1

    def prune_tracks(self):
        """Drop every track that has been marked as deleted."""
        self.tracks = [t for t in self.tracks if not t.is_deleted()]
================================================
FILE: src/tracker_3d_node 2.py
================================================
#!/home/sibot/anaconda2/bin/python
""" yolo_bbox_to_sort.py
Subscribe to the time-synchronised 2D and 3D detections (with features), run the JPDA 3D tracker on them, and publish the confirmed tracks as TrackedPersons on /jpda_output
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import os
import message_filters
from featurepointnet_model_util import generate_detections_3d, \
convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from jpda_rospack.msg import detection3d_with_feature_array, \
detection3d_with_feature, detection2d_with_feature_array
from tracking_utils import convert_detections, combine_features
from combination_model import CombiNet
from tracker_3d import Tracker_3d
from visualization_msgs.msg import MarkerArray, Marker
from std_msgs.msg import Int8
from geometry_msgs.msg import Pose, PoseWithCovariance
from spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons
import pdb
class Tracker_3D_node:
def __init__(self):
self.node_name = "tracker_3d"
rospy.init_node(self.node_name)
rospy.on_shutdown(self.cleanup)
self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))
calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
calib = OmniCalibration(calibration_folder)
self.tracker = Tracker_3d(max_age=25, n_init=3,
JPDA=True, m_best_sol=10, assn_thresh=0.6,
matching_strategy='hungarian',
cuda=True, calib=calib, omni=True,
kf_vel_params=(0.08, 0.03, 0.01, 0.03,
1.2, 3.9, 0.8, 1.6),
dummy_node_cost_iou=0.9, dummy_node_cost_app=6,
nn_budget=3, dummy_node_cost_iou_2d=0.5)
combination_model_path = rospy.get_param('~combination_model_path', False)
if combination_model_path:
self.combination_model = CombiNet()
checkpoint = torch.load(combination_model_path)
self.combination_model.load_state_dict(checkpoint['state_dict'])
try:
combination_model.cuda()
except:
pass
self.combination_model.eval()
else:
self.combination_model = None
self.detection_2d_sub = \
message_filters.Subscriber("detection2d_with_feature",
detection2d_with_feature_array,
queue_size=5)
self.detection_3d_sub = \
message_filters.Subscriber("detection3d_with_feature",
detection3d_with_feature_array,
queue_size=5)
# self.detection_2d_sub.registerCallback(self.find_time_diff_2d)
# self.detection_3d_sub.registerCallback(self.find_time_diff_3d)
# self.last_seen_2d = 0
# self.last_seen_3d = 0
self.time_sync = \
message_filters.TimeSynchronizer([self.detection_2d_sub,
self.detection_3d_sub],
5)
self.time_sync.registerCallback(self.do_3d_tracking)
self.tracker_output_pub = rospy.Publisher("/jpda_output", TrackedPersons,
queue_size=30)
self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
rospy.loginfo("Ready.")
def do_3d_tracking(self, detections_2d, detections_3d):
    """Run one tracking step on a synchronized 2D/3D detection pair.

    The two detection arrays are sorted by ``frame_det_id`` and walked in
    lock-step; only ids present in BOTH messages are handed to the tracker.
    Afterwards every confirmed track is published as a ``TrackedPerson``
    inside a ``TrackedPersons`` message stamped with the 3D message's time.

    Args:
        detections_2d: message exposing ``detection2d_with_features``.
        detections_3d: message exposing ``detection3d_with_features`` and
            ``header.stamp``.
    """
    start = time.time()

    # Index-aligned per-detection inputs for the tracker.
    boxes_2d, boxes_3d = [], []
    valid_3d = []
    features_2d, features_3d = [], []

    det_id_of = lambda det: det.frame_det_id
    dets_2d = sorted(detections_2d.detection2d_with_features, key=det_id_of)
    dets_3d = sorted(detections_3d.detection3d_with_features, key=det_id_of)

    # Two-pointer merge over both id-sorted lists.
    pos_2d = pos_3d = 0
    while pos_2d < len(dets_2d) and pos_3d < len(dets_3d):
        det_2d, det_3d = dets_2d[pos_2d], dets_3d[pos_3d]
        if det_2d.frame_det_id < det_3d.frame_det_id:
            # 2D detection without a 3D partner: drop it.
            pos_2d += 1
            continue
        if det_2d.frame_det_id != det_3d.frame_det_id:
            # 3D detection without a 2D partner: drop it.
            pos_3d += 1
            continue

        pos_2d += 1
        pos_3d += 1
        valid_3d.append(det_3d.valid)
        boxes_2d.append(np.array([det_2d.x1, det_2d.y1, det_2d.x2, det_2d.y2,
                                  1, -1, -1]))
        features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))
        if not det_3d.valid:
            # Keep the slot so all lists stay index-aligned.
            boxes_3d.append(None)
            features_3d.append(None)
            continue
        boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z,
                                  det_3d.l, det_3d.h, det_3d.w, det_3d.theta]))
        features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))

    if not boxes_3d:
        boxes_3d = None

    features_3d, features_2d = combine_features(
        features_2d, features_3d, valid_3d, self.combination_model,
        depth_weight=self.depth_weight)
    detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)

    self.tracker.predict()
    self.tracker.update(None, detections)

    stamp = detections_3d.header.stamp
    tracked_array = TrackedPersons()
    tracked_array.header.stamp = stamp
    tracked_array.header.frame_id = 'occam'

    confirmed_tracks = (t for t in self.tracker.tracks if t.is_confirmed())
    for track in confirmed_tracks:
        position = Pose()
        person = TrackedPerson()
        person.header.stamp = stamp
        person.header.frame_id = 'occam'
        person.track_id = track.track_id
        person.is_matched = bool(track.time_since_update < 2)

        bbox = track.to_tlwh3d()
        covariance = track.get_cov().reshape(-1).tolist()
        position.position.x = bbox[0]
        # NOTE(review): presumably shifts y by half the box height; confirm
        # against the layout returned by to_tlwh3d().
        position.position.y = bbox[1] - bbox[4]/2
        position.position.z = bbox[2]
        person.pose = PoseWithCovariance(pose=position, covariance=covariance)
        tracked_array.tracks.append(person)

    self.tracker_output_pub.publish(tracked_array)
    #rospy.loginfo("tracker time: {}".format(time.time() - start))
def find_time_diff_2d(self, a):
    """Debug helper: print the 2D stamp minus the last 3D stamp, then record it.

    Reads ``self.last_seen_3d`` and writes ``self.last_seen_2d``.
    """
    stamp_2d = a.header.stamp
    print(stamp_2d - self.last_seen_3d)
    self.last_seen_2d = stamp_2d
def find_time_diff_3d(self, a):
    """Debug helper: print the 3D stamp minus the last 2D stamp, then record it.

    Reads ``self.last_seen_2d`` and writes ``self.last_seen_3d``.
    """
    stamp_3d = a.header.stamp
    print(stamp_3d - self.last_seen_2d)
    self.last_seen_3d = stamp_3d
def cleanup(self):
    """Shutdown hook: announce shutdown and drop the node's main attributes.

    Raises AttributeError if one of the attributes is already missing, just
    like a plain ``del self.<name>`` would.
    """
    print("Shutting down 3D tracking node.")
    released = (
        "combination_model",
        "tracker",
        "detection_2d_sub",
        "detection_3d_sub",
        "time_sync",
        "tracker_output_pub",
    )
    for attr_name in released:
        delattr(self, attr_name)
def main(args):
    """Create the tracking node and block in ``rospy.spin()``.

    ``args`` is accepted for symmetry with the ``__main__`` guard but is not
    used. A KeyboardInterrupt is reported instead of propagated.
    """
    try:
        Tracker_3D_node()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D tracking node.")


# Script entry point.
if __name__ == '__main__':
    main(sys.argv)
================================================
FILE: src/tracker_3d_node.py
================================================
#!/home/sibot/anaconda2/bin/python
""" tracker_3d_node.py
Subscribe to time-synchronized 2D and 3D detections (each carrying a feature vector), run the JPDA 3D tracker on them, and publish the confirmed tracks as TrackedPersons on /jpda_output
"""
import time
import rospy
import ros_numpy
import sys
import numpy as np
import torch
import os
import message_filters
from featurepointnet_model_util import generate_detections_3d, \
convert_depth_features
from featurepointnet_model import create_depth_model
from calibration import OmniCalibration
from jpda_rospack.msg import detection3d_with_feature_array, \
detection3d_with_feature, detection2d_with_feature_array
from tracking_utils import convert_detections, combine_features
from combination_model import CombiNet
from tracker_3d import Tracker_3d
from visualization_msgs.msg import MarkerArray, Marker
from std_msgs.msg import Int8
from geometry_msgs.msg import Pose, PoseWithCovariance
from spencer_tracking_msgs.msg import TrackedPerson, TrackedPersons
import pdb
class Tracker_3D_node:
    """ROS node that turns synchronized 2D/3D detections into tracked persons.

    Subscribes to ``detection2d_with_feature`` and ``detection3d_with_feature``,
    pairs the two streams with a ``TimeSynchronizer``, feeds the paired
    detections to a ``Tracker_3d`` instance and publishes the confirmed tracks
    as ``TrackedPersons`` on ``/jpda_output``.
    """

    def __init__(self):
        """Initialise the ROS node, the tracker, the optional combination
        model, and all subscribers/publishers.

        ROS parameters read:
            ~combination_depth_weight: weight applied to depth features (default 1).
            ~calib_3d: calibration folder passed to ``OmniCalibration``.
            ~combination_model_path: checkpoint for ``CombiNet``; when unset
                no combination model is used.
        """
        self.node_name = "tracker_3d"
        rospy.init_node(self.node_name)
        rospy.on_shutdown(self.cleanup)

        self.depth_weight = float(rospy.get_param('~combination_depth_weight', 1))
        calibration_folder = rospy.get_param('~calib_3d', 'src/jpda_rospack/calib/')
        calib = OmniCalibration(calibration_folder)
        self.tracker = Tracker_3d(max_age=25, n_init=3,
                                  JPDA=True, m_best_sol=10, assn_thresh=0.6,
                                  matching_strategy='hungarian',
                                  cuda=True, calib=calib, omni=True,
                                  kf_vel_params=(0.08, 0.03, 0.01, 0.03,
                                                 1.2, 3.9, 0.8, 1.6),
                                  dummy_node_cost_iou=0.9, dummy_node_cost_app=6,
                                  nn_budget=3, dummy_node_cost_iou_2d=0.5)

        combination_model_path = rospy.get_param('~combination_model_path', False)
        if combination_model_path:
            self.combination_model = CombiNet()
            checkpoint = torch.load(combination_model_path)
            self.combination_model.load_state_dict(checkpoint['state_dict'])
            # Move the model that was just loaded to the GPU. Previously an
            # undefined local name was used here and the resulting NameError
            # was swallowed, so the model silently stayed on the CPU.
            try:
                self.combination_model.cuda()
            except (RuntimeError, AssertionError) as err:
                # Stay best-effort like before, but make the failure visible.
                rospy.logwarn("Could not move combination model to GPU: %s", err)
            self.combination_model.eval()
        else:
            self.combination_model = None

        self.detection_2d_sub = \
            message_filters.Subscriber("detection2d_with_feature",
                                       detection2d_with_feature_array,
                                       queue_size=5)
        self.detection_3d_sub = \
            message_filters.Subscriber("detection3d_with_feature",
                                       detection3d_with_feature_array,
                                       queue_size=5)
        # Debug hooks for inspecting the 2D/3D stamp offset (disabled):
        # self.detection_2d_sub.registerCallback(self.find_time_diff_2d)
        # self.detection_3d_sub.registerCallback(self.find_time_diff_3d)
        # self.last_seen_2d = 0
        # self.last_seen_3d = 0
        self.time_sync = \
            message_filters.TimeSynchronizer([self.detection_2d_sub,
                                              self.detection_3d_sub],
                                             5)
        self.time_sync.registerCallback(self.do_3d_tracking)
        self.tracker_output_pub = rospy.Publisher("/jpda_output", TrackedPersons,
                                                  queue_size=30)
        self.debug_pub = rospy.Publisher("/test", Int8, queue_size=1)
        rospy.loginfo("Ready.")

    def do_3d_tracking(self, detections_2d, detections_3d):
        """Run one tracking step on a synchronized 2D/3D detection pair.

        Both detection lists are sorted by ``frame_det_id`` and merged; only
        ids present in both messages are passed to the tracker. Every
        confirmed track is then published with the 3D message's stamp.

        Args:
            detections_2d: message exposing ``detection2d_with_features``.
            detections_3d: message exposing ``detection3d_with_features`` and
                ``header.stamp``.
        """
        # Index-aligned per-detection inputs for the tracker.
        boxes_2d = []
        boxes_3d = []
        valid_3d = []
        features_2d = []
        features_3d = []
        dets_2d = sorted(detections_2d.detection2d_with_features,
                         key=lambda x: x.frame_det_id)
        dets_3d = sorted(detections_3d.detection3d_with_features,
                         key=lambda x: x.frame_det_id)

        # Two-pointer merge over the id-sorted lists.
        i, j = 0, 0
        while i < len(dets_2d) and j < len(dets_3d):
            det_2d = dets_2d[i]
            det_3d = dets_3d[j]
            if det_2d.frame_det_id == det_3d.frame_det_id:
                i += 1
                j += 1
                valid_3d.append(det_3d.valid)
                boxes_2d.append(np.array([det_2d.x1, det_2d.y1,
                                          det_2d.x2, det_2d.y2, 1, -1, -1]))
                features_2d.append(torch.Tensor(det_2d.feature).to('cuda:0'))
                if det_3d.valid:
                    boxes_3d.append(np.array([det_3d.x, det_3d.y, det_3d.z,
                                              det_3d.l, det_3d.h, det_3d.w,
                                              det_3d.theta]))
                    features_3d.append(torch.Tensor(det_3d.feature).to('cuda:0'))
                else:
                    # Keep the slot so all lists stay index-aligned.
                    boxes_3d.append(None)
                    features_3d.append(None)
            elif det_2d.frame_det_id < det_3d.frame_det_id:
                i += 1  # 2D detection without a 3D partner
            else:
                j += 1  # 3D detection without a 2D partner

        if not boxes_3d:
            boxes_3d = None
        features_3d, features_2d = combine_features(features_2d, features_3d,
                                                    valid_3d, self.combination_model,
                                                    depth_weight=self.depth_weight)
        detections = convert_detections(boxes_2d, features_3d, features_2d, boxes_3d)
        self.tracker.predict()
        self.tracker.update(None, detections)

        tracked_array = TrackedPersons()
        tracked_array.header.stamp = detections_3d.header.stamp
        tracked_array.header.frame_id = 'occam'
        for track in self.tracker.tracks:
            if not track.is_confirmed():
                continue
            pose_msg = Pose()
            tracked_person_msg = TrackedPerson()
            tracked_person_msg.header.stamp = detections_3d.header.stamp
            tracked_person_msg.header.frame_id = 'occam'
            tracked_person_msg.track_id = track.track_id
            tracked_person_msg.is_matched = bool(track.time_since_update < 2)
            bbox = track.to_tlwh3d()
            covariance = track.get_cov().reshape(-1).tolist()
            pose_msg.position.x = bbox[0]
            # NOTE(review): presumably shifts y by half the box height;
            # confirm against the layout returned by to_tlwh3d().
            pose_msg.position.y = bbox[1] - bbox[4]/2
            pose_msg.position.z = bbox[2]
            tracked_person_msg.pose = PoseWithCovariance(pose=pose_msg,
                                                         covariance=covariance)
            tracked_array.tracks.append(tracked_person_msg)
        self.tracker_output_pub.publish(tracked_array)

    def find_time_diff_2d(self, a):
        """Debug helper: print the 2D stamp minus the last 3D stamp.

        Only usable when the disabled registration and ``last_seen_*``
        initialisation in ``__init__`` are restored.
        """
        print(a.header.stamp - self.last_seen_3d)
        self.last_seen_2d = a.header.stamp

    def find_time_diff_3d(self, a):
        """Debug helper: print the 3D stamp minus the last 2D stamp.

        Only usable when the disabled registration and ``last_seen_*``
        initialisation in ``__init__`` are restored.
        """
        print(a.header.stamp - self.last_seen_2d)
        self.last_seen_3d = a.header.stamp

    def cleanup(self):
        """Shutdown hook: announce shutdown and drop the node's main attributes."""
        print("Shutting down 3D tracking node.")
        del self.combination_model
        del self.tracker
        del self.detection_2d_sub
        del self.detection_3d_sub
        del self.time_sync
        del self.tracker_output_pub
def main(args):
    """Create the tracking node and block in ``rospy.spin()``.

    ``args`` is accepted for symmetry with the ``__main__`` guard but is not
    used. A KeyboardInterrupt is reported instead of propagated.
    """
    try:
        Tracker_3D_node()
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down 3D tracking node.")


# Script entry point.
if __name__ == '__main__':
    main(sys.argv)
================================================
FILE: src/tracking_utils 2.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from aligned_reid_utils import load_state_dict
from featurepointnet_model_util import rotate_pc_along_y
from deep_sort_utils import non_max_suppression as deepsort_nms
import math
from detection import Detection
def create_detector(config_path, weight_path, cuda):
    """Build a ``Darknet`` detector, load its weights and switch it to eval mode.

    Args:
        config_path: passed to the ``Darknet`` constructor.
        weight_path: passed to ``load_weights``.
        cuda: when truthy, ``.cuda()`` is called on the detector.

    Returns:
        The detector after ``eval()`` has been called.
    """
    # NOTE(review): ``Darknet`` is not imported in the visible part of this
    # module; confirm it is in scope before calling this function.
    net = Darknet(config_path)
    net.load_weights(weight_path)
    if cuda:
        net.cuda()
    net.eval()
    return net
def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Crop the points inside each 3D box and resample them to ``num_point``.

    Boxes whose id is ``-1`` are skipped and produce ``None``. A box that
    contains no points also produces ``None`` and gets its id set to ``-1``
    (``ids_3d`` is modified in place).

    Args:
        point_cloud: array indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``.
        box_3d: sequence of boxes; indices 0-2 are used as the centre,
            3/4/5 as extents and 6 as the rotation about y.
        ids_3d: per-box ids, ``-1`` meaning "no 3D detection".
        rot_angles: per-box angle forwarded to ``get_center_view_point_set``.
        num_point: number of points every returned patch is resampled to.

    Returns:
        ``(depth_patches, ids_3d)``.
    """
    # NOTE(review): rotate_pc_along_y is presumably in-place (its return
    # value is ignored), and get_center_view_point_set is not imported in the
    # visible part of this module; confirm both.
    patches = []
    for box_idx, box in enumerate(box_3d):
        if ids_3d[box_idx] == -1:
            patches.append(None)
            continue

        # Rotate the centre and the cloud so the box becomes axis-aligned.
        heading = np.pi/2 + np.squeeze(box[6])
        center = np.asarray([ [box[0], box[1], box[2]] ])
        rotate_pc_along_y(center, heading)
        center = center[0]
        rotate_pc_along_y(point_cloud, heading)

        xs = point_cloud[:, 0]
        ys = point_cloud[:, 1]
        zs = point_cloud[:, 2]
        in_x = np.logical_and(xs >= float(center[0] - box[3]/2.0),
                              xs <= float(center[0] + box[3]/2.0))
        in_y = np.logical_and(ys <= (center[1]+0.1),
                              ys >= float(center[1] - box[4]))
        in_z = np.logical_and(zs >= float(center[2] - box[5]/2.0),
                              zs <= float(center[2] + box[5]/2.0))
        inside = np.logical_and(np.logical_and(in_x, in_y), in_z)
        depth_patch = point_cloud[inside, :]

        # Undo the rotation: on the cloud for the next box, on the crop for output.
        rotate_pc_along_y(point_cloud, -heading)
        rotate_pc_along_y(depth_patch, -heading)

        if depth_patch.size == 0:
            ids_3d[box_idx] = -1
            patches.append(None)
            continue

        n_inside = depth_patch.shape[0]
        if n_inside > num_point:
            # Too many points: subsample without replacement.
            keep = np.random.choice(range(n_inside), size = (num_point), replace=False)
            resampled = depth_patch[keep]
        else:
            # Too few points: pad with randomly repeated points.
            extra = np.random.choice(range(n_inside), size = (num_point - n_inside), replace=True)
            resampled = np.vstack([depth_patch, depth_patch[extra]])
        pc_in_box_fov = np.expand_dims(resampled, 0)
        patches.append(get_center_view_point_set(pc_in_box_fov, rot_angles[box_idx])[0])
    return patches, ids_3d
def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Apply box-based NMS and blank out every suppressed detection.

    ``deepsort_nms`` decides which indices survive. For every other index the
    2D and 3D ids are set to ``-1`` and the box and detection are set to
    ``None``. All four lists are modified in place.

    Args:
        detections: per-detection entries, index-aligned with ``boxes_3d``.
        boxes_3d: boxes handed to ``deepsort_nms``.
        ids_3d: per-detection 3D ids.
        ids_2d: per-detection 2D ids.
        nms_thresh: overlap threshold forwarded to ``deepsort_nms``.
        confidence: optional scores; squeezed before being forwarded.

    Returns:
        ``(detections, boxes_3d, ids_2d, ids_3d)``.
    """
    # np.squeeze(None) yields a 0-d object array rather than None, so forward
    # a real None when no scores were given.
    scores = None if confidence is None else np.squeeze(confidence)
    kept = set(deepsort_nms(boxes_3d, nms_thresh, scores))
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d
def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Point-cloud NMS over depth patches using ``iou_3d`` as overlap measure.

    Patches are visited from the end of the (optionally confidence-sorted)
    index list. A visited patch is kept, and every remaining patch whose
    overlap with it exceeds ``nms_thresh`` is dropped. Patches that are not
    kept have their ids set to ``-1`` if their 3D id was not already ``-1``.
    ``ids_3d`` and ``ids_2d`` are modified in place.

    Args:
        depth_patches: list of point arrays (or ``None`` entries).
        ids_3d: per-patch 3D ids.
        ids_2d: per-patch 2D ids.
        nms_thresh: overlap above which a patch is suppressed.
        confidence: optional scores; highest score is visited first.

    Returns:
        ``(depth_patches, ids_3d, ids_2d)`` for every input, including an
        empty one.
    """
    if len(depth_patches) == 0:
        # Same tuple shape as the normal path so callers can always unpack.
        return depth_patches, ids_3d, ids_2d
    pick = []
    if confidence is not None:
        idxs = np.argsort(confidence)
    else:
        idxs = list(range(len(depth_patches)))
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        if np.any(overlap == -np.inf):
            # iou_3d signals a missing current patch with -inf: discard it.
            idxs = np.delete(idxs, [last])
            continue
        pick.append(i)
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))
    kept = set(pick)
    for i in range(len(depth_patches)):
        if i not in kept:
            if ids_3d[i]!=-1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d
def iou_3d(patch_1, patch_2):
    """Point-set overlap ratio between two patches.

    A point of ``patch_1`` counts as intersecting when some point of
    ``patch_2`` lies closer than 0.3 to it. Duplicate points are removed from
    both patches first.

    Args:
        patch_1: array of shape (N, 3) or (N, 4), or ``None``.
        patch_2: array of shape (M, 3) or (M, 4), or ``None``.

    Returns:
        ``np.inf`` when ``patch_2`` is ``None``, ``-np.inf`` when only
        ``patch_1`` is ``None``, otherwise intersection / union as a float
        (``0.0`` when both patches are empty).
    """
    if patch_2 is None:
        return np.inf
    elif patch_1 is None:
        return -np.inf
    # Unique points
    patch_unique_1 = np.unique(patch_1, axis = 0)
    patch_unique_2 = np.unique(patch_2, axis = 0)
    intersection_points = 0
    for point in patch_unique_1:
        point_distance = np.sqrt(np.sum((point - patch_unique_2)**2, axis = 1))
        intersection_points += int(np.any(point_distance < 0.3))
    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points
    if union_points == 0:
        return 0.0
    # Force true division: under Python 2 two integers would floor to 0 or 1.
    return float(intersection_points) / union_points
def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw per-detection data into ``Detection`` objects.

    Args:
        detections: 7-element rows ``(x1, y1, x2, y2, conf, _, _)``.
        features: per-detection feature, or ``None``.
        appearance_features: per-detection appearance feature.
        detections_3d: 7-element 3D boxes (entries may be ``None``), or
            ``None`` when there are no 3D boxes at all.

    Returns:
        A list with one ``Detection`` per zipped input row. The 2D box is
        converted to ``[x1, y1, width, height]``. The 3D box is passed as
        ``None`` when either it or the feature is ``None``.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    converted = []
    rows = zip(detections, features, appearance_features, detections_3d)
    for det_2d, feat, app_feat, det_3d in rows:
        left, top, right, bottom, conf, _, _ = det_2d
        tlwh = [left, top, right-left, bottom-top]

        cuboid = None
        if det_3d is not None:
            # Unpacking also enforces that the box has exactly 7 entries.
            c0, c1, c2, c3, c4, c5, c6 = det_3d
            cuboid = [c0, c1, c2, c3, c4, c5, c6]

        # Without a feature the 3D box is not forwarded.
        if feat is None:
            cuboid = None
        converted.append(Detection(tlwh, cuboid, conf, app_feat, feat))
    return converted
def combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):
    """Concatenate appearance and (weighted) depth features per detection.

    For entries whose ``ids_3d`` value is falsy the depth feature is replaced
    by a 512-element zero vector. That placeholder is created on the same
    device and with the same dtype as the appearance feature, so the function
    also works when the features do not live on ``cuda:0``.

    Args:
        features: list of 1-D appearance feature tensors.
        depth_features: list of 1-D depth feature tensors, ignored where
            ``ids_3d`` is falsy.
        ids_3d: per-detection flag/id; falsy means "no usable depth feature".
        combination_model: optional module applied to the stacked combined
            features; ``None`` to return the plain concatenations.
        depth_weight: scalar multiplied onto every depth feature.

    Returns:
        ``(combined_features, appearance_features)``, both lists.
    """
    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if not ids_3d[i]:
            # Zero placeholder that follows the appearance feature's device/dtype.
            depth_feature = appearance_feature.new_zeros(512)
        combined_features.append(torch.cat([appearance_feature, depth_feature * depth_weight]))
        appearance_features.append(appearance_feature)
    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        combined_feature = torch.stack(combined_features)
        combined_features = combination_model(combined_feature).detach()
        combined_features = list(torch.unbind(combined_features))
    return combined_features, appearance_features
def filter(detections):
    """Blank out detections that touch both a side and a top/bottom image border.

    The thresholds assume a 1242 x 375 image with a 10 pixel margin. A
    detection is replaced by ``None`` (in place) when it is near the left or
    right border AND near the top or bottom border.

    Args:
        detections: list of rows indexed as ``[left, top, right, bottom, ...]``.

    Returns:
        The same list object.
    """
    for pos in range(len(detections)):
        row = detections[pos]
        left = row[0]
        top = row[1]
        right = row[2]
        bottom = row[3]
        near_side = left < 10 or right > 1232
        near_top_or_bottom = top < 10 or bottom > 365
        if near_side and near_top_or_bottom:
            detections[pos] = None
    return detections
================================================
FILE: src/tracking_utils.py
================================================
import torch, sys, os, pdb
import numpy as np
from PIL import Image
from scipy.spatial import Delaunay
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from aligned_reid_utils import load_state_dict
from featurepointnet_model_util import rotate_pc_along_y
from deep_sort_utils import non_max_suppression as deepsort_nms
import math
from detection import Detection
def create_detector(config_path, weight_path, cuda):
    """Build a ``Darknet`` detector, load its weights and switch it to eval mode.

    Args:
        config_path: passed to the ``Darknet`` constructor.
        weight_path: passed to ``load_weights``.
        cuda: when truthy, ``.cuda()`` is called on the detector.

    Returns:
        The detector after ``eval()`` has been called.
    """
    # NOTE(review): ``Darknet`` is not imported in the visible part of this
    # module; confirm it is in scope before calling this function.
    net = Darknet(config_path)
    net.load_weights(weight_path)
    if cuda:
        net.cuda()
    net.eval()
    return net
def get_depth_patches(point_cloud, box_3d, ids_3d, rot_angles, num_point = 1024):
    """Crop the points inside each 3D box and resample them to ``num_point``.

    Boxes whose id is ``-1`` are skipped and produce ``None``. A box that
    contains no points also produces ``None`` and gets its id set to ``-1``
    (``ids_3d`` is modified in place).

    Args:
        point_cloud: array indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``.
        box_3d: sequence of boxes; indices 0-2 are used as the centre,
            3/4/5 as extents and 6 as the rotation about y.
        ids_3d: per-box ids, ``-1`` meaning "no 3D detection".
        rot_angles: per-box angle forwarded to ``get_center_view_point_set``.
        num_point: number of points every returned patch is resampled to.

    Returns:
        ``(depth_patches, ids_3d)``.
    """
    # NOTE(review): rotate_pc_along_y is presumably in-place (its return
    # value is ignored), and get_center_view_point_set is not imported in the
    # visible part of this module; confirm both.
    patches = []
    for box_idx, box in enumerate(box_3d):
        if ids_3d[box_idx] == -1:
            patches.append(None)
            continue

        # Rotate the centre and the cloud so the box becomes axis-aligned.
        heading = np.pi/2 + np.squeeze(box[6])
        center = np.asarray([ [box[0], box[1], box[2]] ])
        rotate_pc_along_y(center, heading)
        center = center[0]
        rotate_pc_along_y(point_cloud, heading)

        xs = point_cloud[:, 0]
        ys = point_cloud[:, 1]
        zs = point_cloud[:, 2]
        in_x = np.logical_and(xs >= float(center[0] - box[3]/2.0),
                              xs <= float(center[0] + box[3]/2.0))
        in_y = np.logical_and(ys <= (center[1]+0.1),
                              ys >= float(center[1] - box[4]))
        in_z = np.logical_and(zs >= float(center[2] - box[5]/2.0),
                              zs <= float(center[2] + box[5]/2.0))
        inside = np.logical_and(np.logical_and(in_x, in_y), in_z)
        depth_patch = point_cloud[inside, :]

        # Undo the rotation: on the cloud for the next box, on the crop for output.
        rotate_pc_along_y(point_cloud, -heading)
        rotate_pc_along_y(depth_patch, -heading)

        if depth_patch.size == 0:
            ids_3d[box_idx] = -1
            patches.append(None)
            continue

        n_inside = depth_patch.shape[0]
        if n_inside > num_point:
            # Too many points: subsample without replacement.
            keep = np.random.choice(range(n_inside), size = (num_point), replace=False)
            resampled = depth_patch[keep]
        else:
            # Too few points: pad with randomly repeated points.
            extra = np.random.choice(range(n_inside), size = (num_point - n_inside), replace=True)
            resampled = np.vstack([depth_patch, depth_patch[extra]])
        pc_in_box_fov = np.expand_dims(resampled, 0)
        patches.append(get_center_view_point_set(pc_in_box_fov, rot_angles[box_idx])[0])
    return patches, ids_3d
def non_max_suppression_3D_prime(detections, boxes_3d, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Apply box-based NMS and blank out every suppressed detection.

    ``deepsort_nms`` decides which indices survive. For every other index the
    2D and 3D ids are set to ``-1`` and the box and detection are set to
    ``None``. All four lists are modified in place.

    Args:
        detections: per-detection entries, index-aligned with ``boxes_3d``.
        boxes_3d: boxes handed to ``deepsort_nms``.
        ids_3d: per-detection 3D ids.
        ids_2d: per-detection 2D ids.
        nms_thresh: overlap threshold forwarded to ``deepsort_nms``.
        confidence: optional scores; squeezed before being forwarded.

    Returns:
        ``(detections, boxes_3d, ids_2d, ids_3d)``.
    """
    # np.squeeze(None) yields a 0-d object array rather than None, so forward
    # a real None when no scores were given.
    scores = None if confidence is None else np.squeeze(confidence)
    kept = set(deepsort_nms(boxes_3d, nms_thresh, scores))
    for i in range(len(ids_3d)):
        if i not in kept:
            ids_3d[i] = -1
            ids_2d[i] = -1
            boxes_3d[i] = None
            detections[i] = None
    return detections, boxes_3d, ids_2d, ids_3d
def non_max_suppression_3D(depth_patches, ids_3d, ids_2d, nms_thresh = 1, confidence = None):
    """Point-cloud NMS over depth patches using ``iou_3d`` as overlap measure.

    Patches are visited from the end of the (optionally confidence-sorted)
    index list. A visited patch is kept, and every remaining patch whose
    overlap with it exceeds ``nms_thresh`` is dropped. Patches that are not
    kept have their ids set to ``-1`` if their 3D id was not already ``-1``.
    ``ids_3d`` and ``ids_2d`` are modified in place.

    Args:
        depth_patches: list of point arrays (or ``None`` entries).
        ids_3d: per-patch 3D ids.
        ids_2d: per-patch 2D ids.
        nms_thresh: overlap above which a patch is suppressed.
        confidence: optional scores; highest score is visited first.

    Returns:
        ``(depth_patches, ids_3d, ids_2d)`` for every input, including an
        empty one.
    """
    if len(depth_patches) == 0:
        # Same tuple shape as the normal path so callers can always unpack.
        return depth_patches, ids_3d, ids_2d
    pick = []
    if confidence is not None:
        idxs = np.argsort(confidence)
    else:
        idxs = list(range(len(depth_patches)))
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        overlap = np.asarray([iou_3d(depth_patches[i], depth_patches[idxs[x]]) for x in range(last)])
        if np.any(overlap == -np.inf):
            # iou_3d signals a missing current patch with -inf: discard it.
            idxs = np.delete(idxs, [last])
            continue
        pick.append(i)
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > nms_thresh)[0])))
    kept = set(pick)
    for i in range(len(depth_patches)):
        if i not in kept:
            if ids_3d[i]!=-1:
                ids_2d[i] = -1
            ids_3d[i] = -1
    return depth_patches, ids_3d, ids_2d
def iou_3d(patch_1, patch_2):
    """Point-set overlap ratio between two patches.

    A point of ``patch_1`` counts as intersecting when some point of
    ``patch_2`` lies closer than 0.3 to it. Duplicate points are removed from
    both patches first.

    Args:
        patch_1: array of shape (N, 3) or (N, 4), or ``None``.
        patch_2: array of shape (M, 3) or (M, 4), or ``None``.

    Returns:
        ``np.inf`` when ``patch_2`` is ``None``, ``-np.inf`` when only
        ``patch_1`` is ``None``, otherwise intersection / union as a float
        (``0.0`` when both patches are empty).
    """
    if patch_2 is None:
        return np.inf
    elif patch_1 is None:
        return -np.inf
    # Unique points
    patch_unique_1 = np.unique(patch_1, axis = 0)
    patch_unique_2 = np.unique(patch_2, axis = 0)
    intersection_points = 0
    for point in patch_unique_1:
        point_distance = np.sqrt(np.sum((point - patch_unique_2)**2, axis = 1))
        intersection_points += int(np.any(point_distance < 0.3))
    union_points = patch_unique_1.shape[0] + patch_unique_2.shape[0] - intersection_points
    if union_points == 0:
        return 0.0
    # Force true division: under Python 2 two integers would floor to 0 or 1.
    return float(intersection_points) / union_points
def convert_detections(detections, features, appearance_features, detections_3d):
    """Wrap raw per-detection data into ``Detection`` objects.

    Args:
        detections: 7-element rows ``(x1, y1, x2, y2, conf, _, _)``.
        features: per-detection feature, or ``None``.
        appearance_features: per-detection appearance feature.
        detections_3d: 7-element 3D boxes (entries may be ``None``), or
            ``None`` when there are no 3D boxes at all.

    Returns:
        A list with one ``Detection`` per zipped input row. The 2D box is
        converted to ``[x1, y1, width, height]``. The 3D box is passed as
        ``None`` when either it or the feature is ``None``.
    """
    if detections_3d is None:
        detections_3d = [None] * len(detections)

    converted = []
    rows = zip(detections, features, appearance_features, detections_3d)
    for det_2d, feat, app_feat, det_3d in rows:
        left, top, right, bottom, conf, _, _ = det_2d
        tlwh = [left, top, right-left, bottom-top]

        cuboid = None
        if det_3d is not None:
            # Unpacking also enforces that the box has exactly 7 entries.
            c0, c1, c2, c3, c4, c5, c6 = det_3d
            cuboid = [c0, c1, c2, c3, c4, c5, c6]

        # Without a feature the 3D box is not forwarded.
        if feat is None:
            cuboid = None
        converted.append(Detection(tlwh, cuboid, conf, app_feat, feat))
    return converted
def combine_features(features, depth_features, ids_3d, combination_model, depth_weight=1):
    """Concatenate appearance and (weighted) depth features per detection.

    For entries whose ``ids_3d`` value is falsy the depth feature is replaced
    by a 512-element zero vector. That placeholder is created on the same
    device and with the same dtype as the appearance feature, so the function
    also works when the features do not live on ``cuda:0``.

    Args:
        features: list of 1-D appearance feature tensors.
        depth_features: list of 1-D depth feature tensors, ignored where
            ``ids_3d`` is falsy.
        ids_3d: per-detection flag/id; falsy means "no usable depth feature".
        combination_model: optional module applied to the stacked combined
            features; ``None`` to return the plain concatenations.
        depth_weight: scalar multiplied onto every depth feature.

    Returns:
        ``(combined_features, appearance_features)``, both lists.
    """
    combined_features = []
    appearance_features = []
    for i, (appearance_feature, depth_feature) in enumerate(zip(features, depth_features)):
        if not ids_3d[i]:
            # Zero placeholder that follows the appearance feature's device/dtype.
            depth_feature = appearance_feature.new_zeros(512)
        combined_features.append(torch.cat([appearance_feature, depth_feature * depth_weight]))
        appearance_features.append(appearance_feature)
    if combination_model is not None and len(combined_features) > 0:
        combination_model.eval()
        combined_feature = torch.stack(combined_features)
        combined_features = combination_model(combined_feature).detach()
        combined_features = list(torch.unbind(combined_features))
    return combined_features, appearance_features
def filter(detections):
    """Blank out detections that touch both a side and a top/bottom image border.

    The thresholds assume a 1242 x 375 image with a 10 pixel margin. A
    detection is replaced by ``None`` (in place) when it is near the left or
    right border AND near the top or bottom border.

    Args:
        detections: list of rows indexed as ``[left, top, right, bottom, ...]``.

    Returns:
        The same list object.
    """
    for pos in range(len(detections)):
        row = detections[pos]
        left = row[0]
        top = row[1]
        right = row[2]
        bottom = row[3]
        near_side = left < 10 or right > 1232
        near_top_or_bottom = top < 10 or bottom > 365
        if near_side and near_top_or_bottom:
            detections[pos] = None
    return detections