Repository: hpc203/bytetrack-opencv-onnxruntime Branch: main Commit: ab6f5173b9d8 Files: 42 Total size: 182.5 KB Directory structure: gitextract_zzy2l3w3/ ├── README.md ├── onnxruntime/ │ ├── README.md │ ├── cpp/ │ │ ├── CMakeLists.txt │ │ ├── include/ │ │ │ ├── BYTETracker.h │ │ │ ├── STrack.h │ │ │ ├── dataType.h │ │ │ ├── kalmanFilter.h │ │ │ └── lapjv.h │ │ └── src/ │ │ ├── BYTETracker.cpp │ │ ├── STrack.cpp │ │ ├── kalmanFilter.cpp │ │ ├── lapjv.cpp │ │ ├── main.cpp │ │ └── utils.cpp │ └── python/ │ ├── byte_tracker/ │ │ ├── byte_tracker_onnx.py │ │ ├── tracker/ │ │ │ ├── basetrack.py │ │ │ ├── byte_tracker.py │ │ │ ├── kalman_filter.py │ │ │ └── matching.py │ │ └── utils/ │ │ └── yolox_utils.py │ └── main.py ├── opencv/ │ ├── README.md │ ├── cpp/ │ │ ├── CMakeLists.txt │ │ ├── include/ │ │ │ ├── BYTETracker.h │ │ │ ├── STrack.h │ │ │ ├── dataType.h │ │ │ ├── kalmanFilter.h │ │ │ └── lapjv.h │ │ └── src/ │ │ ├── BYTETracker.cpp │ │ ├── STrack.cpp │ │ ├── kalmanFilter.cpp │ │ ├── lapjv.cpp │ │ ├── main.cpp │ │ └── utils.cpp │ └── python/ │ ├── byte_tracker/ │ │ ├── byte_tracker_onnx.py │ │ ├── tracker/ │ │ │ ├── basetrack.py │ │ │ ├── byte_tracker.py │ │ │ ├── kalman_filter.py │ │ │ └── matching.py │ │ └── utils/ │ │ └── yolox_utils.py │ └── main.py └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # bytetrack-opencv-onnxruntime 使用OpenCV部署YOLOX+ByteTrack目标跟踪,包含C++和Python两个版本的程序。 使用ONNXRuntime部署YOLOX+ByteTrack目标跟踪,包含C++和Python两个版本的程序。 由于.onnx文件超过25M,无法直接上传。.onnx文件和测试文件sample.mp4 在百度云盘,链接: https://pan.baidu.com/s/1dYz_Ru5EgFg_DZhdDHH-Sg 密码: iwtm 安装eigen库和编译C++程序的指令,详情请看文件夹里的readme 运行python程序之前,需要先安装依赖库。如果在安装cython_bbox失败, 请参见文章 https://www.jianshu.com/p/51f7f7f8b262 ================================================ FILE: onnxruntime/README.md 
================================================ # ByteTrack-onnxruntime #### 安装 首先确保你的机器安装了opencv和onnxruntime,接下来安装 `eigen` ```shell unzip eigen-3.3.9.zip cd eigen-3.3.9 mkdir build cd build cmake .. sudo make install ``` #### 编译C++程序 ``` mkdir build cd build && rm -rf * cmake .. && make ``` #### 运行 python版本: ``` python main.py ``` C++版本: ``` ./bytetrack-onnxrun-cpp /home/ByteTrack/sample.mp4 ``` ================================================ FILE: onnxruntime/cpp/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.5) project(bytetrack-onnxrun-cpp) set(CMAKE_CXX_STANDARD 11) find_package(OpenCV REQUIRED) set(onnxruntime_INCLUDE_DIRS /opt/onnxruntime-linux-x64-1.6.0/include) include_directories(${onnxruntime_INCLUDE_DIRS}) include_directories(include) include_directories(/usr/local/include/eigen3) include_directories(${OpenCV_INCLUDE_DIRS}) file(GLOB My_Source_Files src/*.cpp) add_executable(bytetrack-onnxrun-cpp ${My_Source_Files}) target_link_libraries(bytetrack-onnxrun-cpp ${OpenCV_LIBS} /opt/onnxruntime-linux-x64-1.6.0/lib/libonnxruntime.so) ================================================ FILE: onnxruntime/cpp/include/BYTETracker.h ================================================ #pragma once #include "STrack.h" struct Object { cv::Rect_ rect; int label; float prob; }; class BYTETracker { public: BYTETracker(int frame_rate = 30, int track_buffer = 30); ~BYTETracker(); vector update(const vector& objects); Scalar get_color(int idx); private: vector joint_stracks(vector &tlista, vector &tlistb); vector joint_stracks(vector &tlista, vector &tlistb); vector sub_stracks(vector &tlista, vector &tlistb); void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, vector > &matches, vector &unmatched_a, vector &unmatched_b); vector > iou_distance(vector &atracks, vector &btracks, 
int &dist_size, int &dist_size_size); vector > iou_distance(vector &atracks, vector &btracks); vector > ious(vector > &atlbrs, vector > &btlbrs); double lapjv(const vector > &cost, vector &rowsol, vector &colsol, bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); private: float track_thresh; float high_thresh; float match_thresh; int frame_id; int max_time_lost; vector tracked_stracks; vector lost_stracks; vector removed_stracks; byte_kalman::KalmanFilter kalman_filter; }; ================================================ FILE: onnxruntime/cpp/include/STrack.h ================================================ #pragma once #include #include "kalmanFilter.h" using namespace cv; using namespace std; enum TrackState { New = 0, Tracked, Lost, Removed }; class STrack { public: STrack(vector tlwh_, float score); ~STrack(); vector static tlbr_to_tlwh(vector &tlbr); void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); void static_tlwh(); void static_tlbr(); vector tlwh_to_xyah(vector tlwh_tmp); vector to_xyah(); void mark_lost(); void mark_removed(); int next_id(); int end_frame(); void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); void re_activate(STrack &new_track, int frame_id, bool new_id = false); void update(STrack &new_track, int frame_id); public: bool is_activated; int track_id; int state; vector _tlwh; vector tlwh; vector tlbr; int frame_id; int tracklet_len; int start_frame; KAL_MEAN mean; KAL_COVA covariance; float score; private: byte_kalman::KalmanFilter kalman_filter; }; ================================================ FILE: onnxruntime/cpp/include/dataType.h ================================================ #pragma once #include #include #include #include typedef Eigen::Matrix DETECTBOX; typedef Eigen::Matrix DETECTBOXSS; typedef Eigen::Matrix FEATURE; typedef Eigen::Matrix FEATURESS; //typedef std::vector FEATURESS; //Kalmanfilter //typedef Eigen::Matrix KAL_FILTER; typedef 
Eigen::Matrix KAL_MEAN; typedef Eigen::Matrix KAL_COVA; typedef Eigen::Matrix KAL_HMEAN; typedef Eigen::Matrix KAL_HCOVA; using KAL_DATA = std::pair; using KAL_HDATA = std::pair; //main using RESULT_DATA = std::pair; //tracker: using TRACKER_DATA = std::pair; using MATCH_DATA = std::pair; typedef struct t { std::vector matches; std::vector unmatched_tracks; std::vector unmatched_detections; }TRACHER_MATCHD; //linear_assignment: typedef Eigen::Matrix DYNAMICM; ================================================ FILE: onnxruntime/cpp/include/kalmanFilter.h ================================================ #pragma once #include "dataType.h" namespace byte_kalman { class KalmanFilter { public: static const double chi2inv95[10]; KalmanFilter(); KAL_DATA initiate(const DETECTBOX& measurement); void predict(KAL_MEAN& mean, KAL_COVA& covariance); KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); KAL_DATA update(const KAL_MEAN& mean, const KAL_COVA& covariance, const DETECTBOX& measurement); Eigen::Matrix gating_distance( const KAL_MEAN& mean, const KAL_COVA& covariance, const std::vector& measurements, bool only_position = false); private: Eigen::Matrix _motion_mat; Eigen::Matrix _update_mat; float _std_weight_position; float _std_weight_velocity; }; } ================================================ FILE: onnxruntime/cpp/include/lapjv.h ================================================ #ifndef LAPJV_H #define LAPJV_H #define LARGE 1000000 #if !defined TRUE #define TRUE 1 #endif #if !defined FALSE #define FALSE 0 #endif #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } #define FREE(x) if (x != 0) { free(x); x = 0; } #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } #if 0 #include #define ASSERT(cond) assert(cond) #define PRINTF(fmt, ...) 
printf(fmt, ##__VA_ARGS__) #define PRINT_COST_ARRAY(a, n) \ while (1) { \ printf(#a" = ["); \ if ((n) > 0) { \ printf("%f", (a)[0]); \ for (uint_t j = 1; j < n; j++) { \ printf(", %f", (a)[j]); \ } \ } \ printf("]\n"); \ break; \ } #define PRINT_INDEX_ARRAY(a, n) \ while (1) { \ printf(#a" = ["); \ if ((n) > 0) { \ printf("%d", (a)[0]); \ for (uint_t j = 1; j < n; j++) { \ printf(", %d", (a)[j]); \ } \ } \ printf("]\n"); \ break; \ } #else #define ASSERT(cond) #define PRINTF(fmt, ...) #define PRINT_COST_ARRAY(a, n) #define PRINT_INDEX_ARRAY(a, n) #endif typedef signed int int_t; typedef unsigned int uint_t; typedef double cost_t; typedef char boolean; typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; extern int_t lapjv_internal( const uint_t n, cost_t *cost[], int_t *x, int_t *y); #endif // LAPJV_H ================================================ FILE: onnxruntime/cpp/src/BYTETracker.cpp ================================================ #include "BYTETracker.h" #include BYTETracker::BYTETracker(int frame_rate, int track_buffer) { track_thresh = 0.5; high_thresh = 0.6; match_thresh = 0.8; frame_id = 0; max_time_lost = int(frame_rate / 30.0 * track_buffer); cout << "Init ByteTrack!" 
<< endl; } BYTETracker::~BYTETracker() { } vector BYTETracker::update(const vector& objects) { ////////////////// Step 1: Get detections ////////////////// this->frame_id++; vector activated_stracks; vector refind_stracks; vector removed_stracks; vector lost_stracks; vector detections; vector detections_low; vector detections_cp; vector tracked_stracks_swap; vector resa, resb; vector output_stracks; vector unconfirmed; vector tracked_stracks; vector strack_pool; vector r_tracked_stracks; if (objects.size() > 0) { for (int i = 0; i < objects.size(); i++) { vector tlbr_; tlbr_.resize(4); tlbr_[0] = objects[i].rect.x; tlbr_[1] = objects[i].rect.y; tlbr_[2] = objects[i].rect.x + objects[i].rect.width; tlbr_[3] = objects[i].rect.y + objects[i].rect.height; float score = objects[i].prob; STrack strack(STrack::tlbr_to_tlwh(tlbr_), score); if (score >= track_thresh) { detections.push_back(strack); } else { detections_low.push_back(strack); } } } // Add newly detected tracklets to tracked_stracks for (int i = 0; i < this->tracked_stracks.size(); i++) { if (!this->tracked_stracks[i].is_activated) unconfirmed.push_back(&this->tracked_stracks[i]); else tracked_stracks.push_back(&this->tracked_stracks[i]); } ////////////////// Step 2: First association, with IoU ////////////////// strack_pool = joint_stracks(tracked_stracks, this->lost_stracks); STrack::multi_predict(strack_pool, this->kalman_filter); vector > dists; int dist_size = 0, dist_size_size = 0; dists = iou_distance(strack_pool, detections, dist_size, dist_size_size); vector > matches; vector u_track, u_detection; linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection); for (int i = 0; i < matches.size(); i++) { STrack *track = strack_pool[matches[i][0]]; STrack *det = &detections[matches[i][1]]; if (track->state == TrackState::Tracked) { track->update(*det, this->frame_id); activated_stracks.push_back(*track); } else { track->re_activate(*det, this->frame_id, false); 
refind_stracks.push_back(*track); } } ////////////////// Step 3: Second association, using low score dets ////////////////// for (int i = 0; i < u_detection.size(); i++) { detections_cp.push_back(detections[u_detection[i]]); } detections.clear(); detections.assign(detections_low.begin(), detections_low.end()); for (int i = 0; i < u_track.size(); i++) { if (strack_pool[u_track[i]]->state == TrackState::Tracked) { r_tracked_stracks.push_back(strack_pool[u_track[i]]); } } dists.clear(); dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size); matches.clear(); u_track.clear(); u_detection.clear(); linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection); for (int i = 0; i < matches.size(); i++) { STrack *track = r_tracked_stracks[matches[i][0]]; STrack *det = &detections[matches[i][1]]; if (track->state == TrackState::Tracked) { track->update(*det, this->frame_id); activated_stracks.push_back(*track); } else { track->re_activate(*det, this->frame_id, false); refind_stracks.push_back(*track); } } for (int i = 0; i < u_track.size(); i++) { STrack *track = r_tracked_stracks[u_track[i]]; if (track->state != TrackState::Lost) { track->mark_lost(); lost_stracks.push_back(*track); } } // Deal with unconfirmed tracks, usually tracks with only one beginning frame detections.clear(); detections.assign(detections_cp.begin(), detections_cp.end()); dists.clear(); dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size); matches.clear(); vector u_unconfirmed; u_detection.clear(); linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection); for (int i = 0; i < matches.size(); i++) { unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id); activated_stracks.push_back(*unconfirmed[matches[i][0]]); } for (int i = 0; i < u_unconfirmed.size(); i++) { STrack *track = unconfirmed[u_unconfirmed[i]]; track->mark_removed(); removed_stracks.push_back(*track); } 
////////////////// Step 4: Init new stracks ////////////////// for (int i = 0; i < u_detection.size(); i++) { STrack *track = &detections[u_detection[i]]; if (track->score < this->high_thresh) continue; track->activate(this->kalman_filter, this->frame_id); activated_stracks.push_back(*track); } ////////////////// Step 5: Update state ////////////////// for (int i = 0; i < this->lost_stracks.size(); i++) { if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost) { this->lost_stracks[i].mark_removed(); removed_stracks.push_back(this->lost_stracks[i]); } } for (int i = 0; i < this->tracked_stracks.size(); i++) { if (this->tracked_stracks[i].state == TrackState::Tracked) { tracked_stracks_swap.push_back(this->tracked_stracks[i]); } } this->tracked_stracks.clear(); this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end()); this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks); this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks); //std::cout << activated_stracks.size() << std::endl; this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks); for (int i = 0; i < lost_stracks.size(); i++) { this->lost_stracks.push_back(lost_stracks[i]); } this->lost_stracks = sub_stracks(this->lost_stracks, this->removed_stracks); for (int i = 0; i < removed_stracks.size(); i++) { this->removed_stracks.push_back(removed_stracks[i]); } remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks); this->tracked_stracks.clear(); this->tracked_stracks.assign(resa.begin(), resa.end()); this->lost_stracks.clear(); this->lost_stracks.assign(resb.begin(), resb.end()); for (int i = 0; i < this->tracked_stracks.size(); i++) { if (this->tracked_stracks[i].is_activated) { output_stracks.push_back(this->tracked_stracks[i]); } } return output_stracks; } ================================================ FILE: onnxruntime/cpp/src/STrack.cpp 
================================================ #include "STrack.h" STrack::STrack(vector tlwh_, float score) { _tlwh.resize(4); _tlwh.assign(tlwh_.begin(), tlwh_.end()); is_activated = false; track_id = 0; state = TrackState::New; tlwh.resize(4); tlbr.resize(4); static_tlwh(); static_tlbr(); frame_id = 0; tracklet_len = 0; this->score = score; start_frame = 0; } STrack::~STrack() { } void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) { this->kalman_filter = kalman_filter; this->track_id = this->next_id(); vector _tlwh_tmp(4); _tlwh_tmp[0] = this->_tlwh[0]; _tlwh_tmp[1] = this->_tlwh[1]; _tlwh_tmp[2] = this->_tlwh[2]; _tlwh_tmp[3] = this->_tlwh[3]; vector xyah = tlwh_to_xyah(_tlwh_tmp); DETECTBOX xyah_box; xyah_box[0] = xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = xyah[3]; auto mc = this->kalman_filter.initiate(xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->tracklet_len = 0; this->state = TrackState::Tracked; if (frame_id == 1) { this->is_activated = true; } //this->is_activated = true; this->frame_id = frame_id; this->start_frame = frame_id; } void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) { vector xyah = tlwh_to_xyah(new_track.tlwh); DETECTBOX xyah_box; xyah_box[0] = xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = xyah[3]; auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->tracklet_len = 0; this->state = TrackState::Tracked; this->is_activated = true; this->frame_id = frame_id; this->score = new_track.score; if (new_id) this->track_id = next_id(); } void STrack::update(STrack &new_track, int frame_id) { this->frame_id = frame_id; this->tracklet_len++; vector xyah = tlwh_to_xyah(new_track.tlwh); DETECTBOX xyah_box; xyah_box[0] = xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = 
xyah[3]; auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->state = TrackState::Tracked; this->is_activated = true; this->score = new_track.score; } void STrack::static_tlwh() { if (this->state == TrackState::New) { tlwh[0] = _tlwh[0]; tlwh[1] = _tlwh[1]; tlwh[2] = _tlwh[2]; tlwh[3] = _tlwh[3]; return; } tlwh[0] = mean[0]; tlwh[1] = mean[1]; tlwh[2] = mean[2]; tlwh[3] = mean[3]; tlwh[2] *= tlwh[3]; tlwh[0] -= tlwh[2] / 2; tlwh[1] -= tlwh[3] / 2; } void STrack::static_tlbr() { tlbr.clear(); tlbr.assign(tlwh.begin(), tlwh.end()); tlbr[2] += tlbr[0]; tlbr[3] += tlbr[1]; } vector STrack::tlwh_to_xyah(vector tlwh_tmp) { vector tlwh_output = tlwh_tmp; tlwh_output[0] += tlwh_output[2] / 2; tlwh_output[1] += tlwh_output[3] / 2; tlwh_output[2] /= tlwh_output[3]; return tlwh_output; } vector STrack::to_xyah() { return tlwh_to_xyah(tlwh); } vector STrack::tlbr_to_tlwh(vector &tlbr) { tlbr[2] -= tlbr[0]; tlbr[3] -= tlbr[1]; return tlbr; } void STrack::mark_lost() { state = TrackState::Lost; } void STrack::mark_removed() { state = TrackState::Removed; } int STrack::next_id() { static int _count = 0; _count++; return _count; } int STrack::end_frame() { return this->frame_id; } void STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) { for (int i = 0; i < stracks.size(); i++) { if (stracks[i]->state != TrackState::Tracked) { stracks[i]->mean[7] = 0; } kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); } } ================================================ FILE: onnxruntime/cpp/src/kalmanFilter.cpp ================================================ #include "kalmanFilter.h" #include namespace byte_kalman { const double KalmanFilter::chi2inv95[10] = { 0, 3.8415, 5.9915, 7.8147, 9.4877, 11.070, 12.592, 14.067, 15.507, 16.919 }; KalmanFilter::KalmanFilter() { int ndim = 4; double dt = 1.; _motion_mat = Eigen::MatrixXf::Identity(8, 
8); for (int i = 0; i < ndim; i++) { _motion_mat(i, ndim + i) = dt; } _update_mat = Eigen::MatrixXf::Identity(4, 8); this->_std_weight_position = 1. / 20; this->_std_weight_velocity = 1. / 160; } KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) { DETECTBOX mean_pos = measurement; DETECTBOX mean_vel; for (int i = 0; i < 4; i++) mean_vel(i) = 0; KAL_MEAN mean; for (int i = 0; i < 8; i++) { if (i < 4) mean(i) = mean_pos(i); else mean(i) = mean_vel(i - 4); } KAL_MEAN std; std(0) = 2 * _std_weight_position * measurement[3]; std(1) = 2 * _std_weight_position * measurement[3]; std(2) = 1e-2; std(3) = 2 * _std_weight_position * measurement[3]; std(4) = 10 * _std_weight_velocity * measurement[3]; std(5) = 10 * _std_weight_velocity * measurement[3]; std(6) = 1e-5; std(7) = 10 * _std_weight_velocity * measurement[3]; KAL_MEAN tmp = std.array().square(); KAL_COVA var = tmp.asDiagonal(); return std::make_pair(mean, var); } void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) { //revise the data; DETECTBOX std_pos; std_pos << _std_weight_position * mean(3), _std_weight_position * mean(3), 1e-2, _std_weight_position * mean(3); DETECTBOX std_vel; std_vel << _std_weight_velocity * mean(3), _std_weight_velocity * mean(3), 1e-5, _std_weight_velocity * mean(3); KAL_MEAN tmp; tmp.block<1, 4>(0, 0) = std_pos; tmp.block<1, 4>(0, 4) = std_vel; tmp = tmp.array().square(); KAL_COVA motion_cov = tmp.asDiagonal(); KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); covariance1 += motion_cov; mean = mean1; covariance = covariance1; } KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) { DETECTBOX std; std << _std_weight_position * mean(3), _std_weight_position * mean(3), 1e-1, _std_weight_position * mean(3); KAL_HMEAN mean1 = _update_mat * mean.transpose(); KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); Eigen::Matrix diag = 
std.asDiagonal(); diag = diag.array().square().matrix(); covariance1 += diag; // covariance1.diagonal() << diag; return std::make_pair(mean1, covariance1); } KAL_DATA KalmanFilter::update( const KAL_MEAN &mean, const KAL_COVA &covariance, const DETECTBOX &measurement) { KAL_HDATA pa = project(mean, covariance); KAL_HMEAN projected_mean = pa.first; KAL_HCOVA projected_cov = pa.second; //chol_factor, lower = //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) //kalmain_gain = //scipy.linalg.cho_solve((cho_factor, lower), //np.dot(covariance, self._upadte_mat.T).T, //check_finite=False).T Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 auto tmp = innovation * (kalman_gain.transpose()); KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose()); return std::make_pair(new_mean, new_covariance); } Eigen::Matrix KalmanFilter::gating_distance( const KAL_MEAN &mean, const KAL_COVA &covariance, const std::vector &measurements, bool only_position) { KAL_HDATA pa = this->project(mean, covariance); if (only_position) { printf("not implement!"); exit(0); } KAL_HMEAN mean1 = pa.first; KAL_HCOVA covariance1 = pa.second; // Eigen::Matrix d(size, 4); DETECTBOXSS d(measurements.size(), 4); int pos = 0; for (DETECTBOX box : measurements) { d.row(pos++) = box - mean1; } Eigen::Matrix factor = covariance1.llt().matrixL(); Eigen::Matrix z = factor.triangularView().solve(d).transpose(); auto zz = ((z.array())*(z.array())).matrix(); auto square_maha = zz.colwise().sum(); return square_maha; } } ================================================ FILE: onnxruntime/cpp/src/lapjv.cpp ================================================ #include #include #include #include "lapjv.h" /** Column-reduction and reduction 
transfer for a dense cost matrix. */ int_t _ccrrt_dense(const uint_t n, cost_t *cost[], int_t *free_rows, int_t *x, int_t *y, cost_t *v) { int_t n_free_rows; boolean *unique; for (uint_t i = 0; i < n; i++) { x[i] = -1; v[i] = LARGE; y[i] = 0; } for (uint_t i = 0; i < n; i++) { for (uint_t j = 0; j < n; j++) { const cost_t c = cost[i][j]; if (c < v[j]) { v[j] = c; y[j] = i; } PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]); } } PRINT_COST_ARRAY(v, n); PRINT_INDEX_ARRAY(y, n); NEW(unique, boolean, n); memset(unique, TRUE, n); { int_t j = n; do { j--; const int_t i = y[j]; if (x[i] < 0) { x[i] = j; } else { unique[i] = FALSE; y[j] = -1; } } while (j > 0); } n_free_rows = 0; for (uint_t i = 0; i < n; i++) { if (x[i] < 0) { free_rows[n_free_rows++] = i; } else if (unique[i]) { const int_t j = x[i]; cost_t min = LARGE; for (uint_t j2 = 0; j2 < n; j2++) { if (j2 == (uint_t)j) { continue; } const cost_t c = cost[i][j2] - v[j2]; if (c < min) { min = c; } } PRINTF("v[%d] = %f - %f\n", j, v[j], min); v[j] -= min; } } FREE(unique); return n_free_rows; } /** Augmenting row reduction for a dense cost matrix. 
*/ int_t _carr_dense( const uint_t n, cost_t *cost[], const uint_t n_free_rows, int_t *free_rows, int_t *x, int_t *y, cost_t *v) { uint_t current = 0; int_t new_free_rows = 0; uint_t rr_cnt = 0; PRINT_INDEX_ARRAY(x, n); PRINT_INDEX_ARRAY(y, n); PRINT_COST_ARRAY(v, n); PRINT_INDEX_ARRAY(free_rows, n_free_rows); while (current < n_free_rows) { int_t i0; int_t j1, j2; cost_t v1, v2, v1_new; boolean v1_lowers; rr_cnt++; PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt); const int_t free_i = free_rows[current++]; j1 = 0; v1 = cost[free_i][0] - v[0]; j2 = -1; v2 = LARGE; for (uint_t j = 1; j < n; j++) { PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2); const cost_t c = cost[free_i][j] - v[j]; if (c < v2) { if (c >= v1) { v2 = c; j2 = j; } else { v2 = v1; v1 = c; j2 = j1; j1 = j; } } } i0 = y[j1]; v1_new = v[j1] - (v2 - v1); v1_lowers = v1_new < v[j1]; PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new); if (rr_cnt < current * n) { if (v1_lowers) { v[j1] = v1_new; } else if (i0 >= 0 && j2 >= 0) { j1 = j2; i0 = y[j2]; } if (i0 >= 0) { if (v1_lowers) { free_rows[--current] = i0; } else { free_rows[new_free_rows++] = i0; } } } else { PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n); if (i0 >= 0) { free_rows[new_free_rows++] = i0; } } x[free_i] = j1; y[j1] = free_i; } return new_free_rows; } /** Find columns with minimum d[j] and put them on the SCAN list. */ uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y) { uint_t hi = lo + 1; cost_t mind = d[cols[lo]]; for (uint_t k = hi; k < n; k++) { int_t j = cols[k]; if (d[j] <= mind) { if (d[j] < mind) { hi = lo; mind = d[j]; } cols[k] = cols[hi]; cols[hi++] = j; } } return hi; } // Scan all columns in TODO starting from arbitrary column in SCAN // and try to decrease d of the TODO columns using the SCAN column. 
int_t _scan_dense(const uint_t n, cost_t *cost[], uint_t *plo, uint_t*phi, cost_t *d, int_t *cols, int_t *pred, int_t *y, cost_t *v) { uint_t lo = *plo; uint_t hi = *phi; cost_t h, cred_ij; while (lo != hi) { int_t j = cols[lo++]; const int_t i = y[j]; const cost_t mind = d[j]; h = cost[i][j] - v[j] - mind; PRINTF("i=%d j=%d h=%f\n", i, j, h); // For all columns in TODO for (uint_t k = hi; k < n; k++) { j = cols[k]; cred_ij = cost[i][j] - v[j] - h; if (cred_ij < d[j]) { d[j] = cred_ij; pred[j] = i; if (cred_ij == mind) { if (y[j] < 0) { return j; } cols[k] = cols[hi]; cols[hi++] = j; } } } } *plo = lo; *phi = hi; return -1; } /** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper. * * This is a dense matrix version. * * \return The closest free column index. */ int_t find_path_dense( const uint_t n, cost_t *cost[], const int_t start_i, int_t *y, cost_t *v, int_t *pred) { uint_t lo = 0, hi = 0; int_t final_j = -1; uint_t n_ready = 0; int_t *cols; cost_t *d; NEW(cols, int_t, n); NEW(d, cost_t, n); for (uint_t i = 0; i < n; i++) { cols[i] = i; pred[i] = start_i; d[i] = cost[start_i][i] - v[i]; } PRINT_COST_ARRAY(d, n); while (final_j == -1) { // No columns left on the SCAN list. if (lo == hi) { PRINTF("%d..%d -> find\n", lo, hi); n_ready = lo; hi = _find_dense(n, lo, d, cols, y); PRINTF("check %d..%d\n", lo, hi); PRINT_INDEX_ARRAY(cols, n); for (uint_t k = lo; k < hi; k++) { const int_t j = cols[k]; if (y[j] < 0) { final_j = j; } } } if (final_j == -1) { PRINTF("%d..%d -> scan\n", lo, hi); final_j = _scan_dense( n, cost, &lo, &hi, d, cols, pred, y, v); PRINT_COST_ARRAY(d, n); PRINT_INDEX_ARRAY(cols, n); PRINT_INDEX_ARRAY(pred, n); } } PRINTF("found final_j=%d\n", final_j); PRINT_INDEX_ARRAY(cols, n); { const cost_t mind = d[cols[lo]]; for (uint_t k = 0; k < n_ready; k++) { const int_t j = cols[k]; v[j] += d[j] - mind; } } FREE(cols); FREE(d); return final_j; } /** Augment for a dense cost matrix. 
*/ int_t _ca_dense( const uint_t n, cost_t *cost[], const uint_t n_free_rows, int_t *free_rows, int_t *x, int_t *y, cost_t *v) { int_t *pred; NEW(pred, int_t, n); for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { int_t i = -1, j; uint_t k = 0; PRINTF("looking at free_i=%d\n", *pfree_i); j = find_path_dense(n, cost, *pfree_i, y, v, pred); ASSERT(j >= 0); ASSERT(j < n); while (i != *pfree_i) { PRINTF("augment %d\n", j); PRINT_INDEX_ARRAY(pred, n); i = pred[j]; PRINTF("y[%d]=%d -> %d\n", j, y[j], i); y[j] = i; PRINT_INDEX_ARRAY(x, n); SWAP_INDICES(j, x[i]); k++; if (k >= n) { ASSERT(FALSE); } } } FREE(pred); return 0; } /** Solve dense sparse LAP. */ int lapjv_internal( const uint_t n, cost_t *cost[], int_t *x, int_t *y) { int ret; int_t *free_rows; cost_t *v; NEW(free_rows, int_t, n); NEW(v, cost_t, n); ret = _ccrrt_dense(n, cost, free_rows, x, y, v); int i = 0; while (ret > 0 && i < 2) { ret = _carr_dense(n, cost, ret, free_rows, x, y, v); i++; } if (ret > 0) { ret = _ca_dense(n, cost, ret, free_rows, x, y, v); } FREE(v); FREE(free_rows); return ret; } ================================================ FILE: onnxruntime/cpp/src/main.cpp ================================================ #include #include #include #include //#include #include #include #include #include #include #include "BYTETracker.h" using namespace cv; using namespace std; using namespace Ort; struct GridAndStride { int grid0; int grid1; int stride; }; static inline float intersection_area(const Object& a, const Object& b) { cv::Rect_ inter = a.rect & b.rect; return inter.area(); } static void qsort_descent_inplace(std::vector& faceobjects, int left, int right) { int i = left; int j = right; float p = faceobjects[(left + right) / 2].prob; while (i <= j) { while (faceobjects[i].prob > p) i++; while (faceobjects[j].prob < p) j--; if (i <= j) { // swap std::swap(faceobjects[i], faceobjects[j]); i++; j--; } } #pragma omp parallel sections { #pragma omp section { if (left < 
j) qsort_descent_inplace(faceobjects, left, j); } #pragma omp section { if (i < right) qsort_descent_inplace(faceobjects, i, right); } } } static void qsort_descent_inplace(std::vector& objects) { if (objects.empty()) return; qsort_descent_inplace(objects, 0, objects.size() - 1); } class yolox { public: yolox(); int detect(cv::Mat &img, std::vector &detectResults); private: const float mean_vals[3] = {0.485, 0.456, 0.406}; const float norm_vals[3] = {0.229, 0.224, 0.225}; vector input_image_; const int stride_arr[3] = {8, 16, 32}; // might have stride=64 in YOLOX std::vector grid_strides; Mat static_resize(Mat& img); void generate_grids_and_stride(std::vector& strides); void generate_yolox_proposals(const float* feat_ptr, std::vector& objects); void nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked); float nms_threshold = 0.7; float prob_threshold = 0.1; int num_grid; int num_class; int INPUT_W; int INPUT_H; Session *session_; Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolox"); SessionOptions sessionOptions = SessionOptions(); vector input_names; vector output_names; }; yolox::yolox() { string model_path = "/home/ByteTrack/byte_tracker/model/bytetrack_s.onnx"; //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); session_ = new Session(env, model_path.c_str(), sessionOptions); size_t numInputNodes = session_->GetInputCount(); size_t numOutputNodes = session_->GetOutputCount(); AllocatorWithDefaultOptions allocator; vector> input_node_dims; // >=1 outputs for(int i=0;iGetInputName(i, allocator)); Ort::TypeInfo input_type_info = session_->GetInputTypeInfo(i); auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); auto input_dims = input_tensor_info.GetShape(); input_node_dims.push_back(input_dims); } vector> output_node_dims; // >=1 outputs for(int i=0;iGetOutputName(i, allocator)); Ort::TypeInfo output_type_info = 
session_->GetOutputTypeInfo(i); auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); auto output_dims = output_tensor_info.GetShape(); output_node_dims.push_back(output_dims); /*for (int j = 0; j < output_dims.size(); j++) { cout << output_dims[j] << ","; } cout << endl;*/ } this->INPUT_H = input_node_dims[0][2]; this->INPUT_W = input_node_dims[0][3]; num_grid = output_node_dims[0][1]; num_class = output_node_dims[0][2] - 5; std::vector strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0])); generate_grids_and_stride(strides); } Mat yolox::static_resize(Mat& img) { float r = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); // r = std::min(r, 1.0f); int unpad_w = r * img.cols; int unpad_h = r * img.rows; Mat re(unpad_h, unpad_w, CV_8UC3); resize(img, re, re.size()); Mat out(INPUT_H, INPUT_W, CV_8UC3, Scalar(114, 114, 114)); re.copyTo(out(Rect(0, 0, re.cols, re.rows))); return out; } void yolox::generate_grids_and_stride(std::vector& strides) { for (int i = 0; i < (int)strides.size(); i++) { int stride = strides[i]; int num_grid_w = INPUT_W / stride; int num_grid_h = INPUT_H / stride; for (int g1 = 0; g1 < num_grid_h; g1++) { for (int g0 = 0; g0 < num_grid_w; g0++) { GridAndStride gs; gs.grid0 = g0; gs.grid1 = g1; gs.stride = stride; grid_strides.push_back(gs); } } } } void yolox::generate_yolox_proposals(const float* feat_ptr, std::vector& objects) { const int num_anchors = grid_strides.size(); for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) { const int grid0 = grid_strides[anchor_idx].grid0; const int grid1 = grid_strides[anchor_idx].grid1; const int stride = grid_strides[anchor_idx].stride; // yolox/models/yolo_head.py decode logic // outputs[..., :2] = (outputs[..., :2] + grids) * strides // outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides float x_center = (feat_ptr[0] + grid0) * stride; float y_center = (feat_ptr[1] + grid1) * stride; float w = exp(feat_ptr[2]) * stride; float h = 
exp(feat_ptr[3]) * stride; float x0 = x_center - w * 0.5f; float y0 = y_center - h * 0.5f; float box_objectness = feat_ptr[4]; for (int class_idx = 0; class_idx < num_class; class_idx++) { float box_cls_score = feat_ptr[5 + class_idx]; float box_prob = box_objectness * box_cls_score; if (box_prob > prob_threshold) { Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = w; obj.rect.height = h; obj.label = class_idx; obj.prob = box_prob; objects.push_back(obj); } } // class loop feat_ptr += (num_class + 5); } // point anchor loop } void yolox::nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked) { picked.clear(); const int n = faceobjects.size(); std::vector areas(n); for (int i = 0; i < n; i++) { areas[i] = faceobjects[i].rect.area(); } for (int i = 0; i < n; i++) { const Object& a = faceobjects[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const Object& b = faceobjects[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; } if (keep) picked.push_back(i); } } int yolox::detect(cv::Mat &srcimg, std::vector& objects) { float scale = min(INPUT_W / (srcimg.cols*1.0), INPUT_H / (srcimg.rows*1.0)); Mat img = static_resize(srcimg); int row = img.rows; int col = img.cols; this->input_image_.resize(row * col * img.channels()); for (int c = 0; c < 3; c++) { for (int i = 0; i < row; i++) { for (int j = 0; j < col; j++) { float pix = img.ptr(i)[j * 3 + 2 - c]; this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean_vals[c]) / norm_vals[c]; } } } array input_shape_{1, 3, row, col}; auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); Value input_tensor_ = Value::CreateTensor(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size()); vector ort_outputs = 
session_->Run(RunOptions{nullptr}, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); const float* out = ort_outputs[0].GetTensorMutableData(); std::vector proposals; generate_yolox_proposals(out, proposals); // sort all proposals by score from highest to lowest qsort_descent_inplace(proposals); // apply nms with nms_threshold std::vector picked; nms_sorted_bboxes(proposals, picked); int count = picked.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; // adjust offset to original unpadded float x0 = (objects[i].rect.x) / scale; float y0 = (objects[i].rect.y) / scale; float x1 = (objects[i].rect.x + objects[i].rect.width) / scale; float y1 = (objects[i].rect.y + objects[i].rect.height) / scale; // clip // x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); // y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); // x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); // y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } return 0; } int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: %s [videopath]\n", argv[0]); return -1; } const char* videopath = argv[1]; VideoCapture cap(videopath); if (!cap.isOpened()) return 0; int img_w = cap.get(CAP_PROP_FRAME_WIDTH); int img_h = cap.get(CAP_PROP_FRAME_HEIGHT); int fps = cap.get(CAP_PROP_FPS); long nFrame = static_cast(cap.get(CAP_PROP_FRAME_COUNT)); cout << "Total frames: " << nFrame << ", fps: "< objects; auto start = chrono::system_clock::now(); yolox.detect(img, objects); vector output_stracks = tracker.update(objects); auto end = chrono::system_clock::now(); total_ms = total_ms + chrono::duration_cast(end - start).count(); for (int i = 0; i < output_stracks.size(); i++) { vector tlwh = output_stracks[i].tlwh; bool vertical = tlwh[2] / tlwh[3] > 1.6; if (tlwh[2] * tlwh[3] > 20 && !vertical) { Scalar s = 
tracker.get_color(output_stracks[i].track_id); putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2); } } putText(img, format("frame: %d fps: %d num: %d", num_frames, num_frames * 1000000 / total_ms, (int)output_stracks.size()), Point(0, 30), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); writer.write(img); /*char c = waitKey(1); if (c > 0) { break; }*/ } cap.release(); cout << "FPS: " << num_frames * 1000000 / total_ms << endl; return 0; } ================================================ FILE: onnxruntime/cpp/src/utils.cpp ================================================ #include "BYTETracker.h" #include "lapjv.h" vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) { map exists; vector res; for (int i = 0; i < tlista.size(); i++) { exists.insert(pair(tlista[i]->track_id, 1)); res.push_back(tlista[i]); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (!exists[tid] || exists.count(tid) == 0) { exists[tid] = 1; res.push_back(&tlistb[i]); } } return res; } vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) { map exists; vector res; for (int i = 0; i < tlista.size(); i++) { exists.insert(pair(tlista[i].track_id, 1)); res.push_back(tlista[i]); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (!exists[tid] || exists.count(tid) == 0) { exists[tid] = 1; res.push_back(tlistb[i]); } } return res; } vector BYTETracker::sub_stracks(vector &tlista, vector &tlistb) { map stracks; for (int i = 0; i < tlista.size(); i++) { stracks.insert(pair(tlista[i].track_id, tlista[i])); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (stracks.count(tid) != 0) { stracks.erase(tid); } } vector res; std::map::iterator it; for (it = stracks.begin(); it != stracks.end(); ++it) { res.push_back(it->second); } return res; } void 
BYTETracker::remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb) { vector > pdist = iou_distance(stracksa, stracksb); vector > pairs; for (int i = 0; i < pdist.size(); i++) { for (int j = 0; j < pdist[i].size(); j++) { if (pdist[i][j] < 0.15) { pairs.push_back(pair(i, j)); } } } vector dupa, dupb; for (int i = 0; i < pairs.size(); i++) { int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame; int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame; if (timep > timeq) dupb.push_back(pairs[i].second); else dupa.push_back(pairs[i].first); } for (int i = 0; i < stracksa.size(); i++) { vector::iterator iter = find(dupa.begin(), dupa.end(), i); if (iter == dupa.end()) { resa.push_back(stracksa[i]); } } for (int i = 0; i < stracksb.size(); i++) { vector::iterator iter = find(dupb.begin(), dupb.end(), i); if (iter == dupb.end()) { resb.push_back(stracksb[i]); } } } void BYTETracker::linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, vector > &matches, vector &unmatched_a, vector &unmatched_b) { if (cost_matrix.size() == 0) { for (int i = 0; i < cost_matrix_size; i++) { unmatched_a.push_back(i); } for (int i = 0; i < cost_matrix_size_size; i++) { unmatched_b.push_back(i); } return; } vector rowsol; vector colsol; float c = lapjv(cost_matrix, rowsol, colsol, true, thresh); for (int i = 0; i < rowsol.size(); i++) { if (rowsol[i] >= 0) { vector match; match.push_back(i); match.push_back(rowsol[i]); matches.push_back(match); } else { unmatched_a.push_back(i); } } for (int i = 0; i < colsol.size(); i++) { if (colsol[i] < 0) { unmatched_b.push_back(i); } } } vector > BYTETracker::ious(vector > &atlbrs, vector > &btlbrs) { vector > ious; if (atlbrs.size()*btlbrs.size() == 0) return ious; ious.resize(atlbrs.size()); for (int i = 0; i < ious.size(); i++) { ious[i].resize(btlbrs.size()); } //bbox_ious for (int k = 0; k < btlbrs.size(); 
k++) { vector ious_tmp; float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1); for (int n = 0; n < atlbrs.size(); n++) { float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1; if (iw > 0) { float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1; if(ih > 0) { float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih; ious[n][k] = iw * ih / ua; } else { ious[n][k] = 0.0; } } else { ious[n][k] = 0.0; } } } return ious; } vector > BYTETracker::iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size) { vector > cost_matrix; if (atracks.size() * btracks.size() == 0) { dist_size = atracks.size(); dist_size_size = btracks.size(); return cost_matrix; } vector > atlbrs, btlbrs; for (int i = 0; i < atracks.size(); i++) { atlbrs.push_back(atracks[i]->tlbr); } for (int i = 0; i < btracks.size(); i++) { btlbrs.push_back(btracks[i].tlbr); } dist_size = atracks.size(); dist_size_size = btracks.size(); vector > _ious = ious(atlbrs, btlbrs); for (int i = 0; i < _ious.size();i++) { vector _iou; for (int j = 0; j < _ious[i].size(); j++) { _iou.push_back(1 - _ious[i][j]); } cost_matrix.push_back(_iou); } return cost_matrix; } vector > BYTETracker::iou_distance(vector &atracks, vector &btracks) { vector > atlbrs, btlbrs; for (int i = 0; i < atracks.size(); i++) { atlbrs.push_back(atracks[i].tlbr); } for (int i = 0; i < btracks.size(); i++) { btlbrs.push_back(btracks[i].tlbr); } vector > _ious = ious(atlbrs, btlbrs); vector > cost_matrix; for (int i = 0; i < _ious.size(); i++) { vector _iou; for (int j = 0; j < _ious[i].size(); j++) { _iou.push_back(1 - _ious[i][j]); } cost_matrix.push_back(_iou); } return cost_matrix; } double BYTETracker::lapjv(const vector > &cost, vector &rowsol, vector &colsol, bool extend_cost, float cost_limit, bool return_cost) { vector > cost_c; cost_c.assign(cost.begin(), cost.end()); vector > cost_c_extended; 
int n_rows = cost.size(); int n_cols = cost[0].size(); rowsol.resize(n_rows); colsol.resize(n_cols); int n = 0; if (n_rows == n_cols) { n = n_rows; } else { if (!extend_cost) { cout << "set extend_cost=True" << endl; system("pause"); exit(0); } } if (extend_cost || cost_limit < LONG_MAX) { n = n_rows + n_cols; cost_c_extended.resize(n); for (int i = 0; i < cost_c_extended.size(); i++) cost_c_extended[i].resize(n); if (cost_limit < LONG_MAX) { for (int i = 0; i < cost_c_extended.size(); i++) { for (int j = 0; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = cost_limit / 2.0; } } } else { float cost_max = -1; for (int i = 0; i < cost_c.size(); i++) { for (int j = 0; j < cost_c[i].size(); j++) { if (cost_c[i][j] > cost_max) cost_max = cost_c[i][j]; } } for (int i = 0; i < cost_c_extended.size(); i++) { for (int j = 0; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = cost_max + 1; } } } for (int i = n_rows; i < cost_c_extended.size(); i++) { for (int j = n_cols; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = 0; } } for (int i = 0; i < n_rows; i++) { for (int j = 0; j < n_cols; j++) { cost_c_extended[i][j] = cost_c[i][j]; } } cost_c.clear(); cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); } double **cost_ptr; cost_ptr = new double *[sizeof(double *) * n]; for (int i = 0; i < n; i++) cost_ptr[i] = new double[sizeof(double) * n]; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { cost_ptr[i][j] = cost_c[i][j]; } } int* x_c = new int[sizeof(int) * n]; int *y_c = new int[sizeof(int) * n]; int ret = lapjv_internal(n, cost_ptr, x_c, y_c); if (ret != 0) { cout << "Calculate Wrong!" 
<< endl; system("pause"); exit(0); } double opt = 0.0; if (n != n_rows) { for (int i = 0; i < n; i++) { if (x_c[i] >= n_cols) x_c[i] = -1; if (y_c[i] >= n_rows) y_c[i] = -1; } for (int i = 0; i < n_rows; i++) { rowsol[i] = x_c[i]; } for (int i = 0; i < n_cols; i++) { colsol[i] = y_c[i]; } if (return_cost) { for (int i = 0; i < rowsol.size(); i++) { if (rowsol[i] != -1) { //cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl; opt += cost_ptr[i][rowsol[i]]; } } } } else if (return_cost) { for (int i = 0; i < rowsol.size(); i++) { opt += cost_ptr[i][rowsol[i]]; } } for (int i = 0; i < n; i++) { delete[]cost_ptr[i]; } delete[]cost_ptr; delete[]x_c; delete[]y_c; return opt; } Scalar BYTETracker::get_color(int idx) { idx += 3; return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255); } ================================================ FILE: onnxruntime/python/byte_tracker/byte_tracker_onnx.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- import copy import numpy as np import onnxruntime from byte_tracker.utils.yolox_utils import ( pre_process, post_process, multiclass_nms, ) from byte_tracker.tracker.byte_tracker import BYTETracker class ByteTrackerONNX(object): def __init__(self, args): self.args = args self.rgb_means = (0.485, 0.456, 0.406) self.std = (0.229, 0.224, 0.225) self.session = onnxruntime.InferenceSession(args.model) self.input_shape = tuple(map(int, args.input_shape.split(','))) self.tracker = BYTETracker(args, frame_rate=30) def _pre_process(self, image): image_info = {'id': 0} image_info['image'] = copy.deepcopy(image) image_info['width'] = image.shape[1] image_info['height'] = image.shape[0] preprocessed_image, ratio = pre_process( image, self.input_shape, self.rgb_means, self.std, ) image_info['ratio'] = ratio return preprocessed_image, image_info def inference(self, image): image, image_info = self._pre_process(image) input_name = self.session.get_inputs()[0].name result = 
self.session.run(None, {input_name: image[None, :, :, :]}) dets = self._post_process(result, image_info) bboxes, ids, scores = self._tracker_update( dets, image_info, ) return image_info, bboxes, ids, scores def _post_process(self, result, image_info): predictions = post_process( result[0], self.input_shape, p6=self.args.with_p6, ) predictions = predictions[0] boxes = predictions[:, :4] scores = predictions[:, 4:5] * predictions[:, 5:] boxes_xyxy = np.ones_like(boxes) boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2. boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2. boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2. boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2. boxes_xyxy /= image_info['ratio'] dets = multiclass_nms( boxes_xyxy, scores, nms_thr=self.args.nms_th, score_thr=self.args.score_th, ) return dets def _tracker_update(self, dets, image_info): online_targets = [] if dets is not None: online_targets = self.tracker.update( dets[:, :-1], [image_info['height'], image_info['width']], [image_info['height'], image_info['width']], ) online_tlwhs = [] online_ids = [] online_scores = [] for online_target in online_targets: tlwh = online_target.tlwh track_id = online_target.track_id vertical = tlwh[2] / tlwh[3] > 1.6 if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: online_tlwhs.append(tlwh) online_ids.append(track_id) online_scores.append(online_target.score) return online_tlwhs, online_ids, online_scores ================================================ FILE: onnxruntime/python/byte_tracker/tracker/basetrack.py ================================================ import numpy as np from collections import OrderedDict class TrackState(object): New = 0 Tracked = 1 Lost = 2 Removed = 3 class BaseTrack(object): _count = 0 track_id = 0 is_activated = False state = TrackState.New history = OrderedDict() features = [] curr_feature = None score = 0 start_frame = 0 frame_id = 0 time_since_update = 0 # multi-camera location = (np.inf, np.inf) @property def 
end_frame(self): return self.frame_id @staticmethod def next_id(): BaseTrack._count += 1 return BaseTrack._count def activate(self, *args): raise NotImplementedError def predict(self): raise NotImplementedError def update(self, *args, **kwargs): raise NotImplementedError def mark_lost(self): self.state = TrackState.Lost def mark_removed(self): self.state = TrackState.Removed ================================================ FILE: onnxruntime/python/byte_tracker/tracker/byte_tracker.py ================================================ import numpy as np from collections import deque import os import os.path as osp import copy import torch import torch.nn.functional as F from .kalman_filter import KalmanFilter from byte_tracker.tracker import matching from .basetrack import BaseTrack, TrackState class STrack(BaseTrack): shared_kalman = KalmanFilter() def __init__(self, tlwh, score): # wait activate self._tlwh = np.asarray(tlwh, dtype=np.float) self.kalman_filter = None self.mean, self.covariance = None, None self.is_activated = False self.score = score self.tracklet_len = 0 def predict(self): mean_state = self.mean.copy() if self.state != TrackState.Tracked: mean_state[7] = 0 self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) @staticmethod def multi_predict(stracks): if len(stracks) > 0: multi_mean = np.asarray([st.mean.copy() for st in stracks]) multi_covariance = np.asarray([st.covariance for st in stracks]) for i, st in enumerate(stracks): if st.state != TrackState.Tracked: multi_mean[i][7] = 0 multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): stracks[i].mean = mean stracks[i].covariance = cov def activate(self, kalman_filter, frame_id): """Start a new tracklet""" self.kalman_filter = kalman_filter self.track_id = self.next_id() self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 
self.tracklet_len = 0
        self.state = TrackState.Tracked
        # Only tracks born on the very first frame are activated immediately;
        # later ones must be re-confirmed before is_activated is set.
        if frame_id == 1:
            self.is_activated = True
        # self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        # Revive a lost track with a fresh detection: Kalman-correct the
        # state with the new box and reset the tracklet length.
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            # Caller may request a brand-new identity for the revived track.
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        # Kalman correction step with the matched detection's box.
        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score

    @property
    # @jit(nopython=True)
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        # Before the filter is initiated, fall back to the raw detection box.
        if self.mean is None:
            return self._tlwh.copy()
        # Kalman state stores (cx, cy, aspect, h); recover w = aspect * h,
        # then shift the center to the top-left corner.
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    # @jit(nopython=True)
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
""" ret = np.asarray(tlwh).copy() ret[:2] += ret[2:] / 2 ret[2] /= ret[3] return ret def to_xyah(self): return self.tlwh_to_xyah(self.tlwh) @staticmethod # @jit(nopython=True) def tlbr_to_tlwh(tlbr): ret = np.asarray(tlbr).copy() ret[2:] -= ret[:2] return ret @staticmethod # @jit(nopython=True) def tlwh_to_tlbr(tlwh): ret = np.asarray(tlwh).copy() ret[2:] += ret[:2] return ret def __repr__(self): return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) class BYTETracker(object): def __init__(self, args, frame_rate=30): self.tracked_stracks = [] # type: list[STrack] self.lost_stracks = [] # type: list[STrack] self.removed_stracks = [] # type: list[STrack] self.frame_id = 0 self.args = args #self.det_thresh = args.track_thresh self.det_thresh = args.track_thresh + 0.1 self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) self.max_time_lost = self.buffer_size self.kalman_filter = KalmanFilter() def update(self, output_results, img_info, img_size): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] if output_results.shape[1] == 5: scores = output_results[:, 4] bboxes = output_results[:, :4] else: output_results = output_results.cpu().numpy() scores = output_results[:, 4] * output_results[:, 5] bboxes = output_results[:, :4] # x1y1x2y2 img_h, img_w = img_info[0], img_info[1] scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) bboxes /= scale remain_inds = scores > self.args.track_thresh inds_low = scores > 0.1 inds_high = scores < self.args.track_thresh inds_second = np.logical_and(inds_low, inds_high) dets_second = bboxes[inds_second] dets = bboxes[remain_inds] scores_keep = scores[remain_inds] scores_second = scores[inds_second] if len(dets) > 0: '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for (tlbr, s) in zip(dets, scores_keep)] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: 
list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with high score detection boxes''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF STrack.multi_predict(strack_pool) dists = matching.iou_distance(strack_pool, detections) if not self.args.mot20: dists = matching.fuse_score(dists, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with low score detection boxes''' # association the untrack to the low score detections if len(dets_second) > 0: '''Detections''' detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for (tlbr, s) in zip(dets_second, scores_second)] else: detections_second = [] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] dists = matching.iou_distance(r_tracked_stracks, detections_second) matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections_second[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, 
detections) if not self.args.mot20: dists = matching.fuse_score(dists, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [track for track in self.tracked_stracks if track.is_activated] return output_stracks def joint_stracks(tlista, tlistb): exists = {} res = [] for t in tlista: exists[t.track_id] = 1 res.append(t) for t in tlistb: tid = t.track_id if not exists.get(tid, 0): exists[tid] = 1 res.append(t) return res def sub_stracks(tlista, tlistb): stracks = {} for t in tlista: stracks[t.track_id] = t for t in tlistb: tid = t.track_id if stracks.get(tid, 0): del stracks[tid] return list(stracks.values()) def remove_duplicate_stracks(stracksa, stracksb): pdist 
= matching.iou_distance(stracksa, stracksb) pairs = np.where(pdist < 0.15) dupa, dupb = list(), list() for p, q in zip(*pairs): timep = stracksa[p].frame_id - stracksa[p].start_frame timeq = stracksb[q].frame_id - stracksb[q].start_frame if timep > timeq: dupb.append(q) else: dupa.append(p) resa = [t for i, t in enumerate(stracksa) if not i in dupa] resb = [t for i, t in enumerate(stracksb) if not i in dupb] return resa, resb ================================================ FILE: onnxruntime/python/byte_tracker/tracker/kalman_filter.py ================================================ # vim: expandtab:ts=4:sw=4 import numpy as np import scipy.linalg """ Table for the 0.95 quantile of the chi-square distribution with N degrees of freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv function and used as Mahalanobis gating threshold. """ chi2inv95 = { 1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919} class KalmanFilter(object): """ A simple Kalman filter for tracking bounding boxes in image space. The 8-dimensional state space x, y, a, h, vx, vy, va, vh contains the bounding box center position (x, y), aspect ratio a, height h, and their respective velocities. Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct observation of the state space (linear observation model). """ def __init__(self): ndim, dt = 4, 1. # Create Kalman filter model matrices. self._motion_mat = np.eye(2 * ndim, 2 * ndim) for i in range(ndim): self._motion_mat[i, ndim + i] = dt self._update_mat = np.eye(ndim, 2 * ndim) # Motion and observation uncertainty are chosen relative to the current # state estimate. These weights control the amount of uncertainty in # the model. This is a bit hacky. self._std_weight_position = 1. / 20 self._std_weight_velocity = 1. / 160 def initiate(self, measurement): """Create track from unassociated measurement. 
Parameters ---------- measurement : ndarray Bounding box coordinates (x, y, a, h) with center position (x, y), aspect ratio a, and height h. Returns ------- (ndarray, ndarray) Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional) of the new track. Unobserved velocities are initialized to 0 mean. """ mean_pos = measurement mean_vel = np.zeros_like(mean_pos) mean = np.r_[mean_pos, mean_vel] std = [ 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2, 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3], 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]] covariance = np.diag(np.square(std)) return mean, covariance def predict(self, mean, covariance): """Run Kalman filter prediction step. Parameters ---------- mean : ndarray The 8 dimensional mean vector of the object state at the previous time step. covariance : ndarray The 8x8 dimensional covariance matrix of the object state at the previous time step. Returns ------- (ndarray, ndarray) Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are initialized to 0 mean. """ std_pos = [ self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2, self._std_weight_position * mean[3]] std_vel = [ self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5, self._std_weight_velocity * mean[3]] motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) #mean = np.dot(self._motion_mat, mean) mean = np.dot(mean, self._motion_mat.T) covariance = np.linalg.multi_dot(( self._motion_mat, covariance, self._motion_mat.T)) + motion_cov return mean, covariance def project(self, mean, covariance): """Project state distribution to measurement space. Parameters ---------- mean : ndarray The state's mean vector (8 dimensional array). 
covariance : ndarray The state's covariance matrix (8x8 dimensional). Returns ------- (ndarray, ndarray) Returns the projected mean and covariance matrix of the given state estimate. """ std = [ self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1, self._std_weight_position * mean[3]] innovation_cov = np.diag(np.square(std)) mean = np.dot(self._update_mat, mean) covariance = np.linalg.multi_dot(( self._update_mat, covariance, self._update_mat.T)) return mean, covariance + innovation_cov def multi_predict(self, mean, covariance): """Run Kalman filter prediction step (Vectorized version). Parameters ---------- mean : ndarray The Nx8 dimensional mean matrix of the object states at the previous time step. covariance : ndarray The Nx8x8 dimensional covariance matrics of the object states at the previous time step. Returns ------- (ndarray, ndarray) Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are initialized to 0 mean. """ std_pos = [ self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3], 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]] std_vel = [ self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3], 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]] sqr = np.square(np.r_[std_pos, std_vel]).T motion_cov = [] for i in range(len(mean)): motion_cov.append(np.diag(sqr[i])) motion_cov = np.asarray(motion_cov) mean = np.dot(mean, self._motion_mat.T) left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) covariance = np.dot(left, self._motion_mat.T) + motion_cov return mean, covariance def update(self, mean, covariance, measurement): """Run Kalman filter correction step. Parameters ---------- mean : ndarray The predicted state's mean vector (8 dimensional). covariance : ndarray The state's covariance matrix (8x8 dimensional). 
measurement : ndarray The 4 dimensional measurement vector (x, y, a, h), where (x, y) is the center position, a the aspect ratio, and h the height of the bounding box. Returns ------- (ndarray, ndarray) Returns the measurement-corrected state distribution. """ projected_mean, projected_cov = self.project(mean, covariance) chol_factor, lower = scipy.linalg.cho_factor( projected_cov, lower=True, check_finite=False) kalman_gain = scipy.linalg.cho_solve( (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, check_finite=False).T innovation = measurement - projected_mean new_mean = mean + np.dot(innovation, kalman_gain.T) new_covariance = covariance - np.linalg.multi_dot(( kalman_gain, projected_cov, kalman_gain.T)) return new_mean, new_covariance def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): """Compute gating distance between state distribution and measurements. A suitable distance threshold can be obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom, otherwise 2. Parameters ---------- mean : ndarray Mean vector over the state distribution (8 dimensional). covariance : ndarray Covariance of the state distribution (8x8 dimensional). measurements : ndarray An Nx4 dimensional matrix of N measurements, each in format (x, y, a, h) where (x, y) is the bounding box center position, a the aspect ratio, and h the height. only_position : Optional[bool] If True, distance computation is done with respect to the bounding box center position only. Returns ------- ndarray Returns an array of length N, where the i-th element contains the squared Mahalanobis distance between (mean, covariance) and `measurements[i]`. 
""" mean, covariance = self.project(mean, covariance) if only_position: mean, covariance = mean[:2], covariance[:2, :2] measurements = measurements[:, :2] d = measurements - mean if metric == 'gaussian': return np.sum(d * d, axis=1) elif metric == 'maha': cholesky_factor = np.linalg.cholesky(covariance) z = scipy.linalg.solve_triangular( cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) squared_maha = np.sum(z * z, axis=0) return squared_maha else: raise ValueError('invalid distance metric') ================================================ FILE: onnxruntime/python/byte_tracker/tracker/matching.py ================================================ import cv2 import numpy as np import scipy import lap from scipy.spatial.distance import cdist from cython_bbox import bbox_overlaps as bbox_ious from byte_tracker.tracker import kalman_filter import time def merge_matches(m1, m2, shape): O,P,Q = shape m1 = np.asarray(m1) m2 = np.asarray(m2) M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) mask = M1*M2 match = mask.nonzero() match = list(zip(match[0], match[1])) unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) return match, unmatched_O, unmatched_Q def _indices_to_matches(cost_matrix, indices, thresh): matched_cost = cost_matrix[tuple(zip(*indices))] matched_mask = (matched_cost <= thresh) matches = indices[matched_mask] unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) return matches, unmatched_a, unmatched_b def linear_assignment(cost_matrix, thresh): if cost_matrix.size == 0: return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) matches, unmatched_a, unmatched_b = [], [], [] cost, x, y = 
# NOTE(review): this chunk line begins inside linear_assignment() (its def
# line sits on the previous chunk line); the function is restored here in
# full so the span stands on its own.
def linear_assignment(cost_matrix, thresh):
    """Solve the assignment problem on `cost_matrix` with lap.lapjv.

    Parameters
    ----------
    cost_matrix : np.ndarray
        MxN cost matrix.
    thresh : float
        Maximum admissible cost for a match (lapjv `cost_limit`).

    Returns
    -------
    (matches, unmatched_a, unmatched_b)
        `matches` is a Kx2 array of (row, col) index pairs; the other two
        hold the indices of unmatched rows and columns.
    """
    if cost_matrix.size == 0:
        # Nothing to match: everything on both sides is unmatched.
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
    matches, unmatched_a, unmatched_b = [], [], []
    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    for ix, mx in enumerate(x):
        if mx >= 0:
            matches.append([ix, mx])
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    matches = np.asarray(matches)
    return matches, unmatched_a, unmatched_b


def ious(atlbrs, btlbrs):
    """Pairwise IoU between two lists of (x1, y1, x2, y2) boxes.

    FIX(review): `np.float` was removed in NumPy 1.24 -- replaced with the
    builtin `float` (same dtype, float64).
    FIX(review): computed with plain NumPy instead of
    `cython_bbox.bbox_overlaps` (identical values, same +1 pixel convention),
    dropping a dependency the README flags as hard to install.

    :type atlbrs: list[tlbr] | np.ndarray
    :type btlbrs: list[tlbr] | np.ndarray
    :rtype ious np.ndarray
    """
    iou_mat = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
    if iou_mat.size == 0:
        return iou_mat

    a = np.ascontiguousarray(atlbrs, dtype=float)
    b = np.ascontiguousarray(btlbrs, dtype=float)
    # +1 pixel convention, matching cython_bbox.bbox_overlaps.
    area_a = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1)
    area_b = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)
    iw = np.minimum(a[:, None, 2], b[None, :, 2]) \
        - np.maximum(a[:, None, 0], b[None, :, 0]) + 1
    ih = np.minimum(a[:, None, 3], b[None, :, 3]) \
        - np.maximum(a[:, None, 1], b[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    iou_mat = inter / (area_a[:, None] + area_b[None, :] - inter)
    return iou_mat


def iou_distance(atracks, btracks):
    """Cost matrix = 1 - IoU between two track lists.

    Accepts either STrack-like objects (uses their .tlbr) or raw tlbr
    ndarrays.

    :type atracks: list[STrack] | list[np.ndarray]
    :type btracks: list[STrack] | list[np.ndarray]
    :rtype cost_matrix np.ndarray
    """
    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlbr for track in atracks]
        btlbrs = [track.tlbr for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix


def v_iou_distance(atracks, btracks):
    """Cost matrix = 1 - IoU of the tracks' predicted boxes (pred_bbox).

    :type atracks: list[STrack] | list[np.ndarray]
    :type btracks: list[STrack] | list[np.ndarray]
    :rtype cost_matrix np.ndarray
    """
    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
        btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix


def embedding_distance(tracks, detections, metric='cosine'):
    """Appearance-feature distance between tracks and detections.

    FIX(review): `np.float` replaced with builtin `float` (NumPy >= 1.24).

    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param metric: scipy.spatial.distance.cdist metric name
    :return: cost_matrix np.ndarray
    """
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray(
        [track.curr_feat for track in detections], dtype=float)
    track_features = np.asarray(
        [track.smooth_feat for track in tracks], dtype=float)
    # Features are assumed normalized; clamp tiny negative cdist round-off.
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))
    return cost_matrix
# NOTE(review): this chunk line begins inside embedding_distance() (its def
# line is on the previous chunk line).  The stray tail is preserved below as
# comments so no orphan top-level code executes; note its np.float uses are
# broken on NumPy >= 1.24:
#     detections], dtype=np.float)
#     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
#     cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Nomalized features
#     return cost_matrix


def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
    """Invalidate (set to inf) pairs whose Mahalanobis gating distance
    exceeds the chi-square threshold.

    :param kf: KalmanFilter instance
    :param cost_matrix: np.ndarray, tracks x detections
    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param only_position: gate on (x, y) only (2 dof) instead of 4 dof
    :return: gated cost_matrix (modified in place and returned)
    """
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
    return cost_matrix


def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
    """Gate like gate_cost_matrix(), then blend appearance cost with motion
    distance: lambda_ * cost + (1 - lambda_) * gating_distance.

    :return: fused cost_matrix (modified in place and returned)
    """
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position,
            metric='maha')
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
    return cost_matrix


def fuse_iou(cost_matrix, tracks, detections):
    """Blend a re-id similarity with IoU similarity:
    fused_sim = reid_sim * (1 + iou_sim) / 2, returned as cost (1 - sim).
    """
    if cost_matrix.size == 0:
        return cost_matrix
    reid_sim = 1 - cost_matrix
    iou_dist = iou_distance(tracks, detections)
    iou_sim = 1 - iou_dist
    fuse_sim = reid_sim * (1 + iou_sim) / 2
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    #fuse_sim = fuse_sim * (1 + det_scores) / 2
    fuse_cost = 1 - fuse_sim
    return fuse_cost


def fuse_score(cost_matrix, detections):
    """Weight IoU similarity by detection confidence.

    NOTE(review): the original def line sits at the end of this span and the
    body on the next chunk line; restored here in full.

    :param cost_matrix: np.ndarray of 1 - IoU, tracks x detections
    :param detections: list of detections exposing .score
    :return: fused cost matrix (1 - iou_sim * det_score)
    """
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    fuse_sim = iou_sim * det_scores
    fuse_cost = 1 - fuse_sim
    return fuse_cost
# NOTE(review): tail of matching.fuse_score() -- its "def" line and the word
# "return" sit on the previous chunk line; "cost_matrix" below is the value
# returned by that early-exit.
cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
fuse_sim = iou_sim * det_scores
fuse_cost = 1 - fuse_sim
return fuse_cost


================================================ FILE: onnxruntime/python/byte_tracker/utils/yolox_utils.py ================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import cv2
import numpy as np


def nms(boxes, scores, nms_thr):
    """Single class NMS implemented in Numpy."""
    # Greedy NMS: repeatedly keep the highest-scoring box and drop every
    # remaining box whose IoU with it exceeds nms_thr.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # Areas use the +1 pixel convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # candidate indices by descending score

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the kept box with all remaining candidates.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # Keep candidates with IoU <= threshold; +1 re-aligns indices since
        # ovr was computed against order[1:].
        inds = np.where(ovr <= nms_thr)[0]
        order = order[inds + 1]

    return keep


def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy"""
    # Run class-wise NMS and concatenate results as
    # [x1, y1, x2, y2, score, class_index] rows; None when nothing survives.
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if valid_score_mask.sum() == 0:
            continue
        else:
            valid_scores = cls_scores[valid_score_mask]
            valid_boxes = boxes[valid_score_mask]
            keep = nms(valid_boxes, valid_scores, nms_thr)
            if len(keep) > 0:
                cls_inds = np.ones((len(keep), 1)) * cls_ind
                dets = np.concatenate(
                    [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1)
                final_dets.append(dets)
    if len(final_dets) == 0:
        return None
    return np.concatenate(final_dets, 0)


def pre_process(image, input_size, mean, std, swap=(2, 0, 1)):
    # Letterbox the image onto a 114-gray canvas of input_size while keeping
    # the aspect ratio (the body continues on the next chunk line).
    if len(image.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0
    # NOTE(review): continuation of pre_process() begun on the previous
    # chunk line.
    img = np.array(image)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[:int(img.shape[0] * r), :int(img.shape[1] * r)] = resized_img
    padded_img = padded_img[:, :, ::-1]  # BGR -> RGB
    padded_img /= 255.0
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = padded_img.transpose(swap)  # HWC -> CHW with default swap
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r


def post_process(outputs, img_size, p6=False):
    # Decode raw YOLOX head outputs into absolute-pixel boxes: add grid-cell
    # offsets to the xy predictions and exponentiate wh, both scaled by the
    # stride of the corresponding FPN level.
    grids = []
    expanded_strides = []

    if not p6:
        strides = [8, 16, 32]
    else:
        strides = [8, 16, 32, 64]

    hsizes = [img_size[0] // stride for stride in strides]
    wsizes = [img_size[1] // stride for stride in strides]

    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        shape = grid.shape[:2]
        expanded_strides.append(np.full((*shape, 1), stride))

    grids = np.concatenate(grids, 1)
    expanded_strides = np.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides

    return outputs


================================================ FILE: onnxruntime/python/main.py ================================================
import os
import copy
import time
import argparse

import cv2
from loguru import logger

from byte_tracker.byte_tracker_onnx import ByteTrackerONNX


def get_args():
    # Command-line options for the demo (continues on the next chunk line).
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--use_debug_window',
        action='store_true',
    )
    parser.add_argument(
        '--model',
        type=str,
        default='byte_tracker/model/bytetrack_s.onnx',
    )
    parser.add_argument(
        '--video',
        type=str,
        default='sample.mp4',
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        default='output',
    )
    parser.add_argument(
        '--score_th',
        type=float,
        default=0.1,
    )
    # NOTE(review): continuation of get_args() begun on the previous chunk
    # line.
    parser.add_argument(
        '--nms_th',
        type=float,
        default=0.7,
    )
    parser.add_argument(
        '--input_shape',
        type=str,
        default='608,1088',
    )
    parser.add_argument(
        '--with_p6',
        action='store_true',
        help='Whether your model uses p6 in FPN/PAN.',
    )

    # tracking args
    parser.add_argument(
        '--track_thresh',
        type=float,
        default=0.5,
        help='tracking confidence threshold',
    )
    parser.add_argument(
        '--track_buffer',
        type=int,
        default=30,
        help='the frames for keep lost tracks',
    )
    parser.add_argument(
        '--match_thresh',
        type=float,
        default=0.8,
        help='matching threshold for tracking',
    )
    parser.add_argument(
        '--min-box-area',
        type=float,
        default=10,
        help='filter out tiny boxes',
    )
    parser.add_argument(
        '--mot20',
        dest='mot20',
        default=False,
        action='store_true',
        help='test mot20.',
    )

    args = parser.parse_args()

    return args


def main():
    # Demo entry point: read the video, run the detector + tracker on every
    # frame, and either show a debug window or write an annotated video copy.
    args = get_args()

    use_debug_window = args.use_debug_window
    video_path = args.video
    output_dir = args.output_dir

    byte_tracker = ByteTrackerONNX(args)

    cap = cv2.VideoCapture(video_path)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    if not use_debug_window:
        os.makedirs(output_dir, exist_ok=True)
        save_path = os.path.join(output_dir, video_path.split("/")[-1])
        logger.info(f"video save path is {save_path}")

        video_writer = cv2.VideoWriter(
            save_path,
            cv2.VideoWriter_fourcc(*"mp4v"),
            fps,
            (int(width), int(height)),
        )
        # NOTE(review): video_writer is never released before main() returns,
        # so the output file may be truncated -- confirm and add
        # video_writer.release() after the loop.

    frame_id = 1
    while True:
        start_time = time.time()

        ret, frame = cap.read()
        if not ret:
            break
        debug_image = copy.deepcopy(frame)

        _, bboxes, ids, scores = byte_tracker.inference(frame)

        elapsed_time = time.time() - start_time

        debug_image = draw_tracking_info(
            debug_image,
            bboxes,
            ids,
            scores,
            frame_id,
            elapsed_time,
        )

        if use_debug_window:
            key = cv2.waitKey(1)
            if key == 27:  # ESC
                break
            cv2.namedWindow('ByteTrack ONNX Sample', 0)
            cv2.imshow('ByteTrack ONNX Sample', debug_image)
        else:
            video_writer.write(debug_image)

        logger.info(
            'frame {}/{} ({:.2f}
ms)'.format(frame_id, int(frame_count), elapsed_time * 1000),
        )
        frame_id += 1

    if use_debug_window:
        cap.release()
        cv2.destroyAllWindows()


def get_id_color(index):
    # Deterministic pseudo-random BGR color derived from the track id.
    temp_index = abs(int(index)) * 3
    color = ((37 * temp_index) % 255, (17 * temp_index) % 255,
             (29 * temp_index) % 255)
    return color


def draw_tracking_info(
    image,
    tlwhs,
    ids,
    scores,
    frame_id=0,
    elapsed_time=0.,
):
    # Overlay frame statistics plus one rectangle and id label per track.
    text_scale = 1.5
    text_thickness = 2
    line_thickness = 2

    text = 'frame: %d ' % (frame_id)
    text += 'elapsed time: %.0fms ' % (elapsed_time * 1000)
    text += 'num: %d' % (len(tlwhs))
    cv2.putText(
        image,
        text,
        (0, int(15 * text_scale)),
        cv2.FONT_HERSHEY_PLAIN,
        2,
        (0, 255, 0),
        thickness=text_thickness,
    )

    for index, tlwh in enumerate(tlwhs):
        # tlwh = (top-left x, top-left y, width, height).
        x1, y1 = int(tlwh[0]), int(tlwh[1])
        x2, y2 = x1 + int(tlwh[2]), y1 + int(tlwh[3])

        color = get_id_color(ids[index])
        cv2.rectangle(image, (x1, y1), (x2, y2), color, line_thickness)

        # text = str(ids[index]) + ':%.2f' % (scores[index])
        text = str(ids[index])
        # Draw the id twice (thick black underlay, then white) so it stays
        # readable on any background.
        cv2.putText(image, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN,
                    text_scale, (0, 0, 0), text_thickness + 3)
        cv2.putText(image, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN,
                    text_scale, (255, 255, 255), text_thickness)

    return image


if __name__ == '__main__':
    main()


================================================ FILE: opencv/README.md ================================================
# ByteTrack-opencv

#### 安装

首先确保你的机器安装了opencv4.5以上版本的,接下来安装 `eigen`

```shell
unzip eigen-3.3.9.zip
cd eigen-3.3.9
mkdir build
cd build
cmake ..
sudo make install
```

#### 编译C++程序

```
mkdir build
cd build && rm -rf *
cmake ..
&& make ``` #### 运行 python版本: ``` python main.py ``` C++版本: ``` ./bytetrack-opencv-cpp /home/ByteTrack/sample.mp4 ``` ================================================ FILE: opencv/cpp/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.5) project(bytetrack-opencv-cpp) set(CMAKE_CXX_STANDARD 11) find_package(OpenCV REQUIRED) include_directories(include) include_directories(/usr/local/include/eigen3) include_directories(${OpenCV_INCLUDE_DIRS}) file(GLOB My_Source_Files src/*.cpp) add_executable(bytetrack-opencv-cpp ${My_Source_Files}) target_link_libraries(bytetrack-opencv-cpp ${OpenCV_LIBS}) ================================================ FILE: opencv/cpp/include/BYTETracker.h ================================================ #pragma once #include "STrack.h" struct Object { cv::Rect_ rect; int label; float prob; }; class BYTETracker { public: BYTETracker(int frame_rate = 30, int track_buffer = 30); ~BYTETracker(); vector update(const vector& objects); Scalar get_color(int idx); private: vector joint_stracks(vector &tlista, vector &tlistb); vector joint_stracks(vector &tlista, vector &tlistb); vector sub_stracks(vector &tlista, vector &tlistb); void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, vector > &matches, vector &unmatched_a, vector &unmatched_b); vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); vector > iou_distance(vector &atracks, vector &btracks); vector > ious(vector > &atlbrs, vector > &btlbrs); double lapjv(const vector > &cost, vector &rowsol, vector &colsol, bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); private: float track_thresh; float high_thresh; float match_thresh; int frame_id; int max_time_lost; vector tracked_stracks; vector lost_stracks; vector removed_stracks; 
byte_kalman::KalmanFilter kalman_filter; }; ================================================ FILE: opencv/cpp/include/STrack.h ================================================ #pragma once #include #include "kalmanFilter.h" using namespace cv; using namespace std; enum TrackState { New = 0, Tracked, Lost, Removed }; class STrack { public: STrack(vector tlwh_, float score); ~STrack(); vector static tlbr_to_tlwh(vector &tlbr); void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); void static_tlwh(); void static_tlbr(); vector tlwh_to_xyah(vector tlwh_tmp); vector to_xyah(); void mark_lost(); void mark_removed(); int next_id(); int end_frame(); void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); void re_activate(STrack &new_track, int frame_id, bool new_id = false); void update(STrack &new_track, int frame_id); public: bool is_activated; int track_id; int state; vector _tlwh; vector tlwh; vector tlbr; int frame_id; int tracklet_len; int start_frame; KAL_MEAN mean; KAL_COVA covariance; float score; private: byte_kalman::KalmanFilter kalman_filter; }; ================================================ FILE: opencv/cpp/include/dataType.h ================================================ #pragma once #include #include #include #include typedef Eigen::Matrix DETECTBOX; typedef Eigen::Matrix DETECTBOXSS; typedef Eigen::Matrix FEATURE; typedef Eigen::Matrix FEATURESS; //typedef std::vector FEATURESS; //Kalmanfilter //typedef Eigen::Matrix KAL_FILTER; typedef Eigen::Matrix KAL_MEAN; typedef Eigen::Matrix KAL_COVA; typedef Eigen::Matrix KAL_HMEAN; typedef Eigen::Matrix KAL_HCOVA; using KAL_DATA = std::pair; using KAL_HDATA = std::pair; //main using RESULT_DATA = std::pair; //tracker: using TRACKER_DATA = std::pair; using MATCH_DATA = std::pair; typedef struct t { std::vector matches; std::vector unmatched_tracks; std::vector unmatched_detections; }TRACHER_MATCHD; //linear_assignment: typedef Eigen::Matrix DYNAMICM; 
================================================ FILE: opencv/cpp/include/kalmanFilter.h ================================================ #pragma once #include "dataType.h" namespace byte_kalman { class KalmanFilter { public: static const double chi2inv95[10]; KalmanFilter(); KAL_DATA initiate(const DETECTBOX& measurement); void predict(KAL_MEAN& mean, KAL_COVA& covariance); KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); KAL_DATA update(const KAL_MEAN& mean, const KAL_COVA& covariance, const DETECTBOX& measurement); Eigen::Matrix gating_distance( const KAL_MEAN& mean, const KAL_COVA& covariance, const std::vector& measurements, bool only_position = false); private: Eigen::Matrix _motion_mat; Eigen::Matrix _update_mat; float _std_weight_position; float _std_weight_velocity; }; } ================================================ FILE: opencv/cpp/include/lapjv.h ================================================ #ifndef LAPJV_H #define LAPJV_H #define LARGE 1000000 #if !defined TRUE #define TRUE 1 #endif #if !defined FALSE #define FALSE 0 #endif #define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } #define FREE(x) if (x != 0) { free(x); x = 0; } #define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } #if 0 #include #define ASSERT(cond) assert(cond) #define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__) #define PRINT_COST_ARRAY(a, n) \ while (1) { \ printf(#a" = ["); \ if ((n) > 0) { \ printf("%f", (a)[0]); \ for (uint_t j = 1; j < n; j++) { \ printf(", %f", (a)[j]); \ } \ } \ printf("]\n"); \ break; \ } #define PRINT_INDEX_ARRAY(a, n) \ while (1) { \ printf(#a" = ["); \ if ((n) > 0) { \ printf("%d", (a)[0]); \ for (uint_t j = 1; j < n; j++) { \ printf(", %d", (a)[j]); \ } \ } \ printf("]\n"); \ break; \ } #else #define ASSERT(cond) #define PRINTF(fmt, ...) 
#define PRINT_COST_ARRAY(a, n) #define PRINT_INDEX_ARRAY(a, n) #endif typedef signed int int_t; typedef unsigned int uint_t; typedef double cost_t; typedef char boolean; typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; extern int_t lapjv_internal( const uint_t n, cost_t *cost[], int_t *x, int_t *y); #endif // LAPJV_H ================================================ FILE: opencv/cpp/src/BYTETracker.cpp ================================================ #include "BYTETracker.h" #include BYTETracker::BYTETracker(int frame_rate, int track_buffer) { track_thresh = 0.5; high_thresh = 0.6; match_thresh = 0.8; frame_id = 0; max_time_lost = int(frame_rate / 30.0 * track_buffer); cout << "Init ByteTrack!" << endl; } BYTETracker::~BYTETracker() { } vector BYTETracker::update(const vector& objects) { ////////////////// Step 1: Get detections ////////////////// this->frame_id++; vector activated_stracks; vector refind_stracks; vector removed_stracks; vector lost_stracks; vector detections; vector detections_low; vector detections_cp; vector tracked_stracks_swap; vector resa, resb; vector output_stracks; vector unconfirmed; vector tracked_stracks; vector strack_pool; vector r_tracked_stracks; if (objects.size() > 0) { for (int i = 0; i < objects.size(); i++) { vector tlbr_; tlbr_.resize(4); tlbr_[0] = objects[i].rect.x; tlbr_[1] = objects[i].rect.y; tlbr_[2] = objects[i].rect.x + objects[i].rect.width; tlbr_[3] = objects[i].rect.y + objects[i].rect.height; float score = objects[i].prob; STrack strack(STrack::tlbr_to_tlwh(tlbr_), score); if (score >= track_thresh) { detections.push_back(strack); } else { detections_low.push_back(strack); } } } // Add newly detected tracklets to tracked_stracks for (int i = 0; i < this->tracked_stracks.size(); i++) { if (!this->tracked_stracks[i].is_activated) unconfirmed.push_back(&this->tracked_stracks[i]); else tracked_stracks.push_back(&this->tracked_stracks[i]); } ////////////////// Step 2: First association, with IoU 
////////////////// strack_pool = joint_stracks(tracked_stracks, this->lost_stracks); STrack::multi_predict(strack_pool, this->kalman_filter); vector > dists; int dist_size = 0, dist_size_size = 0; dists = iou_distance(strack_pool, detections, dist_size, dist_size_size); vector > matches; vector u_track, u_detection; linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection); for (int i = 0; i < matches.size(); i++) { STrack *track = strack_pool[matches[i][0]]; STrack *det = &detections[matches[i][1]]; if (track->state == TrackState::Tracked) { track->update(*det, this->frame_id); activated_stracks.push_back(*track); } else { track->re_activate(*det, this->frame_id, false); refind_stracks.push_back(*track); } } ////////////////// Step 3: Second association, using low score dets ////////////////// for (int i = 0; i < u_detection.size(); i++) { detections_cp.push_back(detections[u_detection[i]]); } detections.clear(); detections.assign(detections_low.begin(), detections_low.end()); for (int i = 0; i < u_track.size(); i++) { if (strack_pool[u_track[i]]->state == TrackState::Tracked) { r_tracked_stracks.push_back(strack_pool[u_track[i]]); } } dists.clear(); dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size); matches.clear(); u_track.clear(); u_detection.clear(); linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection); for (int i = 0; i < matches.size(); i++) { STrack *track = r_tracked_stracks[matches[i][0]]; STrack *det = &detections[matches[i][1]]; if (track->state == TrackState::Tracked) { track->update(*det, this->frame_id); activated_stracks.push_back(*track); } else { track->re_activate(*det, this->frame_id, false); refind_stracks.push_back(*track); } } for (int i = 0; i < u_track.size(); i++) { STrack *track = r_tracked_stracks[u_track[i]]; if (track->state != TrackState::Lost) { track->mark_lost(); lost_stracks.push_back(*track); } } // Deal with unconfirmed tracks, 
usually tracks with only one beginning frame detections.clear(); detections.assign(detections_cp.begin(), detections_cp.end()); dists.clear(); dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size); matches.clear(); vector u_unconfirmed; u_detection.clear(); linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection); for (int i = 0; i < matches.size(); i++) { unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id); activated_stracks.push_back(*unconfirmed[matches[i][0]]); } for (int i = 0; i < u_unconfirmed.size(); i++) { STrack *track = unconfirmed[u_unconfirmed[i]]; track->mark_removed(); removed_stracks.push_back(*track); } ////////////////// Step 4: Init new stracks ////////////////// for (int i = 0; i < u_detection.size(); i++) { STrack *track = &detections[u_detection[i]]; if (track->score < this->high_thresh) continue; track->activate(this->kalman_filter, this->frame_id); activated_stracks.push_back(*track); } ////////////////// Step 5: Update state ////////////////// for (int i = 0; i < this->lost_stracks.size(); i++) { if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost) { this->lost_stracks[i].mark_removed(); removed_stracks.push_back(this->lost_stracks[i]); } } for (int i = 0; i < this->tracked_stracks.size(); i++) { if (this->tracked_stracks[i].state == TrackState::Tracked) { tracked_stracks_swap.push_back(this->tracked_stracks[i]); } } this->tracked_stracks.clear(); this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end()); this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks); this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks); //std::cout << activated_stracks.size() << std::endl; this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks); for (int i = 0; i < lost_stracks.size(); i++) { this->lost_stracks.push_back(lost_stracks[i]); } this->lost_stracks = 
sub_stracks(this->lost_stracks, this->removed_stracks); for (int i = 0; i < removed_stracks.size(); i++) { this->removed_stracks.push_back(removed_stracks[i]); } remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks); this->tracked_stracks.clear(); this->tracked_stracks.assign(resa.begin(), resa.end()); this->lost_stracks.clear(); this->lost_stracks.assign(resb.begin(), resb.end()); for (int i = 0; i < this->tracked_stracks.size(); i++) { if (this->tracked_stracks[i].is_activated) { output_stracks.push_back(this->tracked_stracks[i]); } } return output_stracks; } ================================================ FILE: opencv/cpp/src/STrack.cpp ================================================ #include "STrack.h" STrack::STrack(vector tlwh_, float score) { _tlwh.resize(4); _tlwh.assign(tlwh_.begin(), tlwh_.end()); is_activated = false; track_id = 0; state = TrackState::New; tlwh.resize(4); tlbr.resize(4); static_tlwh(); static_tlbr(); frame_id = 0; tracklet_len = 0; this->score = score; start_frame = 0; } STrack::~STrack() { } void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) { this->kalman_filter = kalman_filter; this->track_id = this->next_id(); vector _tlwh_tmp(4); _tlwh_tmp[0] = this->_tlwh[0]; _tlwh_tmp[1] = this->_tlwh[1]; _tlwh_tmp[2] = this->_tlwh[2]; _tlwh_tmp[3] = this->_tlwh[3]; vector xyah = tlwh_to_xyah(_tlwh_tmp); DETECTBOX xyah_box; xyah_box[0] = xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = xyah[3]; auto mc = this->kalman_filter.initiate(xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->tracklet_len = 0; this->state = TrackState::Tracked; if (frame_id == 1) { this->is_activated = true; } //this->is_activated = true; this->frame_id = frame_id; this->start_frame = frame_id; } void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) { vector xyah = tlwh_to_xyah(new_track.tlwh); DETECTBOX xyah_box; xyah_box[0] = 
xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = xyah[3]; auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->tracklet_len = 0; this->state = TrackState::Tracked; this->is_activated = true; this->frame_id = frame_id; this->score = new_track.score; if (new_id) this->track_id = next_id(); } void STrack::update(STrack &new_track, int frame_id) { this->frame_id = frame_id; this->tracklet_len++; vector xyah = tlwh_to_xyah(new_track.tlwh); DETECTBOX xyah_box; xyah_box[0] = xyah[0]; xyah_box[1] = xyah[1]; xyah_box[2] = xyah[2]; xyah_box[3] = xyah[3]; auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); this->mean = mc.first; this->covariance = mc.second; static_tlwh(); static_tlbr(); this->state = TrackState::Tracked; this->is_activated = true; this->score = new_track.score; } void STrack::static_tlwh() { if (this->state == TrackState::New) { tlwh[0] = _tlwh[0]; tlwh[1] = _tlwh[1]; tlwh[2] = _tlwh[2]; tlwh[3] = _tlwh[3]; return; } tlwh[0] = mean[0]; tlwh[1] = mean[1]; tlwh[2] = mean[2]; tlwh[3] = mean[3]; tlwh[2] *= tlwh[3]; tlwh[0] -= tlwh[2] / 2; tlwh[1] -= tlwh[3] / 2; } void STrack::static_tlbr() { tlbr.clear(); tlbr.assign(tlwh.begin(), tlwh.end()); tlbr[2] += tlbr[0]; tlbr[3] += tlbr[1]; } vector STrack::tlwh_to_xyah(vector tlwh_tmp) { vector tlwh_output = tlwh_tmp; tlwh_output[0] += tlwh_output[2] / 2; tlwh_output[1] += tlwh_output[3] / 2; tlwh_output[2] /= tlwh_output[3]; return tlwh_output; } vector STrack::to_xyah() { return tlwh_to_xyah(tlwh); } vector STrack::tlbr_to_tlwh(vector &tlbr) { tlbr[2] -= tlbr[0]; tlbr[3] -= tlbr[1]; return tlbr; } void STrack::mark_lost() { state = TrackState::Lost; } void STrack::mark_removed() { state = TrackState::Removed; } int STrack::next_id() { static int _count = 0; _count++; return _count; } int STrack::end_frame() { return this->frame_id; } void 
STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) { for (int i = 0; i < stracks.size(); i++) { if (stracks[i]->state != TrackState::Tracked) { stracks[i]->mean[7] = 0; } kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); } } ================================================ FILE: opencv/cpp/src/kalmanFilter.cpp ================================================ #include "kalmanFilter.h" #include namespace byte_kalman { const double KalmanFilter::chi2inv95[10] = { 0, 3.8415, 5.9915, 7.8147, 9.4877, 11.070, 12.592, 14.067, 15.507, 16.919 }; KalmanFilter::KalmanFilter() { int ndim = 4; double dt = 1.; _motion_mat = Eigen::MatrixXf::Identity(8, 8); for (int i = 0; i < ndim; i++) { _motion_mat(i, ndim + i) = dt; } _update_mat = Eigen::MatrixXf::Identity(4, 8); this->_std_weight_position = 1. / 20; this->_std_weight_velocity = 1. / 160; } KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) { DETECTBOX mean_pos = measurement; DETECTBOX mean_vel; for (int i = 0; i < 4; i++) mean_vel(i) = 0; KAL_MEAN mean; for (int i = 0; i < 8; i++) { if (i < 4) mean(i) = mean_pos(i); else mean(i) = mean_vel(i - 4); } KAL_MEAN std; std(0) = 2 * _std_weight_position * measurement[3]; std(1) = 2 * _std_weight_position * measurement[3]; std(2) = 1e-2; std(3) = 2 * _std_weight_position * measurement[3]; std(4) = 10 * _std_weight_velocity * measurement[3]; std(5) = 10 * _std_weight_velocity * measurement[3]; std(6) = 1e-5; std(7) = 10 * _std_weight_velocity * measurement[3]; KAL_MEAN tmp = std.array().square(); KAL_COVA var = tmp.asDiagonal(); return std::make_pair(mean, var); } void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) { //revise the data; DETECTBOX std_pos; std_pos << _std_weight_position * mean(3), _std_weight_position * mean(3), 1e-2, _std_weight_position * mean(3); DETECTBOX std_vel; std_vel << _std_weight_velocity * mean(3), _std_weight_velocity * mean(3), 1e-5, _std_weight_velocity * mean(3); KAL_MEAN tmp; 
tmp.block<1, 4>(0, 0) = std_pos; tmp.block<1, 4>(0, 4) = std_vel; tmp = tmp.array().square(); KAL_COVA motion_cov = tmp.asDiagonal(); KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); covariance1 += motion_cov; mean = mean1; covariance = covariance1; } KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) { DETECTBOX std; std << _std_weight_position * mean(3), _std_weight_position * mean(3), 1e-1, _std_weight_position * mean(3); KAL_HMEAN mean1 = _update_mat * mean.transpose(); KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); Eigen::Matrix diag = std.asDiagonal(); diag = diag.array().square().matrix(); covariance1 += diag; // covariance1.diagonal() << diag; return std::make_pair(mean1, covariance1); } KAL_DATA KalmanFilter::update( const KAL_MEAN &mean, const KAL_COVA &covariance, const DETECTBOX &measurement) { KAL_HDATA pa = project(mean, covariance); KAL_HMEAN projected_mean = pa.first; KAL_HCOVA projected_cov = pa.second; //chol_factor, lower = //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) //kalmain_gain = //scipy.linalg.cho_solve((cho_factor, lower), //np.dot(covariance, self._upadte_mat.T).T, //check_finite=False).T Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 auto tmp = innovation * (kalman_gain.transpose()); KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose()); return std::make_pair(new_mean, new_covariance); } Eigen::Matrix KalmanFilter::gating_distance( const KAL_MEAN &mean, const KAL_COVA &covariance, const std::vector &measurements, bool only_position) { KAL_HDATA pa = this->project(mean, covariance); if 
(only_position) { printf("not implement!"); exit(0); } KAL_HMEAN mean1 = pa.first; KAL_HCOVA covariance1 = pa.second; // Eigen::Matrix d(size, 4); DETECTBOXSS d(measurements.size(), 4); int pos = 0; for (DETECTBOX box : measurements) { d.row(pos++) = box - mean1; } Eigen::Matrix factor = covariance1.llt().matrixL(); Eigen::Matrix z = factor.triangularView().solve(d).transpose(); auto zz = ((z.array())*(z.array())).matrix(); auto square_maha = zz.colwise().sum(); return square_maha; } } ================================================ FILE: opencv/cpp/src/lapjv.cpp ================================================ #include #include #include #include "lapjv.h" /** Column-reduction and reduction transfer for a dense cost matrix. */ int_t _ccrrt_dense(const uint_t n, cost_t *cost[], int_t *free_rows, int_t *x, int_t *y, cost_t *v) { int_t n_free_rows; boolean *unique; for (uint_t i = 0; i < n; i++) { x[i] = -1; v[i] = LARGE; y[i] = 0; } for (uint_t i = 0; i < n; i++) { for (uint_t j = 0; j < n; j++) { const cost_t c = cost[i][j]; if (c < v[j]) { v[j] = c; y[j] = i; } PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]); } } PRINT_COST_ARRAY(v, n); PRINT_INDEX_ARRAY(y, n); NEW(unique, boolean, n); memset(unique, TRUE, n); { int_t j = n; do { j--; const int_t i = y[j]; if (x[i] < 0) { x[i] = j; } else { unique[i] = FALSE; y[j] = -1; } } while (j > 0); } n_free_rows = 0; for (uint_t i = 0; i < n; i++) { if (x[i] < 0) { free_rows[n_free_rows++] = i; } else if (unique[i]) { const int_t j = x[i]; cost_t min = LARGE; for (uint_t j2 = 0; j2 < n; j2++) { if (j2 == (uint_t)j) { continue; } const cost_t c = cost[i][j2] - v[j2]; if (c < min) { min = c; } } PRINTF("v[%d] = %f - %f\n", j, v[j], min); v[j] -= min; } } FREE(unique); return n_free_rows; } /** Augmenting row reduction for a dense cost matrix. 
*/
// Augmenting row reduction: try to assign each free row to its best column,
// updating the dual variable v and possibly displacing a previously
// assigned row back onto the free list.
int_t _carr_dense(
    const uint_t n, cost_t *cost[],
    const uint_t n_free_rows,
    int_t *free_rows, int_t *x, int_t *y, cost_t *v)
{
    uint_t current = 0;
    int_t new_free_rows = 0; // rows still unassigned after this pass
    uint_t rr_cnt = 0;       // iteration counter used by the anti-cycling guard
    PRINT_INDEX_ARRAY(x, n);
    PRINT_INDEX_ARRAY(y, n);
    PRINT_COST_ARRAY(v, n);
    PRINT_INDEX_ARRAY(free_rows, n_free_rows);
    while (current < n_free_rows)
    {
        int_t i0;
        int_t j1, j2;
        cost_t v1, v2, v1_new;
        boolean v1_lowers;

        rr_cnt++;
        PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt);
        const int_t free_i = free_rows[current++];
        // Find the smallest (j1, v1) and second-smallest (j2, v2) reduced
        // costs in this row.
        j1 = 0;
        v1 = cost[free_i][0] - v[0];
        j2 = -1;
        v2 = LARGE;
        for (uint_t j = 1; j < n; j++)
        {
            PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2);
            const cost_t c = cost[free_i][j] - v[j];
            if (c < v2)
            {
                if (c >= v1)
                {
                    v2 = c;
                    j2 = j;
                }
                else
                {
                    v2 = v1;
                    v1 = c;
                    j2 = j1;
                    j1 = j;
                }
            }
        }
        i0 = y[j1];
        v1_new = v[j1] - (v2 - v1);
        v1_lowers = v1_new < v[j1];
        PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new);
        // Anti-cycling guard: after too many reassignments just park the
        // displaced row on the free list instead of retrying it.
        if (rr_cnt < current * n)
        {
            if (v1_lowers)
            {
                v[j1] = v1_new;
            }
            else if (i0 >= 0 && j2 >= 0)
            {
                j1 = j2;
                i0 = y[j2];
            }
            if (i0 >= 0)
            {
                if (v1_lowers)
                {
                    free_rows[--current] = i0; // retry the displaced row immediately
                }
                else
                {
                    free_rows[new_free_rows++] = i0;
                }
            }
        }
        else
        {
            PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n);
            if (i0 >= 0)
            {
                free_rows[new_free_rows++] = i0;
            }
        }
        x[free_i] = j1;
        y[j1] = free_i;
    }
    return new_free_rows;
}

/** Find columns with minimum d[j] and put them on the SCAN list. */
uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y)
{
    uint_t hi = lo + 1;
    cost_t mind = d[cols[lo]];
    // Partition cols[lo..n) so that all columns with minimal d come first;
    // a new strict minimum restarts the SCAN segment at lo.
    for (uint_t k = hi; k < n; k++)
    {
        int_t j = cols[k];
        if (d[j] <= mind)
        {
            if (d[j] < mind)
            {
                hi = lo;
                mind = d[j];
            }
            cols[k] = cols[hi];
            cols[hi++] = j;
        }
    }
    return hi;
}

// Scan all columns in TODO starting from arbitrary column in SCAN
// and try to decrease d of the TODO columns using the SCAN column.
int_t _scan_dense(const uint_t n, cost_t *cost[],
    uint_t *plo, uint_t*phi,
    cost_t *d, int_t *cols, int_t *pred,
    int_t *y, cost_t *v)
{
    uint_t lo = *plo;
    uint_t hi = *phi;
    cost_t h, cred_ij;

    while (lo != hi)
    {
        int_t j = cols[lo++];
        const int_t i = y[j];
        const cost_t mind = d[j];
        h = cost[i][j] - v[j] - mind;
        PRINTF("i=%d j=%d h=%f\n", i, j, h);
        // For all columns in TODO
        for (uint_t k = hi; k < n; k++)
        {
            j = cols[k];
            cred_ij = cost[i][j] - v[j] - h;
            if (cred_ij < d[j])
            {
                d[j] = cred_ij;
                pred[j] = i;
                if (cred_ij == mind)
                {
                    if (y[j] < 0)
                    {
                        return j; // reached an unassigned column: path complete
                    }
                    // Move column j from TODO to SCAN.
                    cols[k] = cols[hi];
                    cols[hi++] = j;
                }
            }
        }
    }
    *plo = lo;
    *phi = hi;
    return -1; // no unassigned column reached yet
}

/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper.
 *
 * This is a dense matrix version.
 *
 * \return The closest free column index.
 */
int_t find_path_dense(
    const uint_t n, cost_t *cost[],
    const int_t start_i,
    int_t *y, cost_t *v,
    int_t *pred)
{
    uint_t lo = 0, hi = 0;
    int_t final_j = -1;
    uint_t n_ready = 0;
    int_t *cols;
    cost_t *d;

    NEW(cols, int_t, n);
    NEW(d, cost_t, n);

    // Initialize tentative distances from the start row.
    for (uint_t i = 0; i < n; i++)
    {
        cols[i] = i;
        pred[i] = start_i;
        d[i] = cost[start_i][i] - v[i];
    }
    PRINT_COST_ARRAY(d, n);
    while (final_j == -1)
    {
        // No columns left on the SCAN list.
        if (lo == hi)
        {
            PRINTF("%d..%d -> find\n", lo, hi);
            n_ready = lo;
            hi = _find_dense(n, lo, d, cols, y);
            PRINTF("check %d..%d\n", lo, hi);
            PRINT_INDEX_ARRAY(cols, n);
            for (uint_t k = lo; k < hi; k++)
            {
                const int_t j = cols[k];
                if (y[j] < 0)
                {
                    final_j = j;
                }
            }
        }
        if (final_j == -1)
        {
            PRINTF("%d..%d -> scan\n", lo, hi);
            final_j = _scan_dense(
                n, cost, &lo, &hi, d, cols, pred, y, v);
            PRINT_COST_ARRAY(d, n);
            PRINT_INDEX_ARRAY(cols, n);
            PRINT_INDEX_ARRAY(pred, n);
        }
    }
    PRINTF("found final_j=%d\n", final_j);
    PRINT_INDEX_ARRAY(cols, n);
    // Update the dual variables of the READY columns.
    {
        const cost_t mind = d[cols[lo]];
        for (uint_t k = 0; k < n_ready; k++)
        {
            const int_t j = cols[k];
            v[j] += d[j] - mind;
        }
    }

    FREE(cols);
    FREE(d);

    return final_j;
}

/** Augment for a dense cost matrix.
*/ int_t _ca_dense( const uint_t n, cost_t *cost[], const uint_t n_free_rows, int_t *free_rows, int_t *x, int_t *y, cost_t *v) { int_t *pred; NEW(pred, int_t, n); for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { int_t i = -1, j; uint_t k = 0; PRINTF("looking at free_i=%d\n", *pfree_i); j = find_path_dense(n, cost, *pfree_i, y, v, pred); ASSERT(j >= 0); ASSERT(j < n); while (i != *pfree_i) { PRINTF("augment %d\n", j); PRINT_INDEX_ARRAY(pred, n); i = pred[j]; PRINTF("y[%d]=%d -> %d\n", j, y[j], i); y[j] = i; PRINT_INDEX_ARRAY(x, n); SWAP_INDICES(j, x[i]); k++; if (k >= n) { ASSERT(FALSE); } } } FREE(pred); return 0; } /** Solve dense sparse LAP. */ int lapjv_internal( const uint_t n, cost_t *cost[], int_t *x, int_t *y) { int ret; int_t *free_rows; cost_t *v; NEW(free_rows, int_t, n); NEW(v, cost_t, n); ret = _ccrrt_dense(n, cost, free_rows, x, y, v); int i = 0; while (ret > 0 && i < 2) { ret = _carr_dense(n, cost, ret, free_rows, x, y, v); i++; } if (ret > 0) { ret = _ca_dense(n, cost, ret, free_rows, x, y, v); } FREE(v); FREE(free_rows); return ret; } ================================================ FILE: opencv/cpp/src/main.cpp ================================================ #include #include #include #include #include #include #include #include #include #include "BYTETracker.h" using namespace cv; using namespace std; using namespace dnn; struct GridAndStride { int grid0; int grid1; int stride; }; static inline float intersection_area(const Object& a, const Object& b) { cv::Rect_ inter = a.rect & b.rect; return inter.area(); } static void qsort_descent_inplace(std::vector& faceobjects, int left, int right) { int i = left; int j = right; float p = faceobjects[(left + right) / 2].prob; while (i <= j) { while (faceobjects[i].prob > p) i++; while (faceobjects[j].prob < p) j--; if (i <= j) { // swap std::swap(faceobjects[i], faceobjects[j]); i++; j--; } } #pragma omp parallel sections { #pragma omp section { if (left < j) 
qsort_descent_inplace(faceobjects, left, j); } #pragma omp section { if (i < right) qsort_descent_inplace(faceobjects, i, right); } } } static void qsort_descent_inplace(std::vector& objects) { if (objects.empty()) return; qsort_descent_inplace(objects, 0, objects.size() - 1); } class yolox { public: yolox(); int detect(cv::Mat &img, std::vector &detectResults); private: const int INPUT_W = 1088; const int INPUT_H = 608; const float mean_vals[3] = {0.485, 0.456, 0.406}; const float norm_vals[3] = {0.229, 0.224, 0.225}; const int stride_arr[3] = {8, 16, 32}; // might have stride=64 in YOLOX std::vector grid_strides; Mat static_resize(Mat& img); void generate_grids_and_stride(std::vector& strides); void generate_yolox_proposals(const float* feat_ptr, std::vector& objects); void nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked); float nms_threshold = 0.7; float prob_threshold = 0.1; int num_grid; int num_class; Net net; }; yolox::yolox() { string model_path = "/home/ByteTrack/byte_tracker/model/bytetrack_s.onnx"; this->net = readNet(model_path); std::vector strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0])); generate_grids_and_stride(strides); } Mat yolox::static_resize(Mat& img) { float r = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); // r = std::min(r, 1.0f); int unpad_w = r * img.cols; int unpad_h = r * img.rows; Mat re(unpad_h, unpad_w, CV_8UC3); resize(img, re, re.size()); Mat out(INPUT_H, INPUT_W, CV_8UC3, Scalar(114, 114, 114)); re.copyTo(out(Rect(0, 0, re.cols, re.rows))); return out; } void yolox::generate_grids_and_stride(std::vector& strides) { for (int i = 0; i < (int)strides.size(); i++) { int stride = strides[i]; int num_grid_w = INPUT_W / stride; int num_grid_h = INPUT_H / stride; for (int g1 = 0; g1 < num_grid_h; g1++) { for (int g0 = 0; g0 < num_grid_w; g0++) { GridAndStride gs; gs.grid0 = g0; gs.grid1 = g1; gs.stride = stride; grid_strides.push_back(gs); } } } } void 
yolox::generate_yolox_proposals(const float* feat_ptr, std::vector& objects) { const int num_anchors = grid_strides.size(); for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) { const int grid0 = grid_strides[anchor_idx].grid0; const int grid1 = grid_strides[anchor_idx].grid1; const int stride = grid_strides[anchor_idx].stride; // yolox/models/yolo_head.py decode logic // outputs[..., :2] = (outputs[..., :2] + grids) * strides // outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides float x_center = (feat_ptr[0] + grid0) * stride; float y_center = (feat_ptr[1] + grid1) * stride; float w = exp(feat_ptr[2]) * stride; float h = exp(feat_ptr[3]) * stride; float x0 = x_center - w * 0.5f; float y0 = y_center - h * 0.5f; float box_objectness = feat_ptr[4]; for (int class_idx = 0; class_idx < num_class; class_idx++) { float box_cls_score = feat_ptr[5 + class_idx]; float box_prob = box_objectness * box_cls_score; if (box_prob > prob_threshold) { Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = w; obj.rect.height = h; obj.label = class_idx; obj.prob = box_prob; objects.push_back(obj); } } // class loop feat_ptr += (num_class + 5); } // point anchor loop } void yolox::nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked) { picked.clear(); const int n = faceobjects.size(); std::vector areas(n); for (int i = 0; i < n; i++) { areas[i] = faceobjects[i].rect.area(); } for (int i = 0; i < n; i++) { const Object& a = faceobjects[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const Object& b = faceobjects[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; } if (keep) picked.push_back(i); } } int yolox::detect(cv::Mat &srcimg, std::vector& objects) { float scale = min(INPUT_W / (srcimg.cols*1.0), INPUT_H / (srcimg.rows*1.0)); Mat img = 
static_resize(srcimg); img.convertTo(img, CV_32F); int i = 0, j = 0; for (i = 0; i < img.rows; i++) { float* pdata = (float*)(img.data + i * img.step); for (j = 0; j < img.cols; j++) { pdata[0] = (pdata[2] / 255.0 - this->mean_vals[0]) / this->norm_vals[0]; pdata[1] = (pdata[1] / 255.0 - this->mean_vals[1]) / this->norm_vals[1]; pdata[2] = (pdata[0] / 255.0 - this->mean_vals[2]) / this->norm_vals[2]; pdata += 3; } } Mat blob = blobFromImage(img); this->net.setInput(blob); vector outs; this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); this->num_grid = outs[0].size[1]; this->num_class = outs[0].size[2] - 5; const float* out = (float*)outs[0].data; std::vector proposals; generate_yolox_proposals(out, proposals); // sort all proposals by score from highest to lowest qsort_descent_inplace(proposals); // apply nms with nms_threshold std::vector picked; nms_sorted_bboxes(proposals, picked); int count = picked.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; // adjust offset to original unpadded float x0 = (objects[i].rect.x) / scale; float y0 = (objects[i].rect.y) / scale; float x1 = (objects[i].rect.x + objects[i].rect.width) / scale; float y1 = (objects[i].rect.y + objects[i].rect.height) / scale; // clip // x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); // y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); // x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); // y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } return 0; } int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: %s [videopath]\n", argv[0]); return -1; } const char* videopath = argv[1]; VideoCapture cap(videopath); if (!cap.isOpened()) return 0; int img_w = cap.get(CAP_PROP_FRAME_WIDTH); int img_h = cap.get(CAP_PROP_FRAME_HEIGHT); int fps = cap.get(CAP_PROP_FPS); long nFrame = 
static_cast(cap.get(CAP_PROP_FRAME_COUNT)); cout << "Total frames: " << nFrame << ", fps: "< objects; auto start = chrono::system_clock::now(); yolox.detect(img, objects); vector output_stracks = tracker.update(objects); auto end = chrono::system_clock::now(); total_ms = total_ms + chrono::duration_cast(end - start).count(); for (int i = 0; i < output_stracks.size(); i++) { vector tlwh = output_stracks[i].tlwh; bool vertical = tlwh[2] / tlwh[3] > 1.6; if (tlwh[2] * tlwh[3] > 20 && !vertical) { Scalar s = tracker.get_color(output_stracks[i].track_id); putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2); } } putText(img, format("frame: %d fps: %d num: %d", num_frames, num_frames * 1000000 / total_ms, (int)output_stracks.size()), Point(0, 30), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); writer.write(img); /*char c = waitKey(1); if (c > 0) { break; }*/ } cap.release(); cout << "FPS: " << num_frames * 1000000 / total_ms << endl; return 0; } ================================================ FILE: opencv/cpp/src/utils.cpp ================================================ #include "BYTETracker.h" #include "lapjv.h" vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) { map exists; vector res; for (int i = 0; i < tlista.size(); i++) { exists.insert(pair(tlista[i]->track_id, 1)); res.push_back(tlista[i]); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (!exists[tid] || exists.count(tid) == 0) { exists[tid] = 1; res.push_back(&tlistb[i]); } } return res; } vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) { map exists; vector res; for (int i = 0; i < tlista.size(); i++) { exists.insert(pair(tlista[i].track_id, 1)); res.push_back(tlista[i]); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (!exists[tid] || exists.count(tid) == 0) { exists[tid] = 1; 
res.push_back(tlistb[i]); } } return res; } vector BYTETracker::sub_stracks(vector &tlista, vector &tlistb) { map stracks; for (int i = 0; i < tlista.size(); i++) { stracks.insert(pair(tlista[i].track_id, tlista[i])); } for (int i = 0; i < tlistb.size(); i++) { int tid = tlistb[i].track_id; if (stracks.count(tid) != 0) { stracks.erase(tid); } } vector res; std::map::iterator it; for (it = stracks.begin(); it != stracks.end(); ++it) { res.push_back(it->second); } return res; } void BYTETracker::remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb) { vector > pdist = iou_distance(stracksa, stracksb); vector > pairs; for (int i = 0; i < pdist.size(); i++) { for (int j = 0; j < pdist[i].size(); j++) { if (pdist[i][j] < 0.15) { pairs.push_back(pair(i, j)); } } } vector dupa, dupb; for (int i = 0; i < pairs.size(); i++) { int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame; int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame; if (timep > timeq) dupb.push_back(pairs[i].second); else dupa.push_back(pairs[i].first); } for (int i = 0; i < stracksa.size(); i++) { vector::iterator iter = find(dupa.begin(), dupa.end(), i); if (iter == dupa.end()) { resa.push_back(stracksa[i]); } } for (int i = 0; i < stracksb.size(); i++) { vector::iterator iter = find(dupb.begin(), dupb.end(), i); if (iter == dupb.end()) { resb.push_back(stracksb[i]); } } } void BYTETracker::linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, vector > &matches, vector &unmatched_a, vector &unmatched_b) { if (cost_matrix.size() == 0) { for (int i = 0; i < cost_matrix_size; i++) { unmatched_a.push_back(i); } for (int i = 0; i < cost_matrix_size_size; i++) { unmatched_b.push_back(i); } return; } vector rowsol; vector colsol; float c = lapjv(cost_matrix, rowsol, colsol, true, thresh); for (int i = 0; i < rowsol.size(); i++) { if (rowsol[i] >= 0) { vector match; 
match.push_back(i); match.push_back(rowsol[i]); matches.push_back(match); } else { unmatched_a.push_back(i); } } for (int i = 0; i < colsol.size(); i++) { if (colsol[i] < 0) { unmatched_b.push_back(i); } } } vector > BYTETracker::ious(vector > &atlbrs, vector > &btlbrs) { vector > ious; if (atlbrs.size()*btlbrs.size() == 0) return ious; ious.resize(atlbrs.size()); for (int i = 0; i < ious.size(); i++) { ious[i].resize(btlbrs.size()); } //bbox_ious for (int k = 0; k < btlbrs.size(); k++) { vector ious_tmp; float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1); for (int n = 0; n < atlbrs.size(); n++) { float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1; if (iw > 0) { float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1; if(ih > 0) { float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih; ious[n][k] = iw * ih / ua; } else { ious[n][k] = 0.0; } } else { ious[n][k] = 0.0; } } } return ious; } vector > BYTETracker::iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size) { vector > cost_matrix; if (atracks.size() * btracks.size() == 0) { dist_size = atracks.size(); dist_size_size = btracks.size(); return cost_matrix; } vector > atlbrs, btlbrs; for (int i = 0; i < atracks.size(); i++) { atlbrs.push_back(atracks[i]->tlbr); } for (int i = 0; i < btracks.size(); i++) { btlbrs.push_back(btracks[i].tlbr); } dist_size = atracks.size(); dist_size_size = btracks.size(); vector > _ious = ious(atlbrs, btlbrs); for (int i = 0; i < _ious.size();i++) { vector _iou; for (int j = 0; j < _ious[i].size(); j++) { _iou.push_back(1 - _ious[i][j]); } cost_matrix.push_back(_iou); } return cost_matrix; } vector > BYTETracker::iou_distance(vector &atracks, vector &btracks) { vector > atlbrs, btlbrs; for (int i = 0; i < atracks.size(); i++) { atlbrs.push_back(atracks[i].tlbr); } for (int i = 0; i < btracks.size(); i++) { 
btlbrs.push_back(btracks[i].tlbr); } vector > _ious = ious(atlbrs, btlbrs); vector > cost_matrix; for (int i = 0; i < _ious.size(); i++) { vector _iou; for (int j = 0; j < _ious[i].size(); j++) { _iou.push_back(1 - _ious[i][j]); } cost_matrix.push_back(_iou); } return cost_matrix; } double BYTETracker::lapjv(const vector > &cost, vector &rowsol, vector &colsol, bool extend_cost, float cost_limit, bool return_cost) { vector > cost_c; cost_c.assign(cost.begin(), cost.end()); vector > cost_c_extended; int n_rows = cost.size(); int n_cols = cost[0].size(); rowsol.resize(n_rows); colsol.resize(n_cols); int n = 0; if (n_rows == n_cols) { n = n_rows; } else { if (!extend_cost) { cout << "set extend_cost=True" << endl; system("pause"); exit(0); } } if (extend_cost || cost_limit < LONG_MAX) { n = n_rows + n_cols; cost_c_extended.resize(n); for (int i = 0; i < cost_c_extended.size(); i++) cost_c_extended[i].resize(n); if (cost_limit < LONG_MAX) { for (int i = 0; i < cost_c_extended.size(); i++) { for (int j = 0; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = cost_limit / 2.0; } } } else { float cost_max = -1; for (int i = 0; i < cost_c.size(); i++) { for (int j = 0; j < cost_c[i].size(); j++) { if (cost_c[i][j] > cost_max) cost_max = cost_c[i][j]; } } for (int i = 0; i < cost_c_extended.size(); i++) { for (int j = 0; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = cost_max + 1; } } } for (int i = n_rows; i < cost_c_extended.size(); i++) { for (int j = n_cols; j < cost_c_extended[i].size(); j++) { cost_c_extended[i][j] = 0; } } for (int i = 0; i < n_rows; i++) { for (int j = 0; j < n_cols; j++) { cost_c_extended[i][j] = cost_c[i][j]; } } cost_c.clear(); cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); } double **cost_ptr; cost_ptr = new double *[sizeof(double *) * n]; for (int i = 0; i < n; i++) cost_ptr[i] = new double[sizeof(double) * n]; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { cost_ptr[i][j] = cost_c[i][j]; } } 
int* x_c = new int[sizeof(int) * n]; int *y_c = new int[sizeof(int) * n]; int ret = lapjv_internal(n, cost_ptr, x_c, y_c); if (ret != 0) { cout << "Calculate Wrong!" << endl; system("pause"); exit(0); } double opt = 0.0; if (n != n_rows) { for (int i = 0; i < n; i++) { if (x_c[i] >= n_cols) x_c[i] = -1; if (y_c[i] >= n_rows) y_c[i] = -1; } for (int i = 0; i < n_rows; i++) { rowsol[i] = x_c[i]; } for (int i = 0; i < n_cols; i++) { colsol[i] = y_c[i]; } if (return_cost) { for (int i = 0; i < rowsol.size(); i++) { if (rowsol[i] != -1) { //cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl; opt += cost_ptr[i][rowsol[i]]; } } } } else if (return_cost) { for (int i = 0; i < rowsol.size(); i++) { opt += cost_ptr[i][rowsol[i]]; } } for (int i = 0; i < n; i++) { delete[]cost_ptr[i]; } delete[]cost_ptr; delete[]x_c; delete[]y_c; return opt; } Scalar BYTETracker::get_color(int idx) { idx += 3; return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255); } ================================================ FILE: opencv/python/byte_tracker/byte_tracker_onnx.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- import copy import numpy as np import onnxruntime from byte_tracker.utils.yolox_utils import ( pre_process, post_process, multiclass_nms, ) from byte_tracker.tracker.byte_tracker import BYTETracker class ByteTrackerONNX(object): def __init__(self, args): self.args = args self.rgb_means = (0.485, 0.456, 0.406) self.std = (0.229, 0.224, 0.225) self.session = onnxruntime.InferenceSession(args.model) self.input_shape = tuple(map(int, args.input_shape.split(','))) self.tracker = BYTETracker(args, frame_rate=30) def _pre_process(self, image): image_info = {'id': 0} image_info['image'] = copy.deepcopy(image) image_info['width'] = image.shape[1] image_info['height'] = image.shape[0] preprocessed_image, ratio = pre_process( image, self.input_shape, self.rgb_means, self.std, ) image_info['ratio'] = ratio return 
preprocessed_image, image_info def inference(self, image): image, image_info = self._pre_process(image) input_name = self.session.get_inputs()[0].name result = self.session.run(None, {input_name: image[None, :, :, :]}) dets = self._post_process(result, image_info) bboxes, ids, scores = self._tracker_update( dets, image_info, ) return image_info, bboxes, ids, scores def _post_process(self, result, image_info): predictions = post_process( result[0], self.input_shape, p6=self.args.with_p6, ) predictions = predictions[0] boxes = predictions[:, :4] scores = predictions[:, 4:5] * predictions[:, 5:] boxes_xyxy = np.ones_like(boxes) boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2. boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2. boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2. boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2. boxes_xyxy /= image_info['ratio'] dets = multiclass_nms( boxes_xyxy, scores, nms_thr=self.args.nms_th, score_thr=self.args.score_th, ) return dets def _tracker_update(self, dets, image_info): online_targets = [] if dets is not None: online_targets = self.tracker.update( dets[:, :-1], [image_info['height'], image_info['width']], [image_info['height'], image_info['width']], ) online_tlwhs = [] online_ids = [] online_scores = [] for online_target in online_targets: tlwh = online_target.tlwh track_id = online_target.track_id vertical = tlwh[2] / tlwh[3] > 1.6 if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: online_tlwhs.append(tlwh) online_ids.append(track_id) online_scores.append(online_target.score) return online_tlwhs, online_ids, online_scores ================================================ FILE: opencv/python/byte_tracker/tracker/basetrack.py ================================================ import numpy as np from collections import OrderedDict class TrackState(object): New = 0 Tracked = 1 Lost = 2 Removed = 3 class BaseTrack(object): _count = 0 track_id = 0 is_activated = False state = TrackState.New history = OrderedDict() 
features = [] curr_feature = None score = 0 start_frame = 0 frame_id = 0 time_since_update = 0 # multi-camera location = (np.inf, np.inf) @property def end_frame(self): return self.frame_id @staticmethod def next_id(): BaseTrack._count += 1 return BaseTrack._count def activate(self, *args): raise NotImplementedError def predict(self): raise NotImplementedError def update(self, *args, **kwargs): raise NotImplementedError def mark_lost(self): self.state = TrackState.Lost def mark_removed(self): self.state = TrackState.Removed ================================================ FILE: opencv/python/byte_tracker/tracker/byte_tracker.py ================================================ import numpy as np from collections import deque import os import os.path as osp import copy import torch import torch.nn.functional as F from .kalman_filter import KalmanFilter from byte_tracker.tracker import matching from .basetrack import BaseTrack, TrackState class STrack(BaseTrack): shared_kalman = KalmanFilter() def __init__(self, tlwh, score): # wait activate self._tlwh = np.asarray(tlwh, dtype=np.float) self.kalman_filter = None self.mean, self.covariance = None, None self.is_activated = False self.score = score self.tracklet_len = 0 def predict(self): mean_state = self.mean.copy() if self.state != TrackState.Tracked: mean_state[7] = 0 self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) @staticmethod def multi_predict(stracks): if len(stracks) > 0: multi_mean = np.asarray([st.mean.copy() for st in stracks]) multi_covariance = np.asarray([st.covariance for st in stracks]) for i, st in enumerate(stracks): if st.state != TrackState.Tracked: multi_mean[i][7] = 0 multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): stracks[i].mean = mean stracks[i].covariance = cov def activate(self, kalman_filter, frame_id): """Start a new tracklet""" 
self.kalman_filter = kalman_filter self.track_id = self.next_id() self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) self.tracklet_len = 0 self.state = TrackState.Tracked if frame_id == 1: self.is_activated = True # self.is_activated = True self.frame_id = frame_id self.start_frame = frame_id def re_activate(self, new_track, frame_id, new_id=False): self.mean, self.covariance = self.kalman_filter.update( self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) ) self.tracklet_len = 0 self.state = TrackState.Tracked self.is_activated = True self.frame_id = frame_id if new_id: self.track_id = self.next_id() self.score = new_track.score def update(self, new_track, frame_id): """ Update a matched track :type new_track: STrack :type frame_id: int :type update_feature: bool :return: """ self.frame_id = frame_id self.tracklet_len += 1 new_tlwh = new_track.tlwh self.mean, self.covariance = self.kalman_filter.update( self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) self.state = TrackState.Tracked self.is_activated = True self.score = new_track.score @property # @jit(nopython=True) def tlwh(self): """Get current position in bounding box format `(top left x, top left y, width, height)`. """ if self.mean is None: return self._tlwh.copy() ret = self.mean[:4].copy() ret[2] *= ret[3] ret[:2] -= ret[2:] / 2 return ret @property # @jit(nopython=True) def tlbr(self): """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., `(top left, bottom right)`. """ ret = self.tlwh.copy() ret[2:] += ret[:2] return ret @staticmethod # @jit(nopython=True) def tlwh_to_xyah(tlwh): """Convert bounding box to format `(center x, center y, aspect ratio, height)`, where the aspect ratio is `width / height`. 
""" ret = np.asarray(tlwh).copy() ret[:2] += ret[2:] / 2 ret[2] /= ret[3] return ret def to_xyah(self): return self.tlwh_to_xyah(self.tlwh) @staticmethod # @jit(nopython=True) def tlbr_to_tlwh(tlbr): ret = np.asarray(tlbr).copy() ret[2:] -= ret[:2] return ret @staticmethod # @jit(nopython=True) def tlwh_to_tlbr(tlwh): ret = np.asarray(tlwh).copy() ret[2:] += ret[:2] return ret def __repr__(self): return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) class BYTETracker(object): def __init__(self, args, frame_rate=30): self.tracked_stracks = [] # type: list[STrack] self.lost_stracks = [] # type: list[STrack] self.removed_stracks = [] # type: list[STrack] self.frame_id = 0 self.args = args #self.det_thresh = args.track_thresh self.det_thresh = args.track_thresh + 0.1 self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) self.max_time_lost = self.buffer_size self.kalman_filter = KalmanFilter() def update(self, output_results, img_info, img_size): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] if output_results.shape[1] == 5: scores = output_results[:, 4] bboxes = output_results[:, :4] else: output_results = output_results.cpu().numpy() scores = output_results[:, 4] * output_results[:, 5] bboxes = output_results[:, :4] # x1y1x2y2 img_h, img_w = img_info[0], img_info[1] scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) bboxes /= scale remain_inds = scores > self.args.track_thresh inds_low = scores > 0.1 inds_high = scores < self.args.track_thresh inds_second = np.logical_and(inds_low, inds_high) dets_second = bboxes[inds_second] dets = bboxes[remain_inds] scores_keep = scores[remain_inds] scores_second = scores[inds_second] if len(dets) > 0: '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for (tlbr, s) in zip(dets, scores_keep)] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: 
list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with high score detection boxes''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF STrack.multi_predict(strack_pool) dists = matching.iou_distance(strack_pool, detections) if not self.args.mot20: dists = matching.fuse_score(dists, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with low score detection boxes''' # association the untrack to the low score detections if len(dets_second) > 0: '''Detections''' detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for (tlbr, s) in zip(dets_second, scores_second)] else: detections_second = [] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] dists = matching.iou_distance(r_tracked_stracks, detections_second) matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections_second[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, 
detections) if not self.args.mot20: dists = matching.fuse_score(dists, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [track for track in self.tracked_stracks if track.is_activated] return output_stracks def joint_stracks(tlista, tlistb): exists = {} res = [] for t in tlista: exists[t.track_id] = 1 res.append(t) for t in tlistb: tid = t.track_id if not exists.get(tid, 0): exists[tid] = 1 res.append(t) return res def sub_stracks(tlista, tlistb): stracks = {} for t in tlista: stracks[t.track_id] = t for t in tlistb: tid = t.track_id if stracks.get(tid, 0): del stracks[tid] return list(stracks.values()) def remove_duplicate_stracks(stracksa, stracksb): pdist 
# NOTE(review): the leading fragment of remove_duplicate_stracks on this line
# is completed by the preceding byte_tracker.py definition.


# ================================================ FILE: opencv/python/byte_tracker/tracker/kalman_filter.py ================================================
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg

# 0.95 quantile of the chi-square distribution with N degrees of freedom
# (N = 1..9), from MATLAB/Octave's chi2inv.  Used as the Mahalanobis
# gating threshold.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


class KalmanFilter(object):
    """Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state (x, y, a, h, vx, vy, va, vh) holds the box
    centre (x, y), aspect ratio a, height h and their velocities under a
    constant-velocity motion model.  The observation model is linear and
    observes (x, y, a, h) directly.
    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Constant-velocity transition: position += dt * velocity.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Process/observation noise is scaled by the current height
        # estimate; these weights control the amount of uncertainty.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Create a track state from an unassociated measurement (x, y, a, h).

        Returns (mean, covariance): an 8-vector and an 8x8 matrix.
        Velocities start at zero mean with large uncertainty.
        """
        mean = np.concatenate([measurement, np.zeros_like(measurement)])

        h = measurement[3]
        stds = [
            2 * self._std_weight_position * h,
            2 * self._std_weight_position * h,
            1e-2,
            2 * self._std_weight_position * h,
            10 * self._std_weight_velocity * h,
            10 * self._std_weight_velocity * h,
            1e-5,
            10 * self._std_weight_velocity * h,
        ]
        covariance = np.diag(np.square(stds))
        return mean, covariance

    def predict(self, mean, covariance):
        """Propagate (mean, covariance) one time step through the motion model."""
        h = mean[3]
        std_pos = [self._std_weight_position * h,
                   self._std_weight_position * h,
                   1e-2,
                   self._std_weight_position * h]
        std_vel = [self._std_weight_velocity * h,
                   self._std_weight_velocity * h,
                   1e-5,
                   self._std_weight_velocity * h]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(mean, self._motion_mat.T)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
        return mean, covariance

    def project(self, mean, covariance):
        """Project an 8-d state distribution onto the 4-d measurement space."""
        h = mean[3]
        stds = [self._std_weight_position * h,
                self._std_weight_position * h,
                1e-1,
                self._std_weight_position * h]
        innovation_cov = np.diag(np.square(stds))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def multi_predict(self, mean, covariance):
        """Vectorised prediction: mean is (N, 8), covariance is (N, 8, 8)."""
        heights = mean[:, 3]
        std_pos = [self._std_weight_position * heights,
                   self._std_weight_position * heights,
                   1e-2 * np.ones_like(heights),
                   self._std_weight_position * heights]
        std_vel = [self._std_weight_velocity * heights,
                   self._std_weight_velocity * heights,
                   1e-5 * np.ones_like(heights),
                   self._std_weight_velocity * heights]
        per_track_var = np.square(np.r_[std_pos, std_vel]).T
        motion_cov = np.asarray([np.diag(row) for row in per_track_var])

        mean = np.dot(mean, self._motion_mat.T)
        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
        covariance = np.dot(left, self._motion_mat.T) + motion_cov
        return mean, covariance

    def update(self, mean, covariance, measurement):
        """Correct the predicted state with a measurement (x, y, a, h)."""
        projected_mean, projected_cov = self.project(mean, covariance)

        # Kalman gain via the Cholesky factorisation of the projected
        # covariance, avoiding an explicit matrix inverse.
        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, metric='maha'):
        """Squared distance between the state distribution and N measurements.

        With `only_position` only the box centre is compared (2 DoF),
        otherwise the full (x, y, a, h) measurement (4 DoF); suitable
        thresholds come from `chi2inv95`.  `metric` is 'gaussian'
        (squared Euclidean) or 'maha' (squared Mahalanobis).
        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        d = measurements - mean
        if metric == 'gaussian':
            return np.sum(d * d, axis=1)
        elif metric == 'maha':
            cholesky_factor = np.linalg.cholesky(covariance)
            z = scipy.linalg.solve_triangular(
                cholesky_factor, d.T, lower=True, check_finite=False,
                overwrite_b=True)
            return np.sum(z * z, axis=0)
        else:
            raise ValueError('invalid distance metric')
""" mean, covariance = self.project(mean, covariance) if only_position: mean, covariance = mean[:2], covariance[:2, :2] measurements = measurements[:, :2] d = measurements - mean if metric == 'gaussian': return np.sum(d * d, axis=1) elif metric == 'maha': cholesky_factor = np.linalg.cholesky(covariance) z = scipy.linalg.solve_triangular( cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) squared_maha = np.sum(z * z, axis=0) return squared_maha else: raise ValueError('invalid distance metric') ================================================ FILE: opencv/python/byte_tracker/tracker/matching.py ================================================ import cv2 import numpy as np import scipy import lap from scipy.spatial.distance import cdist from cython_bbox import bbox_overlaps as bbox_ious from byte_tracker.tracker import kalman_filter import time def merge_matches(m1, m2, shape): O,P,Q = shape m1 = np.asarray(m1) m2 = np.asarray(m2) M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) mask = M1*M2 match = mask.nonzero() match = list(zip(match[0], match[1])) unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) return match, unmatched_O, unmatched_Q def _indices_to_matches(cost_matrix, indices, thresh): matched_cost = cost_matrix[tuple(zip(*indices))] matched_mask = (matched_cost <= thresh) matches = indices[matched_mask] unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) return matches, unmatched_a, unmatched_b def linear_assignment(cost_matrix, thresh): if cost_matrix.size == 0: return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) matches, unmatched_a, unmatched_b = [], [], [] cost, x, y = 
lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) for ix, mx in enumerate(x): if mx >= 0: matches.append([ix, mx]) unmatched_a = np.where(x < 0)[0] unmatched_b = np.where(y < 0)[0] matches = np.asarray(matches) return matches, unmatched_a, unmatched_b def ious(atlbrs, btlbrs): """ Compute cost based on IoU :type atlbrs: list[tlbr] | np.ndarray :type atlbrs: list[tlbr] | np.ndarray :rtype ious np.ndarray """ ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) if ious.size == 0: return ious ious = bbox_ious( np.ascontiguousarray(atlbrs, dtype=np.float), np.ascontiguousarray(btlbrs, dtype=np.float) ) return ious def iou_distance(atracks, btracks): """ Compute cost based on IoU :type atracks: list[STrack] :type btracks: list[STrack] :rtype cost_matrix np.ndarray """ if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): atlbrs = atracks btlbrs = btracks else: atlbrs = [track.tlbr for track in atracks] btlbrs = [track.tlbr for track in btracks] _ious = ious(atlbrs, btlbrs) cost_matrix = 1 - _ious return cost_matrix def v_iou_distance(atracks, btracks): """ Compute cost based on IoU :type atracks: list[STrack] :type btracks: list[STrack] :rtype cost_matrix np.ndarray """ if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): atlbrs = atracks btlbrs = btracks else: atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] _ious = ious(atlbrs, btlbrs) cost_matrix = 1 - _ious return cost_matrix def embedding_distance(tracks, detections, metric='cosine'): """ :param tracks: list[STrack] :param detections: list[BaseTrack] :param metric: :return: cost_matrix np.ndarray """ cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) if cost_matrix.size == 0: return cost_matrix det_features = np.asarray([track.curr_feat for track in 
detections], dtype=np.float) #for i, track in enumerate(tracks): #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features return cost_matrix def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): if cost_matrix.size == 0: return cost_matrix gating_dim = 2 if only_position else 4 gating_threshold = kalman_filter.chi2inv95[gating_dim] measurements = np.asarray([det.to_xyah() for det in detections]) for row, track in enumerate(tracks): gating_distance = kf.gating_distance( track.mean, track.covariance, measurements, only_position) cost_matrix[row, gating_distance > gating_threshold] = np.inf return cost_matrix def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): if cost_matrix.size == 0: return cost_matrix gating_dim = 2 if only_position else 4 gating_threshold = kalman_filter.chi2inv95[gating_dim] measurements = np.asarray([det.to_xyah() for det in detections]) for row, track in enumerate(tracks): gating_distance = kf.gating_distance( track.mean, track.covariance, measurements, only_position, metric='maha') cost_matrix[row, gating_distance > gating_threshold] = np.inf cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance return cost_matrix def fuse_iou(cost_matrix, tracks, detections): if cost_matrix.size == 0: return cost_matrix reid_sim = 1 - cost_matrix iou_dist = iou_distance(tracks, detections) iou_sim = 1 - iou_dist fuse_sim = reid_sim * (1 + iou_sim) / 2 det_scores = np.array([det.score for det in detections]) det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) #fuse_sim = fuse_sim * (1 + det_scores) / 2 fuse_cost = 1 - fuse_sim return fuse_cost def fuse_score(cost_matrix, detections): if cost_matrix.size == 0: return 
cost_matrix iou_sim = 1 - cost_matrix det_scores = np.array([det.score for det in detections]) det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) fuse_sim = iou_sim * det_scores fuse_cost = 1 - fuse_sim return fuse_cost ================================================ FILE: opencv/python/byte_tracker/utils/yolox_utils.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- import cv2 import numpy as np def nms(boxes, scores, nms_thr): """Single class NMS implemented in Numpy.""" x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] y2 = boxes[:, 3] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = np.maximum(x1[i], x1[order[1:]]) yy1 = np.maximum(y1[i], y1[order[1:]]) xx2 = np.minimum(x2[i], x2[order[1:]]) yy2 = np.minimum(y2[i], y2[order[1:]]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h ovr = inter / (areas[i] + areas[order[1:]] - inter) inds = np.where(ovr <= nms_thr)[0] order = order[inds + 1] return keep def multiclass_nms(boxes, scores, nms_thr, score_thr): """Multiclass NMS implemented in Numpy""" final_dets = [] num_classes = scores.shape[1] for cls_ind in range(num_classes): cls_scores = scores[:, cls_ind] valid_score_mask = cls_scores > score_thr if valid_score_mask.sum() == 0: continue else: valid_scores = cls_scores[valid_score_mask] valid_boxes = boxes[valid_score_mask] keep = nms(valid_boxes, valid_scores, nms_thr) if len(keep) > 0: cls_inds = np.ones((len(keep), 1)) * cls_ind dets = np.concatenate( [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1) final_dets.append(dets) if len(final_dets) == 0: return None return np.concatenate(final_dets, 0) def pre_process(image, input_size, mean, std, swap=(2, 0, 1)): if len(image.shape) == 3: padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0 else: padded_img = np.ones(input_size) * 114.0 img = 
# NOTE(review): the leading fragment of pre_process on this line is completed
# by the preceding yolox_utils.py definition.


def post_process(outputs, img_size, p6=False):
    """Decode raw YOLOX head outputs in place.

    Converts grid-relative (x, y) offsets and log-encoded (w, h) into
    pixel units of the network input, for strides [8, 16, 32] (plus 64
    when `p6` is set).  Returns the modified `outputs`.
    """
    grids = []
    expanded_strides = []

    strides = [8, 16, 32] if not p6 else [8, 16, 32, 64]

    hsizes = [img_size[0] // stride for stride in strides]
    wsizes = [img_size[1] // stride for stride in strides]

    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        shape = grid.shape[:2]
        expanded_strides.append(np.full((*shape, 1), stride))

    grids = np.concatenate(grids, 1)
    expanded_strides = np.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides

    return outputs


# ================================================ FILE: opencv/python/main.py ================================================
import os
import copy
import time
import argparse

import cv2
from loguru import logger
import numpy as np

from byte_tracker.utils.yolox_utils import (
    post_process,
    multiclass_nms,
)
from byte_tracker.tracker.byte_tracker import BYTETracker


def pre_process(image, input_size, mean, std):
    # Letterbox-resize into input_size, pad with 114, convert BGR->RGB and
    # normalise with mean/std.  Unlike the yolox_utils variant this keeps
    # HWC layout (cv2.dnn.blobFromImage does the CHW transpose later).
    if len(image.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0
    img = np.array(image)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[:int(img.shape[0] * r), :int(img.shape[1] * r)] = resized_img
    padded_img = padded_img[:, :, ::-1]
    padded_img /= 255.0
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r


class ByteTracker(object):
    # Glue object: runs the YOLOX detector through OpenCV-DNN and feeds the
    # detections into the BYTETracker association logic.

    def __init__(self, args):
        self.args = args

        # ImageNet normalisation constants used at training time.
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

        self.net = cv2.dnn.readNet(args.model)
        self.input_shape = tuple(map(int, args.input_shape.split(',')))

        self.tracker = BYTETracker(args, frame_rate=30)

    def _pre_process(self, image):
        # Build the network input and remember the original frame geometry.
        image_info = {'id': 0}
        image_info['image'] = copy.deepcopy(image)
        image_info['width'] = image.shape[1]
        image_info['height'] = image.shape[0]
        preprocessed_image, ratio = pre_process(
            image, self.input_shape, self.rgb_means, self.std,
        )
        image_info['ratio'] = ratio
        return preprocessed_image, image_info

    def inference(self, image):
        # Full pipeline for one frame: detect, decode, NMS, then track.
        # Returns (image_info, tlwh boxes, track ids, scores).
        image, image_info = self._pre_process(image)

        blob = cv2.dnn.blobFromImage(image)
        self.net.setInput(blob)
        result = self.net.forward()

        dets = self._post_process(result, image_info)
        bboxes, ids, scores = self._tracker_update(dets, image_info)
        return image_info, bboxes, ids, scores

    def _post_process(self, result, image_info):
        predictions = post_process(result, self.input_shape, p6=self.args.with_p6)
        predictions = predictions[0]

        boxes = predictions[:, :4]
        # Per-class score = objectness * class probability.
        scores = predictions[:, 4:5] * predictions[:, 5:]

        # (cx, cy, w, h) -> (x1, y1, x2, y2), undoing the letterbox scale.
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
        boxes_xyxy /= image_info['ratio']

        dets = multiclass_nms(
            boxes_xyxy,
            scores,
            nms_thr=self.args.nms_th,
            score_thr=self.args.score_th,
        )
        return dets

    def _tracker_update(self, dets, image_info):
        online_targets = []
        if dets is not None:
            online_targets = self.tracker.update(
                dets[:, :-1],
                [image_info['height'], image_info['width']],
                [image_info['height'], image_info['width']],
            )

        online_tlwhs = []
        online_ids = []
        online_scores = []
        for online_target in online_targets:
            tlwh = online_target.tlwh
            track_id = online_target.track_id
            # Drop tiny boxes and (for pedestrians) extreme landscape boxes.
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(track_id)
                online_scores.append(online_target.score)
        return online_tlwhs, online_ids, online_scores


def get_args():
    # Command-line options for the demo (detector, I/O and tracker knobs).
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--use_debug_window',
        action='store_true',
    )
    parser.add_argument(
        '--model',
        type=str,
        default='byte_tracker/model/bytetrack_s.onnx',
    )
    parser.add_argument(
        '--video',
        type=str,
        default='sample.mp4',
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        default='output',
    )
    parser.add_argument(
        '--score_th',
        type=float,
        default=0.1,
    )
    parser.add_argument(
        '--nms_th',
        type=float,
        default=0.7,
    )
    parser.add_argument(
        '--input_shape',
        type=str,
        default='608,1088',
    )
    parser.add_argument(
        '--with_p6',
        action='store_true',
        help='Whether your model uses p6 in FPN/PAN.',
    )

    # tracking args
    parser.add_argument(
        '--track_thresh',
        type=float,
        default=0.5,
        help='tracking confidence threshold',
    )
    parser.add_argument(
        '--track_buffer',
        type=int,
        default=30,
        help='the frames for keep lost tracks',
    )
    parser.add_argument(
        '--match_thresh',
        type=float,
        default=0.8,
        help='matching threshold for tracking',
    )
    parser.add_argument(
        '--min-box-area',
        type=float,
        default=10,
        help='filter out tiny boxes',
    )
    parser.add_argument(
        '--mot20',
        dest='mot20',
        default=False,
        action='store_true',
        help='test mot20.',
    )

    args = parser.parse_args()
    return args
get_args() use_debug_window = args.use_debug_window video_path = args.video output_dir = args.output_dir byte_tracker = ByteTracker(args) cap = cv2.VideoCapture(video_path) width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) fps = cap.get(cv2.CAP_PROP_FPS) frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) if not use_debug_window: os.makedirs(output_dir, exist_ok=True) save_path = os.path.join(output_dir, video_path.split("/")[-1]) logger.info(f"video save path is {save_path}") video_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)), ) frame_id = 1 winName = 'Deep learning object detection in OpenCV' while True: start_time = time.time() ret, frame = cap.read() if not ret: break debug_image = copy.deepcopy(frame) _, bboxes, ids, scores = byte_tracker.inference(frame) elapsed_time = time.time() - start_time debug_image = draw_tracking_info( debug_image, bboxes, ids, scores, frame_id, elapsed_time, ) if use_debug_window: key = cv2.waitKey(1) if key == 27: # ESC break cv2.namedWindow(winName, 0) cv2.imshow(winName, debug_image) else: video_writer.write(debug_image) logger.info( 'frame {}/{} ({:.2f} ms)'.format(frame_id, int(frame_count), elapsed_time * 1000), ) frame_id += 1 if use_debug_window: cap.release() cv2.destroyAllWindows() def get_id_color(index): temp_index = abs(int(index)) * 3 color = ((37 * temp_index) % 255, (17 * temp_index) % 255, (29 * temp_index) % 255) return color def draw_tracking_info( image, tlwhs, ids, scores, frame_id=0, elapsed_time=0., ): text_scale = 1.5 text_thickness = 2 line_thickness = 2 text = 'frame: %d ' % (frame_id) text += 'elapsed time: %.0fms ' % (elapsed_time * 1000) text += 'num: %d' % (len(tlwhs)) cv2.putText( image, text, (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), thickness=text_thickness, ) for index, tlwh in enumerate(tlwhs): x1, y1 = int(tlwh[0]), int(tlwh[1]) x2, y2 = x1 + int(tlwh[2]), y1 + int(tlwh[3]) color = 
get_id_color(ids[index]) cv2.rectangle(image, (x1, y1), (x2, y2), color, line_thickness) # text = str(ids[index]) + ':%.2f' % (scores[index]) text = str(ids[index]) cv2.putText(image, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 0), text_thickness + 3) cv2.putText(image, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, text_scale, (255, 255, 255), text_thickness) return image if __name__ == '__main__': main() ================================================ FILE: requirements.txt ================================================ Cython pycocotools scipy loguru thop lap cython_bbox