gitextract_f8211mg8/

├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── EgoTracks/
│   ├── README.md
│   ├── configs/
│   │   └── STARK/
│   │       ├── stark_st_R101.yaml
│   │       └── stark_st_base.yaml
│   ├── setup.py
│   ├── test.sh
│   ├── tools/
│   │   ├── __init__.py
│   │   ├── eval_datasets/
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   └── eval_ego4d_lt_tracking.py
│   │   ├── eval_net.py
│   │   ├── preprocess/
│   │   │   ├── __init__.py
│   │   │   └── extract_ego4d_clip_frames.py
│   │   ├── train_net.py
│   │   └── trainers/
│   │       ├── __init__.py
│   │       ├── base_trainer.py
│   │       └── starkst_trainer.py
│   ├── tracking/
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── stark_defaults.py
│   │   ├── dataset/
│   │   │   ├── __init__.py
│   │   │   ├── base_image_dataset.py
│   │   │   ├── base_video_dataset.py
│   │   │   ├── build.py
│   │   │   ├── data_specs/
│   │   │   │   ├── README.md
│   │   │   │   ├── got10k_train_full_split.txt
│   │   │   │   ├── got10k_train_split.txt
│   │   │   │   ├── got10k_val_split.txt
│   │   │   │   ├── got10k_vot_exclude.txt
│   │   │   │   ├── got10k_vot_train_split.txt
│   │   │   │   ├── got10k_vot_val_split.txt
│   │   │   │   ├── lasot_train_split.txt
│   │   │   │   └── trackingnet_classmap.txt
│   │   │   ├── dataloader.py
│   │   │   ├── eval_datasets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_dataset.py
│   │   │   │   └── ego4d_lt_tracking_dataset.py
│   │   │   ├── processing/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_processing.py
│   │   │   │   ├── processing_utils.py
│   │   │   │   └── stark_processing.py
│   │   │   ├── trackingdataset.py
│   │   │   ├── train_datasets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coco_seq.py
│   │   │   │   ├── ego4d_lt_tracking.py
│   │   │   │   ├── ego4d_vq.py
│   │   │   │   ├── got10k.py
│   │   │   │   ├── lasot.py
│   │   │   │   └── tracking_net.py
│   │   │   └── transforms.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   └── miou.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── multiple_object_tracker.py
│   │   │   ├── single_object_tracker.py
│   │   │   ├── stark_tracker/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── backbone.py
│   │   │   │   ├── config/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── stark_st2/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── config.py
│   │   │   │   ├── head.py
│   │   │   │   ├── params.py
│   │   │   │   ├── position_encoding.py
│   │   │   │   ├── resnet.py
│   │   │   │   ├── stark_s.py
│   │   │   │   ├── stark_st.py
│   │   │   │   ├── stark_tracker.py
│   │   │   │   ├── transformer.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── box_ops.py
│   │   │   │       ├── merge.py
│   │   │   │       ├── misc.py
│   │   │   │       └── preprocessing_utils.py
│   │   │   ├── template.py
│   │   │   └── tracker.py
│   │   ├── solver/
│   │   │   ├── __init__.py
│   │   │   └── build.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── bbox_helper.py
│   │       ├── defaults.py
│   │       ├── env.py
│   │       ├── load_helper.py
│   │       ├── load_text.py
│   │       ├── meters.py
│   │       ├── multiprocessing.py
│   │       ├── tensor.py
│   │       ├── types.py
│   │       └── utils.py
│   └── train.sh
├── LICENSE
├── MQ/
│   ├── Convert_annotation.py
│   ├── Eval.py
│   ├── Evaluation/
│   │   ├── ego4d/
│   │   │   ├── eval_action_detection.py
│   │   │   ├── eval_detection.py
│   │   │   ├── generate_detection.py
│   │   │   ├── generate_retrieval.py
│   │   │   ├── get_detect_performance.py
│   │   │   └── get_retrieval_performance.py
│   │   └── utils.py
│   ├── Infer.py
│   ├── Merge_detection_retrieval.py
│   ├── Models/
│   │   ├── ActionGenerator.py
│   │   ├── AnchorGenerator.py
│   │   ├── BoundaryAdjust.py
│   │   ├── BoxCoder.py
│   │   ├── GCNs.py
│   │   ├── Head.py
│   │   ├── Loss.py
│   │   ├── VSGN.py
│   │   ├── XGPN.py
│   │   └── matcher.py
│   ├── README.md
│   ├── Train.py
│   └── Utils/
│       ├── __init__.py
│       ├── dataset.py
│       └── opts.py
├── NLQ/
│   ├── 2D-TAN/
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── README_2D-TAN.md
│   │   ├── SECURITY.md
│   │   ├── data/
│   │   │   ├── ActivityNet/
│   │   │   │   ├── download.sh
│   │   │   │   ├── test.json
│   │   │   │   ├── train.json
│   │   │   │   └── val.json
│   │   │   ├── Charades-STA/
│   │   │   │   ├── Charades_v1_test.csv
│   │   │   │   ├── Charades_v1_train.csv
│   │   │   │   ├── charades_sta_test.txt
│   │   │   │   ├── charades_sta_train.txt
│   │   │   │   ├── convert_vgg_features_to_hdf5.py
│   │   │   │   ├── download.sh
│   │   │   │   └── valid_videos.txt
│   │   │   ├── Ego4D_clip/
│   │   │   │   └── .gitkeep
│   │   │   └── TACoS/
│   │   │       ├── merge_npys_to_hdf5.py
│   │   │       ├── test.json
│   │   │       ├── train.json
│   │   │       └── val.json
│   │   ├── experiments/
│   │   │   ├── activitynet/
│   │   │   │   ├── 2D-TAN-64x64-K9L4-conv.yaml
│   │   │   │   └── 2D-TAN-64x64-K9L4-pool.yaml
│   │   │   ├── charades/
│   │   │   │   ├── 2D-TAN-16x16-K5L8-conv.yaml
│   │   │   │   └── 2D-TAN-16x16-K5L8-pool.yaml
│   │   │   ├── ego4d/
│   │   │   │   └── 2D-TAN-40x40-K9L4-pool-window-std-sf.yaml
│   │   │   └── tacos/
│   │   │       ├── 2D-TAN-128x128-K5L8-conv.yaml
│   │   │       └── 2D-TAN-128x128-K5L8-pool.yaml
│   │   ├── lib/
│   │   │   ├── core/
│   │   │   │   ├── config.py
│   │   │   │   ├── engine.py
│   │   │   │   ├── eval.py
│   │   │   │   └── utils.py
│   │   │   ├── datasets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── activitynet.py
│   │   │   │   ├── charades.py
│   │   │   │   ├── ego4d_clip.py
│   │   │   │   └── tacos.py
│   │   │   └── models/
│   │   │       ├── __init__.py
│   │   │       ├── frame_modules/
│   │   │       │   ├── __init__.py
│   │   │       │   └── frame_pool.py
│   │   │       ├── fusion_modules/
│   │   │       │   ├── __init__.py
│   │   │       │   └── base_fusion.py
│   │   │       ├── loss.py
│   │   │       ├── map_modules/
│   │   │       │   ├── __init__.py
│   │   │       │   └── map_conv.py
│   │   │       ├── prop_modules/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── dense.py
│   │   │       │   └── sparse.py
│   │   │       └── tan.py
│   │   ├── moment_localization/
│   │   │   ├── _init_paths.py
│   │   │   ├── test.py
│   │   │   └── train.py
│   │   └── notice.md
│   └── VSLNet/
│       ├── .gitignore
│       ├── README.md
│       ├── main.py
│       ├── model/
│       │   ├── VSLNet.py
│       │   ├── __init__.py
│       │   └── layers.py
│       ├── options.py
│       ├── requirements.txt
│       ├── run_train.sh
│       └── utils/
│           ├── __init__.py
│           ├── data_gen.py
│           ├── data_loader.py
│           ├── data_util.py
│           ├── evaluate_ego4d_nlq.py
│           ├── prepare_ego4d_dataset.py
│           └── runner_utils.py
├── README.md
├── VQ2D/
│   ├── .gitignore
│   ├── README.md
│   ├── configs/
│   │   ├── Base-RCNN-FPN.yaml
│   │   ├── siam_rcnn_2_gpus.yaml
│   │   └── siam_rcnn_8_gpus.yaml
│   ├── convert_videos_to_clips.py
│   ├── convert_videos_to_images.py
│   ├── detectron2_extensions/
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   └── defaults.py
│   │   ├── layers/
│   │   │   ├── __init__.py
│   │   │   └── wrappers.py
│   │   └── modeling/
│   │       ├── meta_arch/
│   │       │   ├── __init__.py
│   │       │   └── siam_rcnn.py
│   │       └── roi_heads/
│   │           ├── __init__.py
│   │           ├── set_heads.py
│   │           └── siam_heads.py
│   ├── evaluate_vq.py
│   ├── extract_vq_detection_scores.py
│   ├── perform_vq_inference.py
│   ├── process_vq_dataset.py
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── extract_vq_detections.sh
│   │   ├── faster_evaluation/
│   │   │   └── merge_results.py
│   │   ├── infer_vq.sh
│   │   ├── train_2_gpus.sh
│   │   └── train_8_gpus.sh
│   ├── tools/
│   │   ├── test_model_loading.py
│   │   └── validate_extracted_clips.py
│   ├── train_siam_rcnn.py
│   ├── validate_challenge_predictions.py
│   ├── visualizations/
│   │   ├── .gitignore
│   │   ├── visualize_annotation_stats.ipynb
│   │   └── visualize_annotations.py
│   └── vq2d/
│       ├── baselines/
│       │   ├── __init__.py
│       │   ├── dataloader.py
│       │   ├── dataset.py
│       │   ├── feature_retrieval.py
│       │   ├── predictor.py
│       │   └── utils.py
│       ├── config.yaml
│       ├── constants.py
│       ├── metrics/
│       │   ├── __init__.py
│       │   ├── metrics.py
│       │   ├── spatio_temporal_metrics.py
│       │   ├── success_metrics.py
│       │   ├── temporal_metrics.py
│       │   ├── tracking_metrics.py
│       │   └── utils.py
│       ├── stats.py
│       ├── structures.py
│       ├── tools/
│       │   └── get_average_detector_flops.py
│       └── tracking/
│           ├── __init__.py
│           ├── kys.py
│           ├── particle_filter.py
│           ├── pfilter.py
│           ├── tracker.py
│           └── utils.py
└── VQ3D/
    ├── README.md
    ├── VQ3D/
    │   ├── API/
    │   │   ├── get_query_3d_ground_truth.py
    │   │   └── metrics.py
    │   ├── README.md
    │   └── scripts/
    │       ├── eval.py
    │       ├── prepare_ground_truth_for_queries.py
    │       └── run.py
    ├── annotation_API/
    │   └── API/
    │       └── bounding_box.py
    ├── camera_pose_estimation/
    │   ├── Camera_Intrinsics_API/
    │   │   ├── extract_frames.py
    │   │   └── get_camera_intrinsics.py
    │   ├── README.md
    │   ├── SuperGlueMatching/
    │   │   ├── LICENSE
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── extract_descriptors_api.py
    │   │   ├── extract_visual_database.py
    │   │   ├── match_pairs_api.py
    │   │   ├── models/
    │   │   │   ├── __init__.py
    │   │   │   ├── matching.py
    │   │   │   ├── superglue.py
    │   │   │   ├── superpoint.py
    │   │   │   ├── utils.py
    │   │   │   └── weights/
    │   │   │       ├── superglue_indoor.pth
    │   │   │       ├── superglue_outdoor.pth
    │   │   │       └── superpoint_v1.pth
    │   │   ├── preprocess_color.py
    │   │   ├── requirements.txt
    │   │   └── scripts.sh
    │   ├── Visualization/
    │   │   ├── camera_trajectory.json
    │   │   └── visualize_render_images.py
    │   ├── extract_frames_all_clips.py
    │   ├── extract_frames_for_colmap.py
    │   ├── get_intrinsics_for_all_clips.py
    │   ├── get_intrinsics_for_all_clips_greedy.py
    │   ├── get_median_intrinsics.py
    │   ├── main.sh
    │   ├── pnp_api.py
    │   ├── reconstruction.py
    │   ├── run_all_data.py
    │   ├── sfm_api_wsuperglue.py
    │   ├── superglue_tracker.py
    │   ├── undistort_image_api.py
    │   ├── utils.py
    │   └── visual_database_api.py
    ├── data/
    │   ├── README.md
    │   ├── all_clips_camera_poses_val.json
    │   ├── mapping_vq2d_to_vq3d_queries_annotations_test.json
    │   ├── mapping_vq2d_to_vq3d_queries_annotations_train.json
    │   ├── mapping_vq2d_to_vq3d_queries_annotations_val.json
    │   ├── scan_to_intrinsics.json
    │   ├── vq3d_results/
    │   │   └── siam_rcnn_residual_kys_val.json
    │   └── vq3d_test_unannotated_template.json
    ├── depth_estimation/
    │   ├── README.md
    │   ├── compute_depth_for_ego4d.py
    │   ├── main.sh
    │   └── prepare_inputs_for_depth_estimation.py
    └── requirements.txt