Copy disabled (too large)
Download .txt
Showing preview only (12,026K chars total). Download the full file to get everything.
Repository: MTLab/MorphMLP
Branch: master
Commit: 62ebe80d7f76
Files: 302
Total size: 11.4 MB
Directory structure:
gitextract_p8oeuhyn/
├── DATASET.md
├── INSTALL.md
├── LICENSE
├── README.md
├── VISUALIZATION_TOOLS.md
├── build/
│ └── lib/
│ └── slowfast/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── custom_config.py
│ │ └── defaults.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── ava_dataset.py
│ │ ├── ava_helper.py
│ │ ├── build.py
│ │ ├── charades.py
│ │ ├── cv2_transform.py
│ │ ├── decoder.py
│ │ ├── imagenet.py
│ │ ├── kinetics.py
│ │ ├── loader.py
│ │ ├── mixup.py
│ │ ├── multigrid_helper.py
│ │ ├── ptv_datasets.py
│ │ ├── rand_augment.py
│ │ ├── random_erasing.py
│ │ ├── ssv2.py
│ │ ├── sth.py
│ │ ├── transform.py
│ │ ├── utils.py
│ │ └── video_container.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── batchnorm_helper.py
│ │ ├── build.py
│ │ ├── common.py
│ │ ├── contrastive.py
│ │ ├── custom_video_model_builder.py
│ │ ├── head_helper.py
│ │ ├── losses.py
│ │ ├── morphmlp.py
│ │ ├── morphmlp_32.py
│ │ ├── nonlocal_helper.py
│ │ ├── operators.py
│ │ ├── optimizer.py
│ │ ├── ptv_model_builder.py
│ │ ├── resnet_helper.py
│ │ ├── stem_helper.py
│ │ ├── utils.py
│ │ └── video_model_builder.py
│ ├── site.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── ava_eval_helper.py
│ │ ├── ava_evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── label_map_util.py
│ │ │ ├── metrics.py
│ │ │ ├── np_box_list.py
│ │ │ ├── np_box_list_ops.py
│ │ │ ├── np_box_mask_list.py
│ │ │ ├── np_box_mask_list_ops.py
│ │ │ ├── np_box_ops.py
│ │ │ ├── np_mask_ops.py
│ │ │ ├── object_detection_evaluation.py
│ │ │ ├── per_image_evaluation.py
│ │ │ └── standard_fields.py
│ │ ├── benchmark.py
│ │ ├── bn_helper.py
│ │ ├── c2_model_loading.py
│ │ ├── checkpoint.py
│ │ ├── checkpoint_amp.py
│ │ ├── distributed.py
│ │ ├── env.py
│ │ ├── logging.py
│ │ ├── lr_policy.py
│ │ ├── meters.py
│ │ ├── metrics.py
│ │ ├── misc.py
│ │ ├── multigrid.py
│ │ ├── multiprocessing.py
│ │ ├── parser.py
│ │ └── weight_init_helper.py
│ └── visualization/
│ ├── __init__.py
│ ├── async_predictor.py
│ ├── ava_demo_precomputed_boxes.py
│ ├── demo_loader.py
│ ├── gradcam_utils.py
│ ├── prediction_vis.py
│ ├── predictor.py
│ ├── tensorboard_vis.py
│ ├── utils.py
│ └── video_visualizer.py
├── configs/
│ ├── AVA/
│ │ ├── SLOWFAST_32x2_R50_SHORT.yaml
│ │ ├── SLOW_8x8_R50_SHORT.yaml
│ │ └── c2/
│ │ ├── SLOWFAST_32x2_R101_50_50.yaml
│ │ ├── SLOWFAST_32x2_R101_50_50_v2.1.yaml
│ │ ├── SLOWFAST_32x2_R50.yaml
│ │ ├── SLOWFAST_64x2_R101_50_50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ ├── Charades/
│ │ ├── SLOWFAST_16x8_R50.yaml
│ │ ├── SLOWFAST_16x8_R50_multigrid.yaml
│ │ └── pytorchvideo/
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ ├── ImageNet/
│ │ ├── MVIT_B_16_CONV.yaml
│ │ └── RES_R50.yaml
│ ├── K400/
│ │ ├── K400_MLP_B16x4.yaml
│ │ ├── K400_MLP_B32x4.yaml
│ │ ├── K400_MLP_S16x4.yaml
│ │ └── K400_MLP_S32x4.yaml
│ ├── Kinetics/
│ │ ├── C2D_8x8_R50.yaml
│ │ ├── C2D_8x8_R50_IN1K.yaml
│ │ ├── C2D_NLN_8x8_R50.yaml
│ │ ├── C2D_NLN_8x8_R50_IN1K.yaml
│ │ ├── I3D_8x8_R101.yaml
│ │ ├── I3D_8x8_R50.yaml
│ │ ├── I3D_8x8_R50_IN1K.yaml
│ │ ├── I3D_NLN_8x8_R101.yaml
│ │ ├── I3D_NLN_8x8_R50.yaml
│ │ ├── I3D_NLN_8x8_R50_IN1K.yaml
│ │ ├── MVIT_B_16x4_CONV.yaml
│ │ ├── MVIT_B_32x3_CONV.yaml
│ │ ├── MVITv2_B_32x3.yaml
│ │ ├── MVITv2_L_40x3_test.yaml
│ │ ├── MVITv2_S_16x4.yaml
│ │ ├── SLOWFAST_4x16_R50.yaml
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ ├── SLOWFAST_8x8_R50_stepwise.yaml
│ │ ├── SLOWFAST_8x8_R50_stepwise_multigrid.yaml
│ │ ├── SLOWFAST_NLN_4x16_R50.yaml
│ │ ├── SLOWFAST_NLN_8x8_R50.yaml
│ │ ├── SLOW_4x16_R50.yaml
│ │ ├── SLOW_8x8_R50.yaml
│ │ ├── SLOW_NLN_4x16_R50.yaml
│ │ ├── SLOW_NLN_8x8_R50.yaml
│ │ ├── X3D_L.yaml
│ │ ├── X3D_M.yaml
│ │ ├── X3D_S.yaml
│ │ ├── X3D_XS.yaml
│ │ ├── c2/
│ │ │ ├── C2D_NOPOOL_8x8_R50.yaml
│ │ │ ├── I3D_8x8_R50.yaml
│ │ │ ├── I3D_NLN_8x8_R50.yaml
│ │ │ ├── SLOWFAST_16x8_R101_50_50.yaml
│ │ │ ├── SLOWFAST_4x16_R50.yaml
│ │ │ ├── SLOWFAST_8x8_R101_101_101.yaml
│ │ │ ├── SLOWFAST_8x8_R101_50_101.yaml
│ │ │ ├── SLOWFAST_8x8_R101_50_50.yaml
│ │ │ ├── SLOWFAST_8x8_R50.yaml
│ │ │ ├── SLOWFAST_NLN_16x8_R101_50_50.yaml
│ │ │ ├── SLOW_4x16_R50.yaml
│ │ │ └── SLOW_8x8_R50.yaml
│ │ └── pytorchvideo/
│ │ ├── C2D_8x8_R50.yaml
│ │ ├── CSN_32x2_R101.yaml
│ │ ├── I3D_8x8_R50.yaml
│ │ ├── MVIT_B_16x4_CONV.yaml
│ │ ├── R2PLUS1D_16x4_R50.yaml
│ │ ├── SLOWFAST_16x8_R101_50_50.yaml
│ │ ├── SLOWFAST_4x16_R50.yaml
│ │ ├── SLOWFAST_8x8_R101.yaml
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ ├── SLOW_4x16_R50.yaml
│ │ ├── SLOW_8x8_R50.yaml
│ │ ├── X3D_L.yaml
│ │ ├── X3D_M.yaml
│ │ ├── X3D_S.yaml
│ │ └── X3D_XS.yaml
│ ├── SSV1/
│ │ ├── SSV1_MLP_B16.yaml
│ │ ├── SSV1_MLP_B32.yaml
│ │ ├── SSV1_MLP_S16.yaml
│ │ └── SSV1_MLP_S32.yaml
│ ├── SSV2/
│ │ ├── SSV2_MLP_B16.yaml
│ │ ├── SSV2_MLP_B32.yaml
│ │ ├── SSV2_MLP_S16.yaml
│ │ └── SSV2_MLP_S32.yaml
│ ├── SSv2/
│ │ ├── SLOWFAST_16x8_R50.yaml
│ │ ├── SLOWFAST_16x8_R50_multigrid.yaml
│ │ └── pytorchvideo/
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ └── ssl/
│ ├── BYOL_SlowR50_8x8.yaml
│ ├── MoCo_SlowR50_8x8.yaml
│ ├── SimCLR_SlowR50_8x8.yaml
│ ├── SwAV_Slow_R50_8x8.yaml
│ ├── finetune_SSv2_Slow_R50_syn0.yaml
│ ├── finetune_SSv2_Slow_R50_syn8.yaml
│ ├── finetune_ava_Slow_R50_syn0.yaml
│ ├── finetune_ava_Slow_R50_syn8.yaml
│ ├── finetune_ucf_Slow_R50_syn0.yaml
│ ├── finetune_ucf_Slow_R50_syn8.yaml
│ ├── linear_k400_Slow_8x8_R50_syn0.yaml
│ └── linear_k400_Slow_8x8_R50_syn8.yaml
├── data_list/
│ ├── k400/
│ │ ├── kinetics_400_categroies.txt
│ │ ├── test.csv
│ │ ├── train.csv
│ │ └── val.csv
│ ├── sthv1/
│ │ ├── category.txt
│ │ ├── somesomev1_rgb_train_split.txt
│ │ └── somesomev1_rgb_validation_split.txt
│ └── sthv2/
│ ├── category.txt
│ ├── somesomev2_rgb_test_split.txt
│ ├── somesomev2_rgb_train_split.txt
│ └── somesomev2_rgb_validation_split.txt
├── demo/
│ ├── AVA/
│ │ └── SLOWFAST_32x2_R101_50_50.yaml
│ └── Kinetics/
│ └── SLOWFAST_8x8_R50.yaml
├── linter.sh
├── mlp_images/
│ └── IMAGE.md
├── pretrained/
│ └── note.md
├── projects/
│ ├── multigrid/
│ │ └── README.md
│ ├── mvit/
│ │ └── README.md
│ ├── mvitv2/
│ │ └── README.md
│ ├── pytorchvideo/
│ │ └── README.md
│ ├── ssl/
│ │ └── README.md
│ └── x3d/
│ └── README.md
├── setup.cfg
├── setup.py
├── slowfast/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── custom_config.py
│ │ └── defaults.py
│ ├── datasets/
│ │ ├── DATASET.md
│ │ ├── __init__.py
│ │ ├── ava_dataset.py
│ │ ├── ava_helper.py
│ │ ├── build.py
│ │ ├── charades.py
│ │ ├── cv2_transform.py
│ │ ├── decoder.py
│ │ ├── imagenet.py
│ │ ├── kinetics.py
│ │ ├── loader.py
│ │ ├── mixup.py
│ │ ├── multigrid_helper.py
│ │ ├── ptv_datasets.py
│ │ ├── rand_augment.py
│ │ ├── random_erasing.py
│ │ ├── ssv2.py
│ │ ├── sth.py
│ │ ├── transform.py
│ │ ├── utils.py
│ │ └── video_container.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── batchnorm_helper.py
│ │ ├── build.py
│ │ ├── common.py
│ │ ├── contrastive.py
│ │ ├── custom_video_model_builder.py
│ │ ├── head_helper.py
│ │ ├── losses.py
│ │ ├── morphmlp.py
│ │ ├── morphmlp_32.py
│ │ ├── nonlocal_helper.py
│ │ ├── operators.py
│ │ ├── optimizer.py
│ │ ├── ptv_model_builder.py
│ │ ├── resnet_helper.py
│ │ ├── stem_helper.py
│ │ ├── utils.py
│ │ └── video_model_builder.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── ava_eval_helper.py
│ │ ├── ava_evaluation/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt
│ │ │ ├── label_map_util.py
│ │ │ ├── metrics.py
│ │ │ ├── np_box_list.py
│ │ │ ├── np_box_list_ops.py
│ │ │ ├── np_box_mask_list.py
│ │ │ ├── np_box_mask_list_ops.py
│ │ │ ├── np_box_ops.py
│ │ │ ├── np_mask_ops.py
│ │ │ ├── object_detection_evaluation.py
│ │ │ ├── per_image_evaluation.py
│ │ │ └── standard_fields.py
│ │ ├── benchmark.py
│ │ ├── bn_helper.py
│ │ ├── c2_model_loading.py
│ │ ├── checkpoint.py
│ │ ├── checkpoint_amp.py
│ │ ├── distributed.py
│ │ ├── env.py
│ │ ├── logging.py
│ │ ├── lr_policy.py
│ │ ├── meters.py
│ │ ├── metrics.py
│ │ ├── misc.py
│ │ ├── multigrid.py
│ │ ├── multiprocessing.py
│ │ ├── parser.py
│ │ └── weight_init_helper.py
│ └── visualization/
│ ├── __init__.py
│ ├── async_predictor.py
│ ├── ava_demo_precomputed_boxes.py
│ ├── demo_loader.py
│ ├── gradcam_utils.py
│ ├── prediction_vis.py
│ ├── predictor.py
│ ├── tensorboard_vis.py
│ ├── utils.py
│ └── video_visualizer.py
└── tools/
├── benchmark.py
├── demo_net.py
├── run_net.py
├── test_net.py
├── train_net.py
└── visualization.py
================================================
FILE CONTENTS
================================================
================================================
FILE: DATASET.md
================================================
# Dataset Preparation
We provide our labels in `data_list`.
## Kinetics
The Kinetics dataset can be downloaded via the code released by ActivityNet:
1. Download the videos via the official [scripts](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). You can also download the videos as shown in [cvdfoundation](https://github.com/cvdfoundation/kinetics-dataset).
2. After all the videos were downloaded, resize the video to the short edge size of 320, then prepare the csv files for training, validation, and testing set as `train.csv`, `val.csv`, `test.csv` in `data_list/k400`. The format of the csv file is:
```
path_to_video_1,label_1
path_to_video_2,label_2
path_to_video_3,label_3
...
path_to_video_N,label_N
```
Note that we use `decord` to decode the Kinetics videos on the fly.
> Since some videos may no longer be available, this may lead to a small performance gap. If necessary, we will provide our version of Kinetics-400.
## Something-Something V2
1. Please download the dataset and annotations from [dataset provider](https://20bn.com/datasets/something-something).
2. Download the *frame list* from the following links: ([train](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/train.csv), [val](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/val.csv)).
3. Extract the frames at 30 FPS. (We used ffmpeg-4.1.3 with command
`ffmpeg -i "${video}" -r 30 -q:v 1 "${out_name}"`
in experiments.) Please put the frames in a structure consistent with the frame lists.
Please put all annotation json files and the frame lists in the same folder, and set `DATA.PATH_TO_DATA_DIR` to this folder path e.g., `data_list/sthv1` and `data_list/sthv2`. Set `DATA.PATH_PREFIX` to be the path to the folder containing extracted frames.
> Since the web page of 20BN is not accessible, we will provide the raw datasets if necessary.
================================================
FILE: INSTALL.md
================================================
# Installation
## Requirements
- Python >= 3.6
- Numpy
- PyTorch >= 1.5
- [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'`
- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
You can install them together at [pytorch.org](https://pytorch.org) to make sure of this.
- simplejson: `pip install simplejson`
- GCC >= 4.9
- PyAV: `conda install av -c conda-forge`
- ffmpeg (4.0 is preferred, will be installed along with PyAV)
- PyYaml: (will be installed along with fvcore)
- tqdm: (will be installed along with fvcore)
- iopath: `pip install -U iopath` or `conda install -c iopath iopath`
- psutil: `pip install psutil`
- OpenCV: `pip install opencv-python`
- torchvision: `pip install torchvision` or `conda install torchvision -c pytorch`
- tensorboard: `pip install tensorboard`
- moviepy: (optional, for visualizing video on tensorboard) `conda install -c conda-forge moviepy` or `pip install moviepy`
- PyTorchVideo: `pip install pytorchvideo`
- Decord: `pip install decord`
Different from the [SlowFast](https://github.com/facebookresearch/SlowFast) repository, we removed the code that depends on [Detectron2](https://github.com/facebookresearch/detectron2) (the detection and visualization parts) to make installation easier. If you want to use them, please follow the [SlowFast](https://github.com/facebookresearch/SlowFast) repository.
## Add this repository to $PYTHONPATH
```shell
export PYTHONPATH=/path/to/MorphMLP/slowfast:$PYTHONPATH
```
## Build MorphMLP
After having the above dependencies, run:
```shell
git clone https://github.com/MTLab/MorphMLP
cd MorphMLP
python setup.py build develop
```
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2022, Meitu, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# [ECCV2022] MorphMLP \[[arxiv](https://arxiv.org/abs/2111.12527)\]
Our MorphMLP paper was accepted to ECCV 2022!!
We currently release the code and models for:
- [x] Kinetics-400
- [x] Something-Something V1
- [x] Something-Something V2
- ImageNet-1K: For training/testing our models on ImageNet-1K, and for how to transfer the pretrained weights for video usage, please refer to [IMAGE.md](mlp_images/IMAGE.md).
## Update
***Aug,3rd 2022***
**\[Initial commits\]:**
1. Pretrained models on Kinetics-400, Something-Something V1
## Model Zoo
> The ImageNet-1K pretrained models, as well as the following models and logs, can be downloaded from **Google Drive**: [total_models](https://drive.google.com/drive/folders/1VIJTQtc69l11MxDNiq-OzyPEAlUVNHIx?usp=sharing).
>
> We also release the models on **Baidu Cloud**: [total_models (bbyy)](https://pan.baidu.com/s/1L3leT9MrqzGhSMFPTPhsow?pwd=bbyy).
### Note
- All the models are pretrained on ImageNet-1K. You can find those pre-trained models in [pretrained](https://drive.google.com/drive/folders/105DRws977iNnjEv-Hjfix5Q3JtzqoDUm?usp=sharing) and put them in `pretrained` folder.
- \#Frame = \#input_frame x \#crop x \#clip
- \#input_frame means how many frames are input for model per inference
- \#crop means spatial crops (e.g., 3 for left/right/center)
- \#clip means temporal clips (e.g., 4 means repeatedly sampling four clips with different start indices)
### Kinetics-400
| Model | #Frame | Sampling Stride | FLOPs | Top1 | Model | Log | config |
| ----------- | ------ | --------------- | ----- | ---- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| MorphMLP-S | 16x1x4 | 4 | 268G | 78.7 | [google](https://drive.google.com/file/d/12qhWov_iUi2EXHM85P7eaLC88r8Eigty/view?usp=share_link) | [google](https://drive.google.com/file/d/1WEYSe1RK68EHRehBZgzLUEOKDX_JGBsZ/view?usp=sharing) |[config](configs/K400/K400_MLP_S16x4.yaml) |
| MorphMLP-S | 32x1x4 | 4 | 532G | 79.7 | [google](https://drive.google.com/file/d/12qhWov_iUi2EXHM85P7eaLC88r8Eigty/view?usp=share_link) | [google](https://drive.google.com/file/d/1ik9_OnG85boYGqXwN3TIaf1nDnE-J50V/view?usp=sharing) | [config](configs/K400/K400_MLP_S32x4.yaml) |
| MorphMLP-B | 16x1x4 | 4 | 392G | 79.5 | [google](https://drive.google.com/file/d/1bmJcpcln9fVEj_o8fzFYHjup7dRGbJeD/view?usp=sharing) | [google](https://drive.google.com/file/d/1uazG3dahCxms2V1MMuvntkTkcCp0A1oV/view?usp=sharing) | [config](configs/K400/K400_MLP_B16x4.yaml) |
| MorphMLP-B | 32x1x4 | 4 | 788G | 80.8 | [google](https://drive.google.com/file/d/17iu9L5lnQ0CucXV1vvlJZDAF2RfXqdYu/view?usp=sharing) | [google](https://drive.google.com/file/d/17sCcKYb5F2axvPFd_TWAk74fC2bBGdK1/view?usp=sharing) | [config](configs/K400/K400_MLP_B32x4.yaml) |
### Something-Something V1
| Model | Pretrain | #Frame | FLOPs | Top1 | Model | Log | config |
| ----------- | -------- | ------ | ----- | ---- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| MorphMLP-S | IN-1K | 16x1x1 | 67G | 50.6 | [soon] | [soon] |[config](configs/SSV1/SSV1_MLP_S16.yaml) |
| MorphMLP-S | IN-1K | 16x3x1 | 201G | 53.9 | [soon] | [soon] |[config](configs/SSV1/SSV1_MLP_S32.yaml) |
| MorphMLP-B | IN-1K | 16x3x1 | 294G | 55.1 | [google](https://drive.google.com/file/d/1Cz4xQ4Uad9AiQbmTwDElml_Dxe1Nw2SN/view?usp=sharing) | [google](https://drive.google.com/file/d/1QJ7QgB1TTrlbJMfYyMm4hmt0AW9YxkJz/view?usp=sharing) |[config](configs/SSV1/SSV1_MLP_B16.yaml) |
| MorphMLP-B | IN-1K | 32x3x1 | 591G | 57.4 | [google](https://drive.google.com/file/d/1yxwoR53L0qRU44MRx9gZM2D6YPU4_eZw/view?usp=sharing) | [google](https://drive.google.com/file/d/1YVHPDKhtjFvcSrAwXZWswBhgoyRR6q8g/view?usp=sharing) |[config](configs/SSV1/SSV1_MLP_B32.yaml) |
### Something-Something V2
| Model | Pretrain | #Frame | FLOPs | Top1 | Model | Log | config |
| ----------- | -------- | ------ | ----- | ---- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| MorphMLP-S | IN-1K | 16x3x1 | 201G | 67.1 | [soon] | [soon] |[config](configs/SSV2/SSV2_MLP_S16.yaml) |
| MorphMLP-S | IN-1K | 32x3x1 | 405G | 68.3 | [soon] | [soon] |[config](configs/SSV2/SSV2_MLP_S32.yaml) |
| MorphMLP-B | IN-1K | 16x3x1 | 294G | 67.6 | [soon]| [soon] |[config](configs/SSV2/SSV2_MLP_B16.yaml) |
| MorphMLP-B | IN-1K | 32x3x1 | 591G | 70.1 | [soon]| [soon] |[config](configs/SSV2/SSV2_MLP_B32.yaml) |
## Usage
### Installation
Please follow the installation instructions in [INSTALL.md](INSTALL.md). You may follow the instructions in [DATASET.md](DATASET.md) to prepare the datasets.
### Training
1. Download the pretrained models into the pretrained folder.
2. Simply run the training code as follows:
```shell
python3 tools/run_net.py --cfg configs/K400/K400_MLP_S16x4.yaml DATA.PATH_PREFIX path_to_data OUTPUT_DIR your_save_path
```
**[Note]:**
- You can change the config files to select which type of experiment to run.
- For more config details, you can read the comments in `slowfast/config/defaults.py`.
- To avoid **out of memory** errors, you can use `torch.utils.checkpoint` (will be updated soon).
### Testing
We provide testing examples as follows:
### Kinetics400
```shell
python3 tools/run_net.py --cfg configs/K400/K400_MLP_S16x4.yaml DATA.PATH_PREFIX path_to_data TRAIN.ENABLE False TEST.NUM_ENSEMBLE_VIEWS 4 TEST.NUM_SPATIAL_CROPS 1 TEST.CHECKPOINT_FILE_PATH your_model_path OUTPUT_DIR your_output_dir
```
### SomethingV1&V2
```shell
python3 tools/run_net.py --cfg configs/SSV1/SSV1_MLP_B32.yaml DATA.PATH_PREFIX your_data_path TEST.NUM_ENSEMBLE_VIEWS 1 TEST.NUM_SPATIAL_CROPS 3 TEST.CHECKPOINT_FILE_PATH your_model_path OUTPUT_DIR your_output_dir
```
Specifically, you need to set the number of crops and clips and your checkpoint path, then run the multi-crop/multi-clip test:
Set the number of crops and clips:
**Multi-clip testing for Kinetics**
```shell
TEST.NUM_ENSEMBLE_VIEWS 4
TEST.NUM_SPATIAL_CROPS 1
```
**Multi-crop testing for Something-Something**
```shell
TEST.NUM_ENSEMBLE_VIEWS 1
TEST.NUM_SPATIAL_CROPS 3
```
You can also set the checkpoint path via:
```shell
TEST.CHECKPOINT_FILE_PATH your_model_path
```
## Cite MorphMLP
If you find this repository useful, please use the following BibTeX entry for citation.
```latex
@article{zhang2021morphmlp,
title={Morphmlp: A self-attention free, mlp-like backbone for image and video},
author={Zhang, David Junhao and Li, Kunchang and Chen, Yunpeng and Wang, Yali and Chandra, Shashwat and Qiao, Yu and Liu, Luoqi and Shou, Mike Zheng},
journal={arXiv preprint arXiv:2111.12527},
year={2021}
}
```
## Acknowledgement
This repository is built on the [SlowFast](https://github.com/facebookresearch/SlowFast) and [UniFormer](https://github.com/Sense-X/UniFormer) repositories.
================================================
FILE: VISUALIZATION_TOOLS.md
================================================
# Visualization Tools for PySlowFast
This document provides a brief introduction to running the various visualization tools provided with PySlowFast. Before launching any job, make sure you have properly installed PySlowFast following the instructions in [README.md](README.md) and have prepared the dataset in the correct format following [DATASET.md](slowfast/datasets/DATASET.md).
## Tensorboard Support for Train/Eval/Test
We provide Tensorboard support during the train/eval/test pipeline to assist live monitoring various metrics, and class-level performance
with loss/error graphs, confusion matrices and histograms. Enable Tensorboard support by adding the following to your yaml config file:
```
TENSORBOARD:
ENABLE: True
LOG_DIR: # Leave empty to use cfg.OUTPUT_DIR/runs-{cfg.TRAIN.DATASET} as path.
CLASS_NAMES_PATH: # Path to json file providing class_name - id mapping.
CONFUSION_MATRIX:
ENABLE: True
SUBSET_PATH: # Path to txt file contains class names separated by newline characters.
# Only classes in this file will be visualized in the confusion matrix.
HISTOGRAM:
ENABLE: True
TOPK: 10 # Top-k most frequently predicted classes for each class in the dataset.
SUBSET_PATH: # Path to txt file contains class names separated by newline characters.
# Only classes in this file will be visualized with histograms.
```
More details can be found at [defaults.py](slowfast/config/defaults.py)
### Loss & Error Graphs on Tensorboard:
<div align="center">
<img src="demo/visualization/metrics/loss.png" width="800px"/>
</div>
### Confusion matrices:
<div align="center">
<img src="demo/visualization/metrics/cf_subset.png" width="367px" style="margin:10px;"/>
<img src="demo/visualization/metrics/cf_parent.png" width="350px" style="margin:10px;"/>
</div>
<div align="center">
</div>
To enable this mode, set:
```
TENSORBOARD:
ENABLE: True
CATEGORIES_PATH: # Path to a json file for categories -> classes mapping
# in the format {"parent_class": ["child_class1", "child_class2",...], ...}.
CONFUSION_MATRIX:
ENABLE: True
```
### Histograms of top-k most frequent predictions:
<div align="center">
<img src="demo/visualization/metrics/hist1.png" width="400px" style="margin:10px;"/>
<img src="demo/visualization/metrics/hist2.png" width="406px" style="margin:10px;"/>
</div>
## Model Analysis
In addition, we provide tools to help with understanding your trained model(s), more options at [defaults.py](slowfast/config/defaults.py)
Adding the following to your yaml config file:
```
TENSORBOARD:
ENABLE: True
MODEL_VIS:
ENABLE: True
MODEL_WEIGHTS: # Set to True to visualize model weights.
ACTIVATIONS: # Set to True to visualize feature maps.
INPUT_VIDEO: # Set to True to visualize the input video(s) for the corresponding feature maps.
LAYER_LIST: # List of layer names to visualize weights and activations for.
GRAD_CAM:
ENABLE: True
LAYER_LIST: # List of CNN layers to use for Grad-CAM visualization method.
# The number of layers must be equal to the number of pathway(s).
```
### Weights Visualization on Tensorboard:
<div align="center">
<img src="demo/visualization/analysis/weights1.png" width="300px" style="margin:10px;"/>
<img src="demo/visualization/analysis/weights2.png" width="328px" style="margin:18px;"/>
</div>
### Feature Maps & Inputs Visualization:
<div align="center">
<img src="demo/visualization/analysis/activations.gif" width="800px"/>
</div>
### Input Videos Visualization with Grad-CAM:
<div align="center">
<img src="demo/visualization/analysis/gradcam.gif" width="400px" style="margin:10px;"/>
<img src="demo/visualization/analysis/gradcam2.gif" width="400px" style="margin:10px;"/>
</div>
## Run the Demo on Videos/Camera
To run inference with PySlowFast model(s) on wild video(s), add the following to your yaml config file:
```
DEMO:
ENABLE: True
LABEL_FILE_PATH: # Path to json file providing class_name - id mapping.
INPUT_VIDEO: # Path to input video file.
OUTPUT_FILE: # Path to output video file to write results to.
# Leave an empty string if you would like to display results to a window.
THREAD_ENABLE: # Run video reader/writer in the background with multi-threading.
NUM_VIS_INSTANCES: # Number of CPU(s)/processes used to run the video visualizer.
NUM_CLIPS_SKIP: # Number of clips to skip prediction/visualization
# (mostly to smoothen/improve display quality with webcam input).
```
If you would like to use webcam as an input, in place of `DEMO.INPUT_VIDEO`, set `DEMO.WEBCAM` to the index of the webcam for input. Please check for more options at [defaults.py](slowfast/config/defaults.py)
### Action Recognition Demo:
<div align="center">
<img src="demo/visualization/demo_gifs/recognition.gif" width="600px"/>
</div>
### Action Detection Demo:
<div align="center">
<img src="demo/visualization/demo_gifs/detection.gif" width="600px" style="margin:10px;"/>
</div>
### Demo with AVA video(s):
We also offer an option to use trained models to create and visualize prediction results and ground-truth labels on AVA-format videos and metadata. An example config is:
```
DEMO:
ENABLE: True
OUTPUT_FILE: yourPath/output.mp4
LABEL_FILE_PATH: yourPath/ava_classnames.json
INPUT_VIDEO: yourPath/frames/HVAmkvLrthQ # Path to a video file or image folder
PREDS_BOXES: yourPath/ava_detection_train_boxes_and_labels_include_negative.csv # Path to pre-computed bounding boxes in AVA format.
GT_BOXES: yourPath/ava_train_v2.2.csv # Path to ground-truth boxes and labels in AVA format (optional).
```
<div align="center">
<img src="demo/visualization/demo_gifs/ava_demo2.gif" width="600px" style="margin:10px;"/>
</div>
### Run command
```
python tools/run_net.py --cfg path/to/<pretrained_model_config_file>.yaml
```
### Download class name files
- [AVA class names json file](https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/ava_classids.json)
- [Kinetics class names json file](https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json)
- [Kinetics parent-child class mapping](https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/parents.json)
================================================
FILE: build/lib/slowfast/__init__.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from slowfast.utils.env import setup_environment
# Called at module level, so the environment setup runs as a side effect of
# importing the `slowfast` package.
setup_environment()
================================================
FILE: build/lib/slowfast/config/__init__.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
================================================
FILE: build/lib/slowfast/config/custom_config.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Add custom configs and default values"""
def add_custom_config(_C):
    """Extension hook for registering project-specific config options.

    Args:
        _C: the config node to extend. It is left untouched by this default
            implementation.

    Returns:
        None. The default implementation is intentionally a no-op.
    """
    # Add your own customized configs.
    return None
================================================
FILE: build/lib/slowfast/config/defaults.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Configs."""
from fvcore.common.config import CfgNode
from . import custom_config
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = CfgNode()
# ---------------------------------------------------------------------------- #
# Batch norm options
# ---------------------------------------------------------------------------- #
_C.BN = CfgNode()
# Precise BN stats.
_C.BN.USE_PRECISE_STATS = False
# Number of samples used to compute precise bn.
_C.BN.NUM_BATCHES_PRECISE = 200
# Weight decay value that applies on BN.
_C.BN.WEIGHT_DECAY = 0.0
# Norm type, options include `batchnorm`, `sub_batchnorm`, `sync_batchnorm`
_C.BN.NORM_TYPE = "batchnorm"
# Parameter for SubBatchNorm, where it splits the batch dimension into
# NUM_SPLITS splits, and run BN on each of them separately independently.
_C.BN.NUM_SPLITS = 1
# Parameter for NaiveSyncBatchNorm3d, where the stats across `NUM_SYNC_DEVICES`
# devices will be synchronized.
_C.BN.NUM_SYNC_DEVICES = 1
# ---------------------------------------------------------------------------- #
# Training options.
# ---------------------------------------------------------------------------- #
_C.TRAIN = CfgNode()
# If True Train the model, else skip training.
_C.TRAIN.ENABLE = True
# Dataset.
_C.TRAIN.DATASET = "kinetics"
# Total mini-batch size.
_C.TRAIN.BATCH_SIZE = 64
# Evaluate model on test data every eval period epochs.
_C.TRAIN.EVAL_PERIOD = 10
# Save model checkpoint every checkpoint period epochs.
_C.TRAIN.CHECKPOINT_PERIOD = 10
# Resume training from the latest checkpoint in the output directory.
_C.TRAIN.AUTO_RESUME = True
# Path to the checkpoint to load the initial weight.
_C.TRAIN.CHECKPOINT_FILE_PATH = ""
# Checkpoint types include `caffe2` or `pytorch`.
_C.TRAIN.CHECKPOINT_TYPE = "pytorch"
# If True, perform inflation when loading checkpoint.
_C.TRAIN.CHECKPOINT_INFLATE = False
# If True, reset epochs when loading checkpoint.
_C.TRAIN.CHECKPOINT_EPOCH_RESET = False
# If set, clear all layer names according to the pattern provided.
_C.TRAIN.CHECKPOINT_CLEAR_NAME_PATTERN = () # ("backbone.",)
# If True, use FP16 for activations
_C.TRAIN.MIXED_PRECISION = False
# ---------------------------------------------------------------------------- #
# Augmentation options.
# ---------------------------------------------------------------------------- #
_C.AUG = CfgNode()
# Whether to enable randaug.
_C.AUG.ENABLE = False
# Number of repeated augmentations to use during training.
# If this is greater than 1, then the actual batch size is
# TRAIN.BATCH_SIZE * AUG.NUM_SAMPLE.
_C.AUG.NUM_SAMPLE = 1
# Not used if using randaug.
_C.AUG.COLOR_JITTER = 0.4
# RandAug parameters.
_C.AUG.AA_TYPE = "rand-m9-mstd0.5-inc1"
# Interpolation method.
_C.AUG.INTERPOLATION = "bicubic"
# Probability of random erasing.
_C.AUG.RE_PROB = 0.25
# Random erasing mode.
_C.AUG.RE_MODE = "pixel"
# Random erase count.
_C.AUG.RE_COUNT = 1
# Do not random erase first (clean) augmentation split.
_C.AUG.RE_SPLIT = False
# ---------------------------------------------------------------------------- #
# Mixup options.
# ---------------------------------------------------------------------------- #
_C.MIXUP = CfgNode()
# Whether to use mixup.
_C.MIXUP.ENABLE = False
# Mixup alpha.
_C.MIXUP.ALPHA = 0.8
# Cutmix alpha.
_C.MIXUP.CUTMIX_ALPHA = 1.0
# Probability of performing mixup or cutmix when either/both is enabled.
_C.MIXUP.PROB = 1.0
# Probability of switching to cutmix when both mixup and cutmix enabled.
_C.MIXUP.SWITCH_PROB = 0.5
# Label smoothing.
_C.MIXUP.LABEL_SMOOTH_VALUE = 0.1
# ---------------------------------------------------------------------------- #
# Testing options
# ---------------------------------------------------------------------------- #
_C.TEST = CfgNode()
# If True test the model, else skip the testing.
_C.TEST.ENABLE = True
# Dataset for testing.
_C.TEST.DATASET = "kinetics"
# Total mini-batch size
_C.TEST.BATCH_SIZE = 8
# Path to the checkpoint to load the initial weight.
_C.TEST.CHECKPOINT_FILE_PATH = ""
# Number of clips to sample from a video uniformly for aggregating the
# prediction results.
_C.TEST.NUM_ENSEMBLE_VIEWS = 10
# Number of crops to sample from a frame spatially for aggregating the
# prediction results.
_C.TEST.NUM_SPATIAL_CROPS = 3
# Checkpoint types include `caffe2` or `pytorch`.
_C.TEST.CHECKPOINT_TYPE = "pytorch"
# Path to saving prediction results file.
_C.TEST.SAVE_RESULTS_PATH = ""
# -----------------------------------------------------------------------------
# ResNet options
# -----------------------------------------------------------------------------
_C.RESNET = CfgNode()
# Transformation function.
_C.RESNET.TRANS_FUNC = "bottleneck_transform"
# Number of groups (1 for ResNet, and larger than 1 for ResNeXt).
_C.RESNET.NUM_GROUPS = 1
# Width of each group (64 -> ResNet; 4 -> ResNeXt).
_C.RESNET.WIDTH_PER_GROUP = 64
# Apply relu in a inplace manner.
_C.RESNET.INPLACE_RELU = True
# Apply stride to 1x1 conv.
_C.RESNET.STRIDE_1X1 = False
# If true, initialize the gamma of the final BN of each block to zero.
_C.RESNET.ZERO_INIT_FINAL_BN = False
# Number of weight layers.
_C.RESNET.DEPTH = 50
# If the current block has more than NUM_BLOCK_TEMP_KERNEL blocks, use temporal
# kernel of 1 for the rest of the blocks.
_C.RESNET.NUM_BLOCK_TEMP_KERNEL = [[3], [4], [6], [3]]
# Size of stride on different res stages.
_C.RESNET.SPATIAL_STRIDES = [[1], [2], [2], [2]]
# Size of dilation on different res stages.
_C.RESNET.SPATIAL_DILATIONS = [[1], [1], [1], [1]]
# -----------------------------------------------------------------------------
# MORPH options
# -----------------------------------------------------------------------------
_C.MORPH = CfgNode()
# Layer counts (a four-entry list by default).
# NOTE(review): presumably the number of blocks in each network stage — confirm
# against the model builder.
_C.MORPH.LAYERS = [4, 3, 8, 3]
# Whether to downsample at the end (a four-entry list of flags by default).
_C.MORPH.TRANSITIONS = [True, False, False, False]
# Dimension of the segment (a four-entry list by default).
_C.MORPH.SEGMENT_DIM = [32, 16, 16, 16]
# Temporal stride.
_C.MORPH.T_STRIDE = 1
# MLP ratio of the different layers.
_C.MORPH.MLP_RATIOS = [3, 3, 3, 3]
# Embedding dimensions of the different layers.
_C.MORPH.EMBED_DIMS = [192, 384, 384, 384]
# Patch size.
_C.MORPH.PATCH_SIZE = 7
# NOTE(review): the three QKV/attention options below describe self-attention
# settings, while the project README cites MorphMLP as "self-attention free";
# confirm whether the model actually reads them.
# Override the default qk scale of head_dim ** -0.5 if set.
_C.MORPH.QKV_SCALE = None
# Enable bias for qkv if True.
_C.MORPH.QKV_BIAS = True
# Attention dropout rate.
_C.MORPH.ATTENTION_DROPOUT_RATE = 0
# Stochastic depth rate.
_C.MORPH.DROP_DEPTH_RATE = 0.1
# Path to the pretrained weights (None by default).
_C.MORPH.PRETRAIN_PATH = None
# ---------------------------------------------------------------------------- #
# X3D options
# See https://arxiv.org/abs/2004.04730 for details about X3D Networks.
# ---------------------------------------------------------------------------- #
_C.X3D = CfgNode()
# Width expansion factor.
_C.X3D.WIDTH_FACTOR = 1.0
# Depth expansion factor.
_C.X3D.DEPTH_FACTOR = 1.0
# Bottleneck expansion factor for the 3x3x3 conv.
_C.X3D.BOTTLENECK_FACTOR = 1.0 #
# Dimensions of the last linear layer before classification.
_C.X3D.DIM_C5 = 2048
# Dimensions of the first 3x3 conv layer.
_C.X3D.DIM_C1 = 12
# Whether to scale the width of Res2, default is false.
_C.X3D.SCALE_RES2 = False
# Whether to use a BatchNorm (BN) layer before the classifier, default is false.
_C.X3D.BN_LIN5 = False
# Whether to use channelwise (=depthwise) convolution in the center (3x3x3)
# convolution operation of the residual blocks.
_C.X3D.CHANNELWISE_3x3x3 = True
# -----------------------------------------------------------------------------
# Nonlocal options
# -----------------------------------------------------------------------------
_C.NONLOCAL = CfgNode()
# Index of each stage and block to add nonlocal layers.
_C.NONLOCAL.LOCATION = [[[]], [[]], [[]], [[]]]
# Number of group for nonlocal for each stage.
_C.NONLOCAL.GROUP = [[1], [1], [1], [1]]
# Instantiation to use for non-local layer.
_C.NONLOCAL.INSTANTIATION = "dot_product"
# Size of pooling layers used in Non-Local.
_C.NONLOCAL.POOL = [
# Res2
[[1, 2, 2], [1, 2, 2]],
# Res3
[[1, 2, 2], [1, 2, 2]],
# Res4
[[1, 2, 2], [1, 2, 2]],
# Res5
[[1, 2, 2], [1, 2, 2]],
]
# -----------------------------------------------------------------------------
# Model options
# -----------------------------------------------------------------------------
_C.MODEL = CfgNode()
# Model architecture.
_C.MODEL.ARCH = "slowfast"
# NOTE(review): undocumented in the original. Presumably enables activation
# checkpointing via `torch.utils.checkpoint` (the README mentions it as a way
# to avoid out-of-memory errors) — confirm against the model code.
_C.MODEL.USE_CHECKPOINT=True
# NOTE(review): undocumented in the original. Presumably controls how much of
# the network is checkpointed when USE_CHECKPOINT is on — confirm.
_C.MODEL.CHECKPOINT_NUM=2
# Model name
_C.MODEL.MODEL_NAME = "SlowFast"
# The number of classes to predict for the model.
_C.MODEL.NUM_CLASSES = 400
# Loss function.
_C.MODEL.LOSS_FUNC = "cross_entropy"
# Model architectures that have a single pathway.
_C.MODEL.SINGLE_PATHWAY_ARCH = ["2d", "c2d", "i3d", "slow", "x3d", "mvit", "morph"]
# Model architectures that have multiple pathways.
_C.MODEL.MULTI_PATHWAY_ARCH = ["slowfast"]
# Dropout rate before final projection in the backbone.
_C.MODEL.DROPOUT_RATE = 0.5
# Random drop rate for Res-blocks, linearly increasing from res2 to res5.
_C.MODEL.DROPCONNECT_RATE = 0.0
# The std to initialize the fc layer(s).
_C.MODEL.FC_INIT_STD = 0.01
# Activation layer for the output head.
_C.MODEL.HEAD_ACT = "softmax"
# -----------------------------------------------------------------------------
# MViT options
# -----------------------------------------------------------------------------
_C.MVIT = CfgNode()
# Options include `conv`, `max`.
_C.MVIT.MODE = "conv"
# If True, perform pool before projection in attention.
_C.MVIT.POOL_FIRST = False
# If True, use cls embed in the network, otherwise don't use cls_embed in transformer.
_C.MVIT.CLS_EMBED_ON = True
# Kernel size for patchification.
_C.MVIT.PATCH_KERNEL = [3, 7, 7]
# Stride size for patchification.
_C.MVIT.PATCH_STRIDE = [2, 4, 4]
# Padding size for patchification.
_C.MVIT.PATCH_PADDING = [2, 4, 4]
# If True, use 2d patch, otherwise use 3d patch.
_C.MVIT.PATCH_2D = False
# Base embedding dimension for the transformer.
_C.MVIT.EMBED_DIM = 96
# Base num of heads for the transformer.
_C.MVIT.NUM_HEADS = 1
# Dimension reduction ratio for the MLP layers.
_C.MVIT.MLP_RATIO = 4.0
# If True, use bias term in attention fc layers.
_C.MVIT.QKV_BIAS = True
# Drop path rate for the transformer.
_C.MVIT.DROPPATH_RATE = 0.1
# Depth of the transformer.
_C.MVIT.DEPTH = 16
# Normalization layer for the transformer. Only layernorm is supported now.
_C.MVIT.NORM = "layernorm"
# Dimension multiplication at layer i. If 2.0 is used, then the next block will increase
# the dimension by 2 times. Format: [depth_i: mul_dim_ratio]
_C.MVIT.DIM_MUL = []
# Head number multiplication at layer i. If 2.0 is used, then the next block will
# increase the number of heads by 2 times. Format: [depth_i: head_mul_ratio]
_C.MVIT.HEAD_MUL = []
# Stride size for the Pool KV at layer i.
# Format: [[i, stride_t_i, stride_h_i, stride_w_i], ...,]
_C.MVIT.POOL_KV_STRIDE = None
# Initial stride size for KV at layer 1. The stride size will be further reduced with
# the ratio of MVIT.DIM_MUL. It will overwrite MVIT.POOL_KV_STRIDE if not None.
_C.MVIT.POOL_KV_STRIDE_ADAPTIVE = None
# Stride size for the Pool Q at layer i.
# Format: [[i, stride_t_i, stride_h_i, stride_w_i], ...,]
_C.MVIT.POOL_Q_STRIDE = []
# If not None, overwrite the KV_KERNEL and Q_KERNEL size with POOL_KVQ_KERNEL.
# Otherwise the kernel_size is [s + 1 if s > 1 else s for s in stride_size].
_C.MVIT.POOL_KVQ_KERNEL = None
# If True, perform no decay on positional embedding and cls embedding.
_C.MVIT.ZERO_DECAY_POS_CLS = True
# If True, use norm after stem.
_C.MVIT.NORM_STEM = False
# If True, perform separate positional embedding.
_C.MVIT.SEP_POS_EMBED = False
# Dropout rate for the MViT backbone.
_C.MVIT.DROPOUT_RATE = 0.0
# -----------------------------------------------------------------------------
# SlowFast options
# -----------------------------------------------------------------------------
_C.SLOWFAST = CfgNode()
# Corresponds to the inverse of the channel reduction ratio, $\beta$ between
# the Slow and Fast pathways.
_C.SLOWFAST.BETA_INV = 8
# Corresponds to the frame rate reduction ratio, $\alpha$ between the Slow and
# Fast pathways.
_C.SLOWFAST.ALPHA = 8
# Ratio of channel dimensions between the Slow and Fast pathways.
_C.SLOWFAST.FUSION_CONV_CHANNEL_RATIO = 2
# Kernel dimension used for fusing information from Fast pathway to Slow
# pathway.
_C.SLOWFAST.FUSION_KERNEL_SZ = 5
# -----------------------------------------------------------------------------
# Data options
# -----------------------------------------------------------------------------
_C.DATA = CfgNode()
# Filename template for the label/split list files; contains one `{}`
# placeholder.
# NOTE(review): presumably filled with the split name — confirm against the
# dataset loader.
_C.DATA.LABEL_PATH_TEMPLATE="somesomev1_rgb_{}_split.txt"
# Filename template for individual frame images: a zero-padded, 5-digit integer
# followed by `.jpg`.
_C.DATA.IMAGE_TEMPLATE="{:05d}.jpg"
# The path to the data directory.
# NOTE(review): the default is an absolute path specific to one machine;
# override it in your yaml config or on the command line.
_C.DATA.PATH_TO_DATA_DIR = "/mnt/bd/jh-backbone/UniFormer/video_classification/data_list/sthv1"
# The separator used between path and label.
_C.DATA.PATH_LABEL_SEPARATOR = " "
# Video path prefix if any.
_C.DATA.PATH_PREFIX = ""
# The number of frames of the input clip.
_C.DATA.NUM_FRAMES = 8
# The video sampling rate of the input clip.
_C.DATA.SAMPLING_RATE = 8
# Eigenvalues for PCA jittering. Note PCA is RGB based.
_C.DATA.TRAIN_PCA_EIGVAL = [0.225, 0.224, 0.229]
# Eigenvectors for PCA jittering.
_C.DATA.TRAIN_PCA_EIGVEC = [
[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203],
]
# If an imdb has been dumped to a local file with the following format:
# `{"im_path": im_path, "class": cont_id}`
# then we can skip the construction of imdb and load it from the local file.
_C.DATA.PATH_TO_PRELOAD_IMDB = ""
# The mean value of the video raw pixels across the R G B channels.
_C.DATA.MEAN = [0.45, 0.45, 0.45]
# List of input frame channel dimensions.
_C.DATA.INPUT_CHANNEL_NUM = [3, 3]
# The std value of the video raw pixels across the R G B channels.
_C.DATA.STD = [0.225, 0.225, 0.225]
# The spatial augmentation jitter scales for training.
_C.DATA.TRAIN_JITTER_SCALES = [256, 320]
# The relative scale range of Inception-style area based random resizing augmentation.
# If this is provided, DATA.TRAIN_JITTER_SCALES above is ignored.
_C.DATA.TRAIN_JITTER_SCALES_RELATIVE = []
# The relative aspect ratio range of Inception-style area based random resizing
# augmentation.
_C.DATA.TRAIN_JITTER_ASPECT_RELATIVE = []
# If True, perform stride length uniform temporal sampling.
_C.DATA.USE_OFFSET_SAMPLING = False
# Whether to apply motion shift for augmentation.
_C.DATA.TRAIN_JITTER_MOTION_SHIFT = False
# The spatial crop size for training.
_C.DATA.TRAIN_CROP_SIZE = 224
# The spatial crop size for testing.
_C.DATA.TEST_CROP_SIZE = 256
# Input videos may have different fps; convert them to the target video fps before
# frame sampling.
_C.DATA.TARGET_FPS = 30
# Decoding backend, options include `pyav` or `torchvision`
_C.DATA.DECODING_BACKEND = "pyav"
# if True, sample uniformly in [1 / max_scale, 1 / min_scale] and take a
# reciprocal to get the scale. If False, take a uniform sample from
# [min_scale, max_scale].
_C.DATA.INV_UNIFORM_SAMPLE = False
# If True, perform random horizontal flip on the video frames during training.
_C.DATA.RANDOM_FLIP = True
# If True, calculate the mAP as the metric.
_C.DATA.MULTI_LABEL = False
# Method to perform the ensemble, options include "sum" and "max".
_C.DATA.ENSEMBLE_METHOD = "sum"
# If True, revert the default input channel (RGB <-> BGR).
_C.DATA.REVERSE_INPUT_CHANNEL = False
# ---------------------------------------------------------------------------- #
# Optimizer options
# ---------------------------------------------------------------------------- #
_C.SOLVER = CfgNode()
# Base learning rate.
_C.SOLVER.BASE_LR = 0.1
# Learning rate policy (see utils/lr_policy.py for options and examples).
_C.SOLVER.LR_POLICY = "cosine"
# Final learning rates for 'cosine' policy.
_C.SOLVER.COSINE_END_LR = 0.0
# Exponential decay factor.
_C.SOLVER.GAMMA = 0.1
# Step size for 'exp' and 'cos' policies (in epochs).
_C.SOLVER.STEP_SIZE = 1
# Steps for 'steps_' policies (in epochs).
_C.SOLVER.STEPS = []
# Learning rates for 'steps_' policies.
_C.SOLVER.LRS = []
# Maximal number of epochs.
_C.SOLVER.MAX_EPOCH = 300
# Momentum.
_C.SOLVER.MOMENTUM = 0.9
# Momentum dampening.
_C.SOLVER.DAMPENING = 0.0
# Nesterov momentum.
_C.SOLVER.NESTEROV = True
# L2 regularization.
_C.SOLVER.WEIGHT_DECAY = 1e-4
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARMUP_FACTOR.
_C.SOLVER.WARMUP_FACTOR = 0.1
# Gradually warm up the SOLVER.BASE_LR over this number of epochs.
_C.SOLVER.WARMUP_EPOCHS = 0.0
# The start learning rate of the warm up.
_C.SOLVER.WARMUP_START_LR = 0.01
# Optimization method.
_C.SOLVER.OPTIMIZING_METHOD = "sgd"
# Base learning rate is linearly scaled with NUM_SHARDS.
_C.SOLVER.BASE_LR_SCALE_NUM_SHARDS = False
# If True, start from the peak cosine learning rate after warm up.
_C.SOLVER.COSINE_AFTER_WARMUP = False
# If True, perform no weight decay on parameter with one dimension (bias term, etc).
_C.SOLVER.ZERO_WD_1D_PARAM = False
# Clip gradient at this value before optimizer update
_C.SOLVER.CLIP_GRAD_VAL = None
# Clip gradient at this norm before optimizer update
_C.SOLVER.CLIP_GRAD_L2NORM = None
# NOTE(review): undocumented in the original, and separate from CLIP_GRAD_VAL /
# CLIP_GRAD_L2NORM above. Presumably a gradient clipping threshold used by the
# training loop — confirm which of the three options is actually consumed.
_C.SOLVER.CLIP_GRADIENT = 20
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Number of GPUs to use (applies to both training and testing).
_C.NUM_GPUS = 1
# Number of machines to use for the job.
_C.NUM_SHARDS = 1
# The index of the current machine.
_C.SHARD_ID = 0
# Output basedir.
_C.OUTPUT_DIR = "./tmp"
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries.
_C.RNG_SEED = 1
# Log period in iters.
_C.LOG_PERIOD = 10
# If True, log the model info.
_C.LOG_MODEL_INFO = True
# Distributed backend.
_C.DIST_BACKEND = "nccl"
# ---------------------------------------------------------------------------- #
# Benchmark options
# ---------------------------------------------------------------------------- #
_C.BENCHMARK = CfgNode()
# Number of epochs for data loading benchmark.
_C.BENCHMARK.NUM_EPOCHS = 5
# Log period in iters for data loading benchmark.
_C.BENCHMARK.LOG_PERIOD = 100
# If True, shuffle dataloader for epoch during benchmark.
_C.BENCHMARK.SHUFFLE = False
# ---------------------------------------------------------------------------- #
# Common train/test data loader options
# ---------------------------------------------------------------------------- #
_C.DATA_LOADER = CfgNode()
# Number of data loader workers per training process.
_C.DATA_LOADER.NUM_WORKERS = 8
# Load data to pinned host memory.
_C.DATA_LOADER.PIN_MEMORY = True
# Enable multi thread decoding.
_C.DATA_LOADER.ENABLE_MULTI_THREAD_DECODE = False
# ---------------------------------------------------------------------------- #
# Detection options.
# ---------------------------------------------------------------------------- #
_C.DETECTION = CfgNode()
# Whether enable video detection.
_C.DETECTION.ENABLE = False
# Aligned version of RoI. More details can be found at slowfast/models/head_helper.py
_C.DETECTION.ALIGNED = True
# Spatial scale factor.
_C.DETECTION.SPATIAL_SCALE_FACTOR = 16
# RoI transformation resolution.
_C.DETECTION.ROI_XFORM_RESOLUTION = 7
# -----------------------------------------------------------------------------
# AVA Dataset options
# -----------------------------------------------------------------------------
_C.AVA = CfgNode()
# Directory path of frames.
_C.AVA.FRAME_DIR = "/mnt/fair-flash3-east/ava_trainval_frames.img/"
# Directory path for files of frame lists.
_C.AVA.FRAME_LIST_DIR = (
"/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/"
)
# Directory path for annotation files.
_C.AVA.ANNOTATION_DIR = (
"/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/"
)
# Filenames of training samples list files.
_C.AVA.TRAIN_LISTS = ["train.csv"]
# Filenames of test samples list files.
_C.AVA.TEST_LISTS = ["val.csv"]
# Filenames of box list files for training. Note that we assume files which
# contains predicted boxes will have a suffix "predicted_boxes" in the
# filename.
_C.AVA.TRAIN_GT_BOX_LISTS = ["ava_train_v2.2.csv"]
_C.AVA.TRAIN_PREDICT_BOX_LISTS = []
# Filenames of box list files for test.
_C.AVA.TEST_PREDICT_BOX_LISTS = ["ava_val_predicted_boxes.csv"]
# This option controls the score threshold for the predicted boxes to use.
_C.AVA.DETECTION_SCORE_THRESH = 0.9
# If use BGR as the format of input frames.
_C.AVA.BGR = False
# Training augmentation parameters
# Whether to use color augmentation method.
_C.AVA.TRAIN_USE_COLOR_AUGMENTATION = False
# Whether to only use PCA jitter augmentation when using color augmentation
# method (otherwise combine with color jitter method).
_C.AVA.TRAIN_PCA_JITTER_ONLY = True
# Whether to do horizontal flipping during test.
_C.AVA.TEST_FORCE_FLIP = False
# Whether to use full test set for validation split.
_C.AVA.FULL_TEST_ON_VAL = False
# The name of the file to the ava label map.
_C.AVA.LABEL_MAP_FILE = "ava_action_list_v2.2_for_activitynet_2019.pbtxt"
# The name of the file to the ava exclusion.
_C.AVA.EXCLUSION_FILE = "ava_val_excluded_timestamps_v2.2.csv"
# The name of the file to the ava groundtruth.
_C.AVA.GROUNDTRUTH_FILE = "ava_val_v2.2.csv"
# Backend to process image, includes `pytorch` and `cv2`.
_C.AVA.IMG_PROC_BACKEND = "cv2"
# ---------------------------------------------------------------------------- #
# Multigrid training options
# See https://arxiv.org/abs/1912.00998 for details about multigrid training.
# ---------------------------------------------------------------------------- #
_C.MULTIGRID = CfgNode()
# Multigrid training allows us to train for more epochs with fewer iterations.
# This hyperparameter specifies how many times more epochs to train.
# The default setting in paper trains for 1.5x more epochs than baseline.
_C.MULTIGRID.EPOCH_FACTOR = 1.5
# Enable short cycles.
_C.MULTIGRID.SHORT_CYCLE = False
# Short cycle additional spatial dimensions relative to the default crop size.
_C.MULTIGRID.SHORT_CYCLE_FACTORS = [0.5, 0.5 ** 0.5]
_C.MULTIGRID.LONG_CYCLE = False
# (Temporal, Spatial) dimensions relative to the default shape.
_C.MULTIGRID.LONG_CYCLE_FACTORS = [
(0.25, 0.5 ** 0.5),
(0.5, 0.5 ** 0.5),
(0.5, 1),
(1, 1),
]
# While a standard BN computes stats across all examples in a GPU,
# for multigrid training we fix the number of clips to compute BN stats on.
# See https://arxiv.org/abs/1912.00998 for details.
_C.MULTIGRID.BN_BASE_SIZE = 8
# Multigrid training epochs are not proportional to actual training time or
# computations, so _C.TRAIN.EVAL_PERIOD leads to too frequent or rare
# evaluation. We use a multigrid-specific rule to determine when to evaluate:
# This hyperparameter defines how many times to evaluate a model per long
# cycle shape.
_C.MULTIGRID.EVAL_FREQ = 3
# No need to specify; Set automatically and used as global variables.
_C.MULTIGRID.LONG_CYCLE_SAMPLING_RATE = 0
_C.MULTIGRID.DEFAULT_B = 0
_C.MULTIGRID.DEFAULT_T = 0
_C.MULTIGRID.DEFAULT_S = 0
# -----------------------------------------------------------------------------
# Tensorboard Visualization Options
# -----------------------------------------------------------------------------
_C.TENSORBOARD = CfgNode()
# Log to summary writer; this will automatically
# log loss, lr and metrics during train/eval.
_C.TENSORBOARD.ENABLE = False
# Provide path to prediction results for visualization.
# This is a pickle file of [prediction_tensor, label_tensor]
_C.TENSORBOARD.PREDICTIONS_PATH = ""
# Path to directory for tensorboard logs.
# Defaults to cfg.OUTPUT_DIR/runs-{cfg.TRAIN.DATASET}.
_C.TENSORBOARD.LOG_DIR = ""
# Path to a json file providing class_name - id mapping
# in the format {"class_name1": id1, "class_name2": id2, ...}.
# This file must be provided to enable plotting confusion matrix
# by a subset or parent categories.
_C.TENSORBOARD.CLASS_NAMES_PATH = ""
# Path to a json file for categories -> classes mapping
# in the format {"parent_class": ["child_class1", "child_class2",...], ...}.
_C.TENSORBOARD.CATEGORIES_PATH = ""
# Config for confusion matrices visualization.
_C.TENSORBOARD.CONFUSION_MATRIX = CfgNode()
# Visualize confusion matrix.
_C.TENSORBOARD.CONFUSION_MATRIX.ENABLE = False
# Figure size of the confusion matrices plotted.
_C.TENSORBOARD.CONFUSION_MATRIX.FIGSIZE = [8, 8]
# Path to a subset of categories to visualize.
# File contains class names separated by newline characters.
_C.TENSORBOARD.CONFUSION_MATRIX.SUBSET_PATH = ""
# Config for histogram visualization.
_C.TENSORBOARD.HISTOGRAM = CfgNode()
# Visualize histograms.
_C.TENSORBOARD.HISTOGRAM.ENABLE = False
# Path to a subset of classes to plot histograms.
# Class names must be separated by newline characters.
_C.TENSORBOARD.HISTOGRAM.SUBSET_PATH = ""
# Visualize top-k most predicted classes on histograms for each
# chosen true label.
_C.TENSORBOARD.HISTOGRAM.TOPK = 10
# Figure size of the histograms plotted.
_C.TENSORBOARD.HISTOGRAM.FIGSIZE = [8, 8]
# Config for layers' weights and activations visualization.
# _C.TENSORBOARD.ENABLE must be True.
_C.TENSORBOARD.MODEL_VIS = CfgNode()
# If False, skip model visualization.
_C.TENSORBOARD.MODEL_VIS.ENABLE = False
# If False, skip visualizing model weights.
_C.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS = False
# If False, skip visualizing model activations.
_C.TENSORBOARD.MODEL_VIS.ACTIVATIONS = False
# If False, skip visualizing input videos.
_C.TENSORBOARD.MODEL_VIS.INPUT_VIDEO = False
# List of strings containing data about layer names and their indexing to
# visualize weights and activations for. The indexing is meant for
# choosing a subset of activations outputed by a layer for visualization.
# If indexing is not specified, visualize all activations outputed by the layer.
# For each string, layer name and indexing is separated by whitespaces.
# e.g.: [layer1 1,2;1,2, layer2, layer3 150,151;3,4]; this means for each array `arr`
# along the batch dimension in `layer1`, we take arr[[1, 2], [1, 2]]
_C.TENSORBOARD.MODEL_VIS.LAYER_LIST = []
# Top-k predictions to plot on videos
_C.TENSORBOARD.MODEL_VIS.TOPK_PREDS = 1
# Colormap to for text boxes and bounding boxes colors
_C.TENSORBOARD.MODEL_VIS.COLORMAP = "Pastel2"
# Config for visualization video inputs with Grad-CAM.
# _C.TENSORBOARD.ENABLE must be True.
_C.TENSORBOARD.MODEL_VIS.GRAD_CAM = CfgNode()
# Whether to run visualization using Grad-CAM technique.
_C.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE = True
# CNN layers to use for Grad-CAM. The number of layers must be equal to
# number of pathway(s).
_C.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST = []
# If True, visualize Grad-CAM using true labels for each instances.
# If False, use the highest predicted class.
_C.TENSORBOARD.MODEL_VIS.GRAD_CAM.USE_TRUE_LABEL = False
# Colormap to for text boxes and bounding boxes colors
_C.TENSORBOARD.MODEL_VIS.GRAD_CAM.COLORMAP = "viridis"
# Config for visualization for wrong prediction visualization.
# _C.TENSORBOARD.ENABLE must be True.
_C.TENSORBOARD.WRONG_PRED_VIS = CfgNode()
_C.TENSORBOARD.WRONG_PRED_VIS.ENABLE = False
# Folder tag to origanize model eval videos under.
_C.TENSORBOARD.WRONG_PRED_VIS.TAG = "Incorrectly classified videos."
# Subset of labels to visualize. Only wrong predictions with true labels
# within this subset is visualized.
_C.TENSORBOARD.WRONG_PRED_VIS.SUBSET_PATH = ""
# ---------------------------------------------------------------------------- #
# Demo options
# ---------------------------------------------------------------------------- #
_C.DEMO = CfgNode()
# Run model in DEMO mode.
_C.DEMO.ENABLE = False
# Path to a json file providing class_name - id mapping
# in the format {"class_name1": id1, "class_name2": id2, ...}.
_C.DEMO.LABEL_FILE_PATH = ""
# Specify a camera device as input. This will be prioritized
# over input video if set.
# If -1, use input video instead.
_C.DEMO.WEBCAM = -1
# Path to input video for demo.
_C.DEMO.INPUT_VIDEO = ""
# Custom width for reading input video data.
_C.DEMO.DISPLAY_WIDTH = 0
# Custom height for reading input video data.
_C.DEMO.DISPLAY_HEIGHT = 0
# Path to Detectron2 object detection model configuration,
# only used for detection tasks.
_C.DEMO.DETECTRON2_CFG = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# Path to Detectron2 object detection model pre-trained weights.
_C.DEMO.DETECTRON2_WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl"
# Threshold for choosing predicted bounding boxes by Detectron2.
_C.DEMO.DETECTRON2_THRESH = 0.9
# Number of overlapping frames between 2 consecutive clips.
# Increase this number for more frequent action predictions.
# The number of overlapping frames cannot be larger than
# half of the sequence length `cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE`.
_C.DEMO.BUFFER_SIZE = 0
# If specified, the visualized outputs will be written to a video file at
# this path. Otherwise, the visualized outputs will be displayed in a window.
_C.DEMO.OUTPUT_FILE = ""
# Frames per second rate for writing to output video file.
# If not set (-1), use fps rate from input file.
_C.DEMO.OUTPUT_FPS = -1
# Input format from demo video reader ("RGB" or "BGR").
_C.DEMO.INPUT_FORMAT = "BGR"
# Draw visualization frames in [keyframe_idx - CLIP_VIS_SIZE, keyframe_idx + CLIP_VIS_SIZE] inclusively.
_C.DEMO.CLIP_VIS_SIZE = 10
# Number of processes to run video visualizer.
_C.DEMO.NUM_VIS_INSTANCES = 2
# Path to pre-computed predicted boxes.
_C.DEMO.PREDS_BOXES = ""
# Whether to run with the multi-threaded video reader.
_C.DEMO.THREAD_ENABLE = False
# Take one clip for every `DEMO.NUM_CLIPS_SKIP` + 1 for prediction and visualization.
# This is used for fast demo speed by reducing the prediction/visualization frequency.
# If -1, take the most recent read clip for visualization. This mode is only supported
# if `DEMO.THREAD_ENABLE` is set to True.
_C.DEMO.NUM_CLIPS_SKIP = 0
# Path to ground-truth boxes and labels (optional).
_C.DEMO.GT_BOXES = ""
# The starting second of the video w.r.t. the bounding boxes file.
_C.DEMO.STARTING_SECOND = 900
# Frames per second of the input video/folder of images.
_C.DEMO.FPS = 30
# Visualize with top-k predictions or predictions above certain threshold(s).
# Option: {"thres", "top-k"}
_C.DEMO.VIS_MODE = "thres"
# Threshold for common class names.
_C.DEMO.COMMON_CLASS_THRES = 0.7
# Threshold for uncommon class names. This will not be
# used if `_C.DEMO.COMMON_CLASS_NAMES` is empty.
_C.DEMO.UNCOMMON_CLASS_THRES = 0.3
# This is chosen based on the distribution of examples in
# each class in the AVA dataset.
_C.DEMO.COMMON_CLASS_NAMES = [
"watch (a person)",
"talk to (e.g., self, a person, a group)",
"listen to (a person)",
"touch (an object)",
"carry/hold (an object)",
"walk",
"sit",
"lie/sleep",
"bend/bow (at the waist)",
]
# Slow-motion rate for the visualization. The visualized portions of the
# video will be played `_C.DEMO.SLOWMO` times slower than usual speed.
_C.DEMO.SLOWMO = 1
# Add custom config with default values.
custom_config.add_custom_config(_C)
def assert_and_infer_cfg(cfg):
    """
    Validate a config and apply the shard-based learning-rate scaling.

    Args:
        cfg (CfgNode): the config to check. It is modified in place when
            `cfg.SOLVER.BASE_LR_SCALE_NUM_SHARDS` is truthy.
    Returns:
        cfg (CfgNode): the same object that was passed in.
    Raises:
        AssertionError: if any of the consistency checks below fails.
    """
    # Precise BN stats need a non-negative number of batches.
    bn_cfg = cfg.BN
    if bn_cfg.USE_PRECISE_STATS:
        assert bn_cfg.NUM_BATCHES_PRECISE >= 0

    # TRAIN and TEST share the same two checks: a known checkpoint format and
    # a batch size that splits evenly across the GPUs (skipped on CPU).
    for phase_cfg in (cfg.TRAIN, cfg.TEST):
        assert phase_cfg.CHECKPOINT_TYPE in ("pytorch", "caffe2")
        assert cfg.NUM_GPUS == 0 or phase_cfg.BATCH_SIZE % cfg.NUM_GPUS == 0

    # ResNet width must be positive and divisible by the number of groups.
    resnet_cfg = cfg.RESNET
    assert resnet_cfg.NUM_GROUPS > 0
    assert resnet_cfg.WIDTH_PER_GROUP > 0
    assert resnet_cfg.WIDTH_PER_GROUP % resnet_cfg.NUM_GROUPS == 0

    # Scale the learning rates by the number of shards when requested.
    solver_cfg = cfg.SOLVER
    if solver_cfg.BASE_LR_SCALE_NUM_SHARDS:
        solver_cfg.BASE_LR *= cfg.NUM_SHARDS
        solver_cfg.WARMUP_START_LR *= cfg.NUM_SHARDS
        solver_cfg.COSINE_END_LR *= cfg.NUM_SHARDS

    # The shard id must index into the configured number of shards.
    assert cfg.SHARD_ID < cfg.NUM_SHARDS
    return cfg
def get_cfg():
    """
    Return a fresh clone of the module-level default config `_C`, so callers
    can modify the result without touching the defaults.
    """
    default_copy = _C.clone()
    return default_copy
================================================
FILE: build/lib/slowfast/datasets/__init__.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .ava_dataset import Ava # noqa
from .build import DATASET_REGISTRY, build_dataset # noqa
from .charades import Charades # noqa
from .imagenet import Imagenet # noqa
from .kinetics import Kinetics # noqa
from .ssv2 import Ssv2 # noqa
from .sth import Sth
try:
from .ptv_datasets import Ptvcharades, Ptvkinetics, Ptvssv2 # noqa
except Exception:
print("Please update your PyTorchVideo to latest master")
================================================
FILE: build/lib/slowfast/datasets/ava_dataset.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import logging
import numpy as np
import torch
from . import ava_helper as ava_helper
from . import cv2_transform as cv2_transform
from . import transform as transform
from . import utils as utils
from .build import DATASET_REGISTRY
logger = logging.getLogger(__name__)
@DATASET_REGISTRY.register()
class Ava(torch.utils.data.Dataset):
"""
AVA Dataset
"""
def __init__(self, cfg, split):
self.cfg = cfg
self._split = split
self._sample_rate = cfg.DATA.SAMPLING_RATE
self._video_length = cfg.DATA.NUM_FRAMES
self._seq_len = self._video_length * self._sample_rate
self._num_classes = cfg.MODEL.NUM_CLASSES
# Augmentation params.
self._data_mean = cfg.DATA.MEAN
self._data_std = cfg.DATA.STD
self._use_bgr = cfg.AVA.BGR
self.random_horizontal_flip = cfg.DATA.RANDOM_FLIP
if self._split == "train":
self._crop_size = cfg.DATA.TRAIN_CROP_SIZE
self._jitter_min_scale = cfg.DATA.TRAIN_JITTER_SCALES[0]
self._jitter_max_scale = cfg.DATA.TRAIN_JITTER_SCALES[1]
self._use_color_augmentation = cfg.AVA.TRAIN_USE_COLOR_AUGMENTATION
self._pca_jitter_only = cfg.AVA.TRAIN_PCA_JITTER_ONLY
self._pca_eigval = cfg.DATA.TRAIN_PCA_EIGVAL
self._pca_eigvec = cfg.DATA.TRAIN_PCA_EIGVEC
else:
self._crop_size = cfg.DATA.TEST_CROP_SIZE
self._test_force_flip = cfg.AVA.TEST_FORCE_FLIP
self._load_data(cfg)
def _load_data(self, cfg):
    """
    Read the frame lists and box annotations, then derive the keyframe
    index and box count used by the rest of the dataset.

    Args:
        cfg (CfgNode): config
    """
    # Frame paths per video, plus the video name for every video index.
    is_train = self._split == "train"
    image_paths, video_names = ava_helper.load_image_lists(
        cfg, is_train=is_train
    )
    self._image_paths = image_paths
    self._video_idx_to_name = video_names

    # Box/label annotations keyed by video name; every video needs an entry.
    annotations = ava_helper.load_boxes_and_labels(cfg, mode=self._split)
    assert len(annotations) == len(self._image_paths)

    # Re-order the annotations so they are indexed by video index.
    per_video_annotations = [
        annotations[self._video_idx_to_name[video_idx]]
        for video_idx in range(len(self._image_paths))
    ]

    # Keyframe indices and their boxes/labels.
    keyframe_data = ava_helper.get_keyframe_data(per_video_annotations)
    self._keyframe_indices, self._keyframe_boxes_and_labels = keyframe_data

    # Total number of boxes that will be used.
    self._num_boxes_used = ava_helper.get_num_boxes_used(
        self._keyframe_indices, self._keyframe_boxes_and_labels
    )

    self.print_summary()
def print_summary(self):
    """
    Log the split name and the number of videos, frames, key frames and
    used boxes of this dataset.
    """
    logger.info("=== AVA dataset summary ===")
    logger.info("Split: {}".format(self._split))

    video_count = len(self._image_paths)
    logger.info("Number of videos: {}".format(video_count))

    # Frames are stored as one list of paths per video.
    total_frames = 0
    for video_img_paths in self._image_paths:
        total_frames += len(video_img_paths)
    logger.info("Number of frames: {}".format(total_frames))

    keyframe_count = len(self)
    logger.info("Number of key frames: {}".format(keyframe_count))
    logger.info("Number of boxes: {}.".format(self._num_boxes_used))
def __len__(self):
"""
Returns:
(int): the number of videos in the dataset.
"""
return self.num_videos
@property
def num_videos(self):
"""
Returns:
(int): the number of videos in the dataset.
"""
return len(self._keyframe_indices)
def _images_and_boxes_preprocessing_cv2(self, imgs, boxes):
    """
    This function performs preprocessing for the input images and
    corresponding boxes for one clip with opencv as backend.

    Args:
        imgs (list): the frames of the clip. Each frame is indexed as
            height x width x channel (see `imgs[0].shape` below).
        boxes (ndarray): the boxes for the current clip, one
            [x1, y1, x2, y2] row per box. The coordinates are scaled by
            the frame width/height IN PLACE, so they are presumably
            normalized to [0, 1] on entry -- confirm against the caller.

    Returns:
        imgs (tensor): preprocessed clip, laid out as
            channel x num frames x height x width.
        boxes (ndarray): preprocessed boxes, clipped to the final frame.

    Raises:
        NotImplementedError: if the split is not "train", "val" or "test".
    """
    height, width, _ = imgs[0].shape

    # Scale the box coordinates to pixels of the original frame.
    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height
    boxes = cv2_transform.clip_boxes_to_image(boxes, height, width)

    # `transform.py` is list of np.array. However, for AVA, we only have
    # one np.array.
    boxes = [boxes]

    # The image now is in HWC, BGR format.
    if self._split == "train":  # "train"
        # Random scale jitter, random crop and (optionally) random flip.
        imgs, boxes = cv2_transform.random_short_side_scale_jitter_list(
            imgs,
            min_size=self._jitter_min_scale,
            max_size=self._jitter_max_scale,
            boxes=boxes,
        )
        imgs, boxes = cv2_transform.random_crop_list(
            imgs, self._crop_size, order="HWC", boxes=boxes
        )

        if self.random_horizontal_flip:
            # random flip
            imgs, boxes = cv2_transform.horizontal_flip_list(
                0.5, imgs, order="HWC", boxes=boxes
            )
    elif self._split == "val":
        # Short side to test_scale. Non-local and STRG uses 256.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(
                self._crop_size, boxes[0], height, width
            )
        ]
        # Spatial crop with index 1 (the pytorch path calls the same
        # position the center crop).
        imgs, boxes = cv2_transform.spatial_shift_crop_list(
            self._crop_size, imgs, 1, boxes=boxes
        )

        if self._test_force_flip:
            # Probability 1: always flip.
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    elif self._split == "test":
        # Short side to test_scale. Non-local and STRG uses 256.
        # No crop is applied for the test split.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(
                self._crop_size, boxes[0], height, width
            )
        ]

        if self._test_force_flip:
            # Probability 1: always flip.
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    else:
        raise NotImplementedError(
            "Unsupported split mode {}".format(self._split)
        )

    # Convert image to CHW keeping BGR order.
    imgs = [cv2_transform.HWC2CHW(img) for img in imgs]

    # Image [0, 255] -> [0, 1].
    imgs = [img / 255.0 for img in imgs]

    # Make every frame a contiguous float32 array with the spatial size of
    # the first frame (the test split is not cropped to crop_size).
    imgs = [
        np.ascontiguousarray(
            # img.reshape((3, self._crop_size, self._crop_size))
            img.reshape((3, imgs[0].shape[1], imgs[0].shape[2]))
        ).astype(np.float32)
        for img in imgs
    ]

    # Do color augmentation (after divided by 255.0).
    if self._split == "train" and self._use_color_augmentation:
        if not self._pca_jitter_only:
            imgs = cv2_transform.color_jitter_list(
                imgs,
                img_brightness=0.4,
                img_contrast=0.4,
                img_saturation=0.4,
            )

        imgs = cv2_transform.lighting_list(
            imgs,
            alphastd=0.1,
            eigval=np.array(self._pca_eigval).astype(np.float32),
            eigvec=np.array(self._pca_eigvec).astype(np.float32),
        )

    # Normalize images by mean and std.
    imgs = [
        cv2_transform.color_normalization(
            img,
            np.array(self._data_mean, dtype=np.float32),
            np.array(self._data_std, dtype=np.float32),
        )
        for img in imgs
    ]

    # Concat list of images to single ndarray.
    # Each CHW frame gets a new axis 1, so the result is C x T x H x W.
    imgs = np.concatenate(
        [np.expand_dims(img, axis=1) for img in imgs], axis=1
    )

    if not self._use_bgr:
        # Convert image format from BGR to RGB.
        # Reverses the channel axis (axis 0).
        imgs = imgs[::-1, ...]

    # The reversed view is not contiguous; copy before handing to torch.
    imgs = np.ascontiguousarray(imgs)
    imgs = torch.from_numpy(imgs)
    # imgs[0] is T x H x W, so shape[1]/shape[2] are the final height/width.
    boxes = cv2_transform.clip_boxes_to_image(
        boxes[0], imgs[0].shape[1], imgs[0].shape[2]
    )
    return imgs, boxes
def _images_and_boxes_preprocessing(self, imgs, boxes):
    """
    This function performs preprocessing for the input images and
    corresponding boxes for one clip (pytorch backend).

    Args:
        imgs (tensor): the frames of the clip, indexed as
            num frames x channel x height x width (height and width are
            read from dims 2 and 3).
        boxes (ndarray): the boxes for the current clip, one
            [x1, y1, x2, y2] row per box in the range of [0, 1]. The
            coordinates are scaled to pixels IN PLACE.

    Returns:
        imgs (tensor): preprocessed images, same axis order as the input.
        boxes (ndarray): preprocessed boxes, clipped to the final frame.

    Raises:
        NotImplementedError: if the split is not "train", "val" or "test".
    """
    # Image [0, 255] -> [0, 1].
    imgs = imgs.float()
    imgs = imgs / 255.0

    height, width = imgs.shape[2], imgs.shape[3]
    # The format of boxes is [x1, y1, x2, y2]. The input boxes are in the
    # range of [0, 1].
    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height
    boxes = transform.clip_boxes_to_image(boxes, height, width)

    if self._split == "train":
        # Train split
        imgs, boxes = transform.random_short_side_scale_jitter(
            imgs,
            min_size=self._jitter_min_scale,
            max_size=self._jitter_max_scale,
            boxes=boxes,
        )
        imgs, boxes = transform.random_crop(
            imgs, self._crop_size, boxes=boxes
        )

        # Random flip. Honor cfg.DATA.RANDOM_FLIP, exactly like the cv2
        # backend does; previously this backend flipped unconditionally.
        if self.random_horizontal_flip:
            imgs, boxes = transform.horizontal_flip(0.5, imgs, boxes=boxes)
    elif self._split == "val":
        # Val split
        # Resize short side to crop_size. Non-local and STRG uses 256.
        # min_size == max_size makes the "jitter" a deterministic resize.
        imgs, boxes = transform.random_short_side_scale_jitter(
            imgs,
            min_size=self._crop_size,
            max_size=self._crop_size,
            boxes=boxes,
        )

        # Apply center crop for val split
        imgs, boxes = transform.uniform_crop(
            imgs, size=self._crop_size, spatial_idx=1, boxes=boxes
        )

        if self._test_force_flip:
            imgs, boxes = transform.horizontal_flip(1, imgs, boxes=boxes)
    elif self._split == "test":
        # Test split
        # Resize short side to crop_size. Non-local and STRG uses 256.
        # Note that no crop follows, so the long side may exceed crop_size.
        imgs, boxes = transform.random_short_side_scale_jitter(
            imgs,
            min_size=self._crop_size,
            max_size=self._crop_size,
            boxes=boxes,
        )

        if self._test_force_flip:
            imgs, boxes = transform.horizontal_flip(1, imgs, boxes=boxes)
    else:
        raise NotImplementedError(
            "{} split not supported yet!".format(self._split)
        )

    # Do color augmentation (after divided by 255.0).
    if self._split == "train" and self._use_color_augmentation:
        if not self._pca_jitter_only:
            imgs = transform.color_jitter(
                imgs,
                img_brightness=0.4,
                img_contrast=0.4,
                img_saturation=0.4,
            )

        imgs = transform.lighting_jitter(
            imgs,
            alphastd=0.1,
            eigval=np.array(self._pca_eigval).astype(np.float32),
            eigvec=np.array(self._pca_eigvec).astype(np.float32),
        )

    # Normalize images by mean and std.
    imgs = transform.color_normalization(
        imgs,
        np.array(self._data_mean, dtype=np.float32),
        np.array(self._data_std, dtype=np.float32),
    )

    if not self._use_bgr:
        # Convert image format from BGR to RGB.
        # Note that Kinetics pre-training uses RGB!
        imgs = imgs[:, [2, 1, 0], ...]

    # Clip to the actual frame size rather than crop_size x crop_size. The
    # two agree for train/val (both crop to crop_size), but the test split
    # is only resized, so clipping to a crop_size square would cut off
    # valid boxes along the long side.
    boxes = transform.clip_boxes_to_image(
        boxes, imgs.shape[2], imgs.shape[3]
    )
    return imgs, boxes
def __getitem__(self, idx):
    """
    Generate corresponding clips, boxes, labels and metadata for given idx.

    Args:
        idx (int): the sample index provided by the pytorch sampler. When
            short cycle is used it arrives wrapped in a tuple.
    Returns (in this order):
        frames: the output of `utils.pack_pathway_output` for the clip,
            whose frames are `channel` x `num frames` x `height` x `width`.
        label (ndarray): one multi-hot int32 row per box.
        idx (int): the (unwrapped) sample index.
        time index (tensor): `torch.zeros(1)`; the time index is currently
            not supported for AVA.
        extra_data (dict): a dict containing extra data fields, like "boxes",
            "ori_boxes" and "metadata".
    """
    # When short cycle is used, input index is a tuple.
    if isinstance(idx, tuple):
        idx, self._num_yielded = idx
        if self.cfg.MULTIGRID.SHORT_CYCLE:
            # The short cycle index is unpacked but not used for AVA.
            idx, _ = idx

    video_idx, sec_idx, sec, center_idx = self._keyframe_indices[idx]

    # Frame indices of the clip centered on the keyframe.
    frame_indices = utils.get_sequence(
        center_idx,
        self._seq_len // 2,
        self._sample_rate,
        num_frames=len(self._image_paths[video_idx]),
    )

    clip_label_list = self._keyframe_boxes_and_labels[video_idx][sec_idx]
    assert len(clip_label_list) > 0

    # Split every [box, labels] entry into its two parts.
    labels = [entry[1] for entry in clip_label_list]
    boxes = np.array([entry[0] for entry in clip_label_list])
    # Keep only the four coordinates; the score is not used.
    boxes = boxes[:, :4].copy()
    ori_boxes = boxes.copy()

    # Load images of current clip.
    video_frames = self._image_paths[video_idx]
    image_paths = [video_frames[frame] for frame in frame_indices]
    backend = self.cfg.AVA.IMG_PROC_BACKEND
    imgs = utils.retry_load_images(image_paths, backend=backend)

    if self.cfg.AVA.IMG_PROC_BACKEND != "pytorch":
        # Preprocess images and boxes with the OpenCV pipeline.
        imgs, boxes = self._images_and_boxes_preprocessing_cv2(
            imgs, boxes=boxes
        )
    else:
        # T H W C -> T C H W.
        imgs = imgs.permute(0, 3, 1, 2)
        # Preprocess images and boxes.
        imgs, boxes = self._images_and_boxes_preprocessing(
            imgs, boxes=boxes
        )
        # T C H W -> C T H W.
        imgs = imgs.permute(1, 0, 2, 3)

    # Construct the multi-hot label array, one row per box.
    label_arrs = np.zeros((len(labels), self._num_classes), dtype=np.int32)
    for row, box_labels in enumerate(labels):
        for label in box_labels:
            # -1 marks a box without a label.
            if label == -1:
                continue
            # AVA label index starts from 1.
            assert 1 <= label <= 80
            label_arrs[row][label - 1] = 1

    imgs = utils.pack_pathway_output(self.cfg, imgs)
    metadata = [[video_idx, sec]] * len(boxes)

    extra_data = {
        "boxes": boxes,
        "ori_boxes": ori_boxes,
        "metadata": metadata,
    }

    return imgs, label_arrs, idx, torch.zeros(1), extra_data
================================================
FILE: build/lib/slowfast/datasets/ava_helper.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import logging
import os
from collections import defaultdict
from slowfast.utils.env import pathmgr
logger = logging.getLogger(__name__)
# Frame rate used by `get_keyframe_data` to convert a timestamp in seconds
# into a frame index.
FPS = 30
# Timestamps (in seconds) that are accepted as keyframes: 902 to 1798
# inclusive.
AVA_VALID_FRAMES = range(902, 1799)
def load_image_lists(cfg, is_train):
    """
    Read the frame-list files and group the frame paths by video.

    Args:
        cfg (CfgNode): config.
        is_train (bool): use `cfg.AVA.TRAIN_LISTS` if True, otherwise
            `cfg.AVA.TEST_LISTS`.

    Returns:
        image_paths (list[list]): one list of frame paths per video,
            ordered by video index.
        video_idx_to_name (list): the video name for every video index.
    """
    list_names = cfg.AVA.TRAIN_LISTS if is_train else cfg.AVA.TEST_LISTS
    list_filenames = [
        os.path.join(cfg.AVA.FRAME_LIST_DIR, list_name)
        for list_name in list_names
    ]

    frames_by_video = defaultdict(list)
    video_name_to_idx = {}
    video_idx_to_name = []
    for list_filename in list_filenames:
        with pathmgr.open(list_filename, "r") as f:
            # The first line is a header.
            f.readline()
            for line in f:
                fields = line.split()
                # The format of each row should follow:
                # original_video_id video_id frame_id path labels.
                assert len(fields) == 5
                video_name, frame_path = fields[0], fields[3]

                # Videos get consecutive indices in order of first appearance.
                if video_name not in video_name_to_idx:
                    video_name_to_idx[video_name] = len(video_name_to_idx)
                    video_idx_to_name.append(video_name)

                video_idx = video_name_to_idx[video_name]
                frames_by_video[video_idx].append(
                    os.path.join(cfg.AVA.FRAME_DIR, frame_path)
                )

    # Turn the index-keyed dict into a plain list ordered by video index.
    image_paths = [
        frames_by_video[video_idx]
        for video_idx in range(len(frames_by_video))
    ]

    logger.info(
        "Finished loading image paths from: %s" % ", ".join(list_filenames)
    )

    return image_paths, video_idx_to_name
def load_boxes_and_labels(cfg, mode):
    """
    Loading boxes and labels from csv files.

    Args:
        cfg (CfgNode): config.
        mode (str): 'train', 'val', or 'test' mode.
    Returns:
        all_boxes (dict): a dict which maps from `video_name` and
            `frame_sec` to a list of `box`. Each `box` is a
            [`box_coord`, `box_labels`] where `box_coord` is the
            coordinates of box and `box_labels` are the corresponding
            labels for the box.
    """
    # Ground-truth box lists are only used for training; predicted box
    # lists are used for every mode.
    if mode == "train":
        gt_lists = cfg.AVA.TRAIN_GT_BOX_LISTS
        pred_lists = cfg.AVA.TRAIN_PREDICT_BOX_LISTS
    else:
        gt_lists = []
        pred_lists = cfg.AVA.TEST_PREDICT_BOX_LISTS

    ann_filenames = [
        os.path.join(cfg.AVA.ANNOTATION_DIR, list_name)
        for list_name in gt_lists + pred_lists
    ]
    # Parallel flags: ground-truth files come first, predicted files after.
    ann_is_gt_box = [True] * len(gt_lists) + [False] * len(pred_lists)

    detect_thresh = cfg.AVA.DETECTION_SCORE_THRESH

    # Only select frame_sec % 4 = 0 samples for validation if
    # FULL_TEST_ON_VAL is not set.
    boxes_sample_rate = 1
    if mode == "val" and not cfg.AVA.FULL_TEST_ON_VAL:
        boxes_sample_rate = 4

    all_boxes, count, unique_box_count = parse_bboxes_file(
        ann_filenames=ann_filenames,
        ann_is_gt_box=ann_is_gt_box,
        detect_thresh=detect_thresh,
        boxes_sample_rate=boxes_sample_rate,
    )

    logger.info(
        "Finished loading annotations from: %s" % ", ".join(ann_filenames)
    )
    logger.info("Detection threshold: {}".format(detect_thresh))
    logger.info("Number of unique boxes: %d" % unique_box_count)
    logger.info("Number of annotations: %d" % count)

    return all_boxes
def get_keyframe_data(boxes_and_labels):
    """
    Getting keyframe indices, boxes and labels in the dataset.

    Args:
        boxes_and_labels (list[dict]): a list which maps from video_idx to a dict.
            Each dict maps `frame_sec` to a list of boxes and corresponding labels.

    Returns:
        keyframe_indices (list): one `(video_idx, sec_idx, sec, frame_idx)`
            tuple per keyframe, where `frame_idx = (sec - 900) * FPS`
            (e.g. sec 900 -> frame 0, sec 901 -> frame 30 at 30 FPS).
        keyframe_boxes_and_labels (list[list[list]]): a list of list which maps from
            video_idx and sec_idx to a list of boxes and corresponding labels.
    """
    keyframe_indices = []
    keyframe_boxes_and_labels = []
    count = 0
    for video_idx, boxes_by_sec in enumerate(boxes_and_labels):
        # Kept keyframes of this video; its length is the next sec_idx.
        kept_for_video = []
        keyframe_boxes_and_labels.append(kept_for_video)
        for sec in boxes_by_sec.keys():
            if sec not in AVA_VALID_FRAMES:
                continue

            # Seconds without any box are not keyframes.
            sec_boxes = boxes_by_sec[sec]
            if len(sec_boxes) == 0:
                continue

            frame_idx = (sec - 900) * FPS
            keyframe_indices.append(
                (video_idx, len(kept_for_video), sec, frame_idx)
            )
            kept_for_video.append(sec_boxes)
            count += 1
    logger.info("%d keyframes used." % count)

    return keyframe_indices, keyframe_boxes_and_labels
def get_num_boxes_used(keyframe_indices, keyframe_boxes_and_labels):
    """
    Count the boxes that belong to the given keyframes.

    Args:
        keyframe_indices (list): a list of keyframe tuples whose first two
            items are `video_idx` and `sec_idx`.
        keyframe_boxes_and_labels (list[list[list]]): a list of list which maps from
            video_idx and sec_idx to a list of boxes and corresponding labels.

    Returns:
        count (int): total number of used boxes.
    """
    return sum(
        len(keyframe_boxes_and_labels[video_idx][sec_idx])
        for video_idx, sec_idx, _, _ in keyframe_indices
    )
def parse_bboxes_file(
    ann_filenames, ann_is_gt_box, detect_thresh, boxes_sample_rate=1
):
    """
    Parse AVA bounding boxes files.

    Args:
        ann_filenames (list of str(s)): a list of AVA bounding boxes annotation files.
        ann_is_gt_box (list of bools): a list of boolean to indicate whether the corresponding
            ann_file is ground-truth. `ann_is_gt_box[i]` correspond to `ann_filenames[i]`.
        detect_thresh (float): threshold for accepting predicted boxes, range [0, 1].
        boxes_sample_rate (int): sample rate for test bounding boxes. Get 1 every `boxes_sample_rate`.

    Returns:
        all_boxes (dict): maps `video_name` -> `frame_sec` -> list of
            `[box, labels]`, with an (initially empty) entry for every
            second in `AVA_VALID_FRAMES`.
        count (int): number of labels read, not counting the -1 placeholder.
        unique_box_count (int): number of distinct boxes per (video, second).
    """
    all_boxes = {}
    count = 0
    unique_box_count = 0
    for filename, is_gt_box in zip(ann_filenames, ann_is_gt_box):
        with pathmgr.open(filename, "r") as f:
            for line in f:
                row = line.strip().split(",")
                # When we use predicted boxes to train/eval, we need to
                # ignore the boxes whose scores are below the threshold.
                if not is_gt_box and float(row[7]) < detect_thresh:
                    continue

                video_name = row[0]
                frame_sec = int(row[1])
                if frame_sec % boxes_sample_rate != 0:
                    continue

                # Box with format [x1, y1, x2, y2] with a range of [0, 1] as float.
                coord_fields = row[2:6]
                # The raw coordinate text identifies a box within a second.
                box_key = ",".join(coord_fields)
                box = [float(value) for value in coord_fields]
                # An empty label field is stored as -1.
                label = int(row[6]) if row[6] != "" else -1

                if video_name not in all_boxes:
                    all_boxes[video_name] = {
                        sec: {} for sec in AVA_VALID_FRAMES
                    }

                sec_boxes = all_boxes[video_name][frame_sec]
                if box_key not in sec_boxes:
                    sec_boxes[box_key] = [box, []]
                    unique_box_count += 1

                sec_boxes[box_key][1].append(label)
                if label != -1:
                    count += 1

    # Save in format of a list of [box_i, box_i_labels].
    for boxes_by_sec in all_boxes.values():
        for frame_sec in boxes_by_sec.keys():
            boxes_by_sec[frame_sec] = list(boxes_by_sec[frame_sec].values())

    return all_boxes, count, unique_box_count
================================================
FILE: build/lib/slowfast/datasets/build.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from fvcore.common.registry import Registry
# Registry that dataset classes add themselves to via
# `@DATASET_REGISTRY.register()`; `build_dataset` looks them up by name.
DATASET_REGISTRY = Registry("DATASET")
DATASET_REGISTRY.__doc__ = """
Registry for dataset.
The registered object will be called with `obj(cfg, split)`.
The call should return a `torch.utils.data.Dataset` object.
"""
def build_dataset(dataset_name, cfg, split):
    """
    Look up a registered dataset class by name and instantiate it.

    Args:
        dataset_name (str): the name of the dataset to be constructed.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        split (str): the split of the data loader. Options include `train`,
            `val`, and `test`.
    Returns:
        Dataset: a constructed dataset specified by dataset_name.
    """
    # Dataset names in configs may be lowercase, while the registered classes
    # start with an uppercase letter. Note that `str.capitalize` also
    # lowercases every character after the first one.
    registry_key = dataset_name.capitalize()
    dataset_cls = DATASET_REGISTRY.get(registry_key)
    return dataset_cls(cfg, split)
================================================
FILE: build/lib/slowfast/datasets/charades.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import os
import random
from itertools import chain as chain
import torch
import torch.utils.data
import slowfast.utils.logging as logging
from slowfast.utils.env import pathmgr
from . import utils as utils
from .build import DATASET_REGISTRY
logger = logging.get_logger(__name__)
@DATASET_REGISTRY.register()
class Charades(torch.utils.data.Dataset):
"""
Charades video loader. Construct the Charades video loader, then sample
clips from the videos. For training and validation, a single clip is randomly
sampled from every video with random cropping, scaling, and flipping. For
testing, multiple clips are uniformaly sampled from every video with uniform
cropping. For uniform cropping, we take the left, center, and right crop if
the width is larger than height, or take top, center, and bottom crop if the
height is larger than the width.
"""
def __init__(self, cfg, mode, num_retries=10):
    """
    Load Charades data (frame paths, labels, etc.) into this dataset.
    The dataset could be downloaded from the Charades official website
    (https://allenai.org/plato/charades/).
    Please see datasets/DATASET.md for more information about the data format.

    Args:
        cfg (CfgNode): configs.
        mode (string): Options includes `train`, `val`, or `test` mode.
            For the train and val mode, the data loader will take data
            from the train or val set, and sample one clip per video.
            For the test mode, the data loader will take data from test set,
            and sample multiple clips per video.
        num_retries (int): number of retries.
    """
    # Only support train, val, and test mode.
    supported_modes = ("train", "val", "test")
    assert (
        mode in supported_modes
    ), "Split '{}' not supported for Charades ".format(mode)

    self.mode = mode
    self.cfg = cfg
    self._video_meta = {}
    self._num_retries = num_retries

    # For training or validation mode, one single clip is sampled from every
    # video. For testing, NUM_ENSEMBLE_VIEWS clips are sampled from every
    # video. For every clip, NUM_SPATIAL_CROPS is cropped spatially from
    # the frames.
    if self.mode == "train" or self.mode == "val":
        self._num_clips = 1
    elif self.mode == "test":
        views = cfg.TEST.NUM_ENSEMBLE_VIEWS
        crops = cfg.TEST.NUM_SPATIAL_CROPS
        self._num_clips = views * crops

    logger.info("Constructing Charades {}...".format(mode))
    self._construct_loader()
def _construct_loader(self):
    """
    Construct the video loader: read the frame list and labels, then
    repeat every entry once per clip.
    """
    # Only "train" has its own list; "val" and "test" both read val.csv.
    list_name = "train" if self.mode == "train" else "val"
    path_to_file = os.path.join(
        self.cfg.DATA.PATH_TO_DATA_DIR, "{}.csv".format(list_name)
    )
    assert pathmgr.exists(path_to_file), "{} dir not found".format(
        path_to_file
    )
    video_paths, labels = utils.load_image_lists(
        path_to_file, self.cfg.DATA.PATH_PREFIX, return_list=True
    )
    self._path_to_videos = video_paths
    self._labels = labels

    if self.mode != "train":
        # Form video-level labels from frame level annotations.
        self._labels = utils.convert_to_video_level_labels(self._labels)

    # Repeat every video and its labels `_num_clips` times, back to back.
    clips_per_video = self._num_clips
    self._path_to_videos = [
        video
        for video in self._path_to_videos
        for _ in range(clips_per_video)
    ]
    self._labels = [
        label for label in self._labels for _ in range(clips_per_video)
    ]
    # One run of clip indices per entry of the (already repeated) labels.
    self._spatial_temporal_idx = [
        clip_idx
        for _ in range(len(self._labels))
        for clip_idx in range(clips_per_video)
    ]

    logger.info(
        "Charades dataloader constructed (size: {}) from {}".format(
            len(self._path_to_videos), path_to_file
        )
    )
def get_seq_frames(self, index):
    """
    Given the video index, return the list of indexes of sampled frames.

    For "train"/"val" the clip start is drawn at random; for "test" the
    starts of the ensemble views are spread evenly over the video.

    Args:
        index (int): the video index.
    Returns:
        seq (list): the indexes of sampled frames from the video, clamped
            to `[0, video_length - 1]`.
    """
    # -1 indicates random temporal sampling.
    temporal_sample_index = (
        -1
        if self.mode in ["train", "val"]
        else self._spatial_temporal_idx[index]
        // self.cfg.TEST.NUM_SPATIAL_CROPS
    )
    num_frames = self.cfg.DATA.NUM_FRAMES
    sampling_rate = utils.get_random_sampling_rate(
        self.cfg.MULTIGRID.LONG_CYCLE_SAMPLING_RATE,
        self.cfg.DATA.SAMPLING_RATE,
    )
    video_length = len(self._path_to_videos[index])
    assert video_length == len(self._labels[index])

    # Number of raw frames spanned by the clip, both ends included.
    clip_length = (num_frames - 1) * sampling_rate + 1
    if temporal_sample_index == -1:
        if clip_length > video_length:
            # Clip longer than the video: allow a negative start; the
            # out-of-range indexes are clamped below.
            start = random.randint(video_length - clip_length, 0)
        else:
            start = random.randint(0, video_length - clip_length)
    else:
        # Spread the view starts evenly over the video. With a single view
        # there are no gaps between views, so divide by 1 instead of 0
        # (which used to raise ZeroDivisionError); that view starts at 0.
        num_gaps = max(self.cfg.TEST.NUM_ENSEMBLE_VIEWS - 1, 1)
        gap = float(max(video_length - clip_length, 0)) / num_gaps
        start = int(round(gap * temporal_sample_index))

    seq = [
        max(min(start + i * sampling_rate, video_length - 1), 0)
        for i in range(num_frames)
    ]

    return seq
def __getitem__(self, index):
    """
    Load one clip: pick the frames, build the clip-level label vector,
    normalize colors, apply spatial sampling and pack the pathway inputs.
    Args:
        index (int or tuple): the video index provided by the pytorch
            sampler. When a tuple is given it is unpacked as described in
            the body.
    Returns:
        frames: the value returned by `utils.pack_pathway_output` for the
            sampled clip (built from a `channel` x `num frames` x `height`
            x `width` tensor).
        label (tensor): label vector produced by `utils.as_binary_vector`
            with `cfg.MODEL.NUM_CLASSES` classes.
        index (int): the index of the video.
        time index (zero): The time index is currently not supported.
        {} extra data, currently not supported
    """
    short_cycle_idx = None
    if isinstance(index, tuple):
        # A tuple carries (index, num_yielded); with the multigrid short
        # cycle the inner index is itself (index, short_cycle_idx).
        index, self._num_yielded = index
        if self.cfg.MULTIGRID.SHORT_CYCLE:
            index, short_cycle_idx = index

    if self.mode in ["train", "val"]:
        # -1 indicates random spatial sampling.
        spatial_sample_index = -1
        jitter_scales = self.cfg.DATA.TRAIN_JITTER_SCALES
        min_scale = jitter_scales[0]
        max_scale = jitter_scales[1]
        crop_size = self.cfg.DATA.TRAIN_CROP_SIZE
        if short_cycle_idx in [0, 1]:
            short_factor = self.cfg.MULTIGRID.SHORT_CYCLE_FACTORS[
                short_cycle_idx
            ]
            crop_size = int(round(short_factor * self.cfg.MULTIGRID.DEFAULT_S))
        if self.cfg.MULTIGRID.DEFAULT_S > 0:
            # Decreasing the scale is equivalent to using a larger "span"
            # in a sampling grid.
            min_scale = int(
                round(
                    float(min_scale)
                    * crop_size
                    / self.cfg.MULTIGRID.DEFAULT_S
                )
            )
    elif self.mode in ["test"]:
        # spatial_sample_index is the remainder of the clip index modulo
        # NUM_SPATIAL_CROPS: left/center/right when width is larger than
        # height, top/middle/bottom otherwise.
        spatial_sample_index = (
            self._spatial_temporal_idx[index]
            % self.cfg.TEST.NUM_SPATIAL_CROPS
        )
        # Testing is deterministic: no jitter, so all three sizes match.
        min_scale = max_scale = crop_size = self.cfg.DATA.TEST_CROP_SIZE
        assert len({min_scale, max_scale, crop_size}) == 1
    else:
        raise NotImplementedError(
            "Does not support {} mode".format(self.mode)
        )

    seq = self.get_seq_frames(index)
    frame_paths = [self._path_to_videos[index][frame] for frame in seq]
    frames = torch.as_tensor(
        utils.retry_load_images(frame_paths, self._num_retries)
    )

    # Aggregate the frame-level labels over the span covered by the clip.
    first_frame, last_frame = seq[0], seq[-1]
    span_labels = [
        self._labels[index][i] for i in range(first_frame, last_frame + 1)
    ]
    label = utils.aggregate_labels(span_labels)
    label = torch.as_tensor(
        utils.as_binary_vector(label, self.cfg.MODEL.NUM_CLASSES)
    )

    # Perform color normalization.
    frames = utils.tensor_normalize(
        frames, self.cfg.DATA.MEAN, self.cfg.DATA.STD
    )
    # T H W C -> C T H W.
    frames = frames.permute(3, 0, 1, 2)
    # Perform data augmentation.
    frames = utils.spatial_sampling(
        frames,
        spatial_idx=spatial_sample_index,
        min_scale=min_scale,
        max_scale=max_scale,
        crop_size=crop_size,
        random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
        inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
    )
    frames = utils.pack_pathway_output(self.cfg, frames)
    return frames, label, index, 0, {}
def __len__(self):
    """
    Dataset length, delegated to the `num_videos` property.
    Returns:
        (int): the number of videos in the dataset.
    """
    total = self.num_videos
    return total
@property
def num_videos(self):
    """
    Count the entries of `self._path_to_videos`.
    Returns:
        (int): the number of videos in the dataset.
    """
    video_entries = self._path_to_videos
    return len(video_entries)
================================================
FILE: build/lib/slowfast/datasets/cv2_transform.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import math
import numpy as np
import cv2
def clip_boxes_to_image(boxes, height, width):
    """
    Clamp box coordinates so they lie inside the image. The input array is
    modified in place and also returned.
    Args:
        boxes (ndarray): bounding boxes to clip. The dimension is
            `num boxes` x 4, with x coordinates in columns 0 and 2 and
            y coordinates in columns 1 and 3.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): clipped bounding boxes.
    """
    x_cols = [0, 2]
    y_cols = [1, 3]
    # x coordinates are limited to [0, width - 1].
    non_negative_x = np.maximum(0.0, boxes[:, x_cols])
    boxes[:, x_cols] = np.minimum(width - 1.0, non_negative_x)
    # y coordinates are limited to [0, height - 1].
    non_negative_y = np.maximum(0.0, boxes[:, y_cols])
    boxes[:, y_cols] = np.minimum(height - 1.0, non_negative_y)
    return boxes
def random_short_side_scale_jitter_list(images, min_size, max_size, boxes=None):
    """
    Randomly rescale a list of images (and optional boxes) so that the short
    side takes a size drawn between `min_size` and `max_size`. The inverse of
    the size is sampled uniformly in [1 / max_size, 1 / min_size].
    Args:
        images (list): list of images to perform scale jitter. Dimension is
            `height` x `width` x `channel`.
        min_size (int): the minimal size to scale the frames.
        max_size (int): the maximal size to scale the frames.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    Returns:
        (list): the list of scaled images with dimension of
            `new height` x `new width` x `channel`. The input list is
            returned untouched when the short side already has the drawn
            size.
        (list or None): the scaled boxes with dimension of
            `num boxes` x 4.
    """
    inv_size = np.random.uniform(1.0 / max_size, 1.0 / min_size)
    target = int(round(1.0 / inv_size))

    src_h = images[0].shape[0]
    src_w = images[0].shape[1]
    width_already_fits = src_w <= src_h and src_w == target
    height_already_fits = src_h <= src_w and src_h == target
    if width_already_fits or height_already_fits:
        return images, boxes

    out_w, out_h = target, target
    if src_w < src_h:
        # Width is the short side: stretch the height proportionally.
        out_h = int(math.floor((float(src_h) / src_w) * target))
        if boxes is not None:
            boxes = [box * float(out_h) / src_h for box in boxes]
    else:
        # Height is the short side (or the image is square).
        out_w = int(math.floor((float(src_w) / src_h) * target))
        if boxes is not None:
            boxes = [box * float(out_w) / src_w for box in boxes]

    resized = []
    for frame in images:
        scaled = cv2.resize(
            frame, (out_w, out_h), interpolation=cv2.INTER_LINEAR
        )
        resized.append(scaled.astype(np.float32))
    return resized, boxes
def scale(size, image):
    """
    Resize the image so that its short side equals `size`, keeping the
    aspect ratio (the long side is floored to an integer).
    Args:
        size (int): size to scale the image.
        image (array): image to perform short side scale. Dimension is
            `height` x `width` x `channel`.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`. The input is returned as is when
            its short side already equals `size`.
    """
    src_h, src_w = image.shape[0], image.shape[1]
    if src_w <= src_h and src_w == size:
        return image
    if src_h <= src_w and src_h == size:
        return image

    if src_w < src_h:
        dst_w = size
        dst_h = int(math.floor((float(src_h) / src_w) * size))
    else:
        dst_h = size
        dst_w = int(math.floor((float(src_w) / src_h) * size))
    resized = cv2.resize(image, (dst_w, dst_h), interpolation=cv2.INTER_LINEAR)
    return resized.astype(np.float32)
def scale_boxes(size, boxes, height, width):
    """
    Rescale boxes to follow an image whose short side is resized to `size`.
    The boxes are multiplied in place.
    Args:
        size (int): size to scale the image.
        boxes (ndarray): bounding boxes to perform scale. The dimension is
            `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): scaled bounding boxes.
    """
    if width <= height and width == size:
        return boxes
    if height <= width and height == size:
        return boxes

    if width < height:
        resized_h = int(math.floor((float(height) / width) * size))
        boxes *= float(resized_h) / height
    else:
        resized_w = int(math.floor((float(width) / height) * size))
        boxes *= float(resized_w) / width
    return boxes
def horizontal_flip_list(prob, images, order="CHW", boxes=None):
    """
    Horizontally flip the list of images and optional boxes with probability
    `prob`. All images share the same flip decision.
    Args:
        prob (float): probability to flip.
        images (list): list of images to flip. Dimension is
            `height` x `width` x `channel` or `channel` x `height` x `width`.
        order (str): axis order of the images, "CHW" or "HWC".
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        (list): the (possibly flipped) images, in the same axis order as the
            input.
        (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    """
    assert order in ["CHW", "HWC"], "order {} is not supported".format(order)
    # The width axis depends on the layout: last axis for CHW, middle axis
    # for HWC. Reading shape[1] unconditionally would pick the height for
    # CHW images and mirror the boxes around the wrong extent.
    width = images[0].shape[2] if order == "CHW" else images[0].shape[1]

    if np.random.uniform() < prob:
        if boxes is not None:
            boxes = [flip_boxes(proposal, width) for proposal in boxes]
        if order == "CHW":
            # Reverse the width (last) axis.
            flipped = [np.asarray(image)[:, :, ::-1] for image in images]
            return flipped, boxes
        return [cv2.flip(image, 1) for image in images], boxes
    return images, boxes
def spatial_shift_crop_list(size, images, spatial_shift_pos, boxes=None):
    """
    Take a square crop at one of three positions along the longer side of the
    images: left/center/right when the image is at least as wide as tall,
    top/center/bottom otherwise. The other axis is always center cropped.
    Args:
        size (int): size to crop.
        images (list): list of images to crop. Dimension is
            `height` x `width` x `channel`.
        spatial_shift_pos (int): option includes 0 (left), 1 (middle), and
            2 (right) crop.
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4. Shifted in place by the crop
            offset.
    Returns:
        cropped (list): the cropped list of images with dimension of
            `height` x `width` x `channel`.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    """
    assert spatial_shift_pos in [0, 1, 2]
    img_h = images[0].shape[0]
    img_w = images[0].shape[1]

    # Start from a centered crop on both axes.
    top = int(math.ceil((img_h - size) / 2))
    left = int(math.ceil((img_w - size) / 2))

    taller_than_wide = img_h > img_w
    if spatial_shift_pos == 0:
        if taller_than_wide:
            top = 0
        else:
            left = 0
    elif spatial_shift_pos == 2:
        if taller_than_wide:
            top = img_h - size
        else:
            left = img_w - size

    cropped = [
        frame[top : top + size, left : left + size, :] for frame in images
    ]
    assert cropped[0].shape[0] == size, "Image height not cropped properly"
    assert cropped[0].shape[1] == size, "Image width not cropped properly"

    if boxes is not None:
        for box in boxes:
            box[:, [0, 2]] -= left
            box[:, [1, 3]] -= top
    return cropped, boxes
def CHW2HWC(image):
    """
    Move the channel axis last: `channel` x `height` x `width` becomes
    `height` x `width` x `channel`.
    Args:
        image (array): image to transpose.
    Returns
        (array): transposed image.
    """
    hwc_axes = [1, 2, 0]
    return image.transpose(hwc_axes)
def HWC2CHW(image):
    """
    Move the channel axis first: `height` x `width` x `channel` becomes
    `channel` x `height` x `width`.
    Args:
        image (array): image to transpose.
    Returns
        (array): transposed image.
    """
    chw_axes = [2, 0, 1]
    return image.transpose(chw_axes)
def color_jitter_list(
    images, img_brightness=0, img_contrast=0, img_saturation=0
):
    """
    Apply the enabled color jitters (brightness, contrast, saturation) to the
    list of images, in a randomly permuted order. A jitter is enabled when
    its ratio is non-zero.
    Args:
        images (list): list of images to perform color jitter.
        img_brightness (float): jitter ratio for brightness.
        img_contrast (float): jitter ratio for contrast.
        img_saturation (float): jitter ratio for saturation.
    Returns:
        images (list): the jittered list of images.
    """
    # Each entry applies one jitter; helpers are resolved only when called.
    steps = []
    if img_brightness != 0:
        steps.append(lambda frames: brightness_list(img_brightness, frames))
    if img_contrast != 0:
        steps.append(lambda frames: contrast_list(img_contrast, frames))
    if img_saturation != 0:
        steps.append(lambda frames: saturation_list(img_saturation, frames))

    if len(steps) > 0:
        order = np.random.permutation(np.arange(len(steps)))
        for pick in order:
            images = steps[pick](images)
    return images
def lighting_list(imgs, alphastd, eigval, eigvec, alpha=None):
    """
    Perform AlexNet-style PCA jitter on the given list of images. The same
    color shift is added to every image, and the images are modified in
    place.
    Args:
        imgs (list): list of images to perform lighting jitter. Each image
            is indexed along its first axis as the channel axis; channel
            `idx` receives component `2 - idx` of the shift.
        alphastd (float): jitter ratio for PCA jitter. 0 disables the
            jitter and returns `imgs` unchanged.
        eigval (list): eigenvalues for PCA jitter (3 values).
        eigvec (list[list]): eigenvectors for PCA jitter.
        alpha (array-like): optional. The 3 PCA coefficients to use. When
            None (default) they are drawn from a normal distribution with
            standard deviation `alphastd`.
    Returns:
        out_images (list): the list of jittered images.
    """
    if alphastd == 0:
        return imgs
    if alpha is None:
        # generate alpha1, alpha2, alpha3
        alpha = np.random.normal(0, alphastd, size=(1, 3))
    else:
        # Honour caller-provided coefficients instead of overwriting them.
        alpha = np.reshape(np.asarray(alpha, dtype=np.float64), (1, 3))
    eig_vec = np.array(eigvec)
    eig_val = np.reshape(eigval, (1, 3))
    rgb = np.sum(
        eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
        axis=1,
    )
    out_images = []
    for img in imgs:
        for idx in range(img.shape[0]):
            img[idx] = img[idx] + rgb[2 - idx]
        out_images.append(img)
    return out_images
def color_normalization(image, mean, stddev):
    """
    Normalize each channel of a channel-first image in place:
    subtract the channel mean, then divide by the channel stddev.
    Args:
        image (array): image to perform color normalization, in
            `channel` x `height` x `width` layout.
        mean (list): per-channel mean values to subtract.
        stddev (list): per-channel stddev values to divide by.
    Returns:
        image (array): the normalized image (same object as the input).
    """
    # Input image should in format of CHW
    num_channels = image.shape[0]
    assert len(mean) == num_channels, "channel mean not computed properly"
    assert len(stddev) == num_channels, "channel stddev not computed properly"
    for ch in range(num_channels):
        centered = image[ch] - mean[ch]
        image[ch] = centered
        image[ch] = image[ch] / stddev[ch]
    return image
def pad_image(image, pad_size, order="CHW"):
    """
    Pad the two spatial axes of the image with `pad_size` constant (zero)
    pixels on every side. The channel axis is not padded.
    Args:
        image (array): image to pad.
        pad_size (int): size to pad.
        order (str): axis order of the image, "CHW" or "HWC".
    Returns:
        img (array): padded image.
    Raises:
        NotImplementedError: if `order` is neither "CHW" nor "HWC".
    """
    spatial_pad = (pad_size, pad_size)
    if order == "CHW":
        pad_width = ((0, 0), spatial_pad, spatial_pad)
    elif order == "HWC":
        pad_width = (spatial_pad, spatial_pad, (0, 0))
    else:
        # Fail with a clear message instead of an unbound local variable.
        raise NotImplementedError("Unknown order {}".format(order))
    return np.pad(image, pad_width, mode="constant")
def horizontal_flip(prob, image, order="CHW"):
    """
    Mirror the image along its width axis with probability `prob`.
    Args:
        prob (float): probability to flip.
        image (array): image to flip.
        order (str): axis order of the image, "CHW" or "HWC".
    Returns:
        img (array): the flipped image, or the input itself when no flip
            is drawn.
    """
    assert order in ["CHW", "HWC"], "order {} is not supported".format(order)
    do_flip = np.random.uniform() < prob
    if not do_flip:
        return image
    if order == "CHW":
        return image[:, :, ::-1]
    if order == "HWC":
        return image[:, ::-1, :]
    raise NotImplementedError("Unknown order {}".format(order))
def flip_boxes(boxes, im_width):
    """
    Mirror boxes horizontally inside an image of width `im_width`. The input
    is left untouched; a flipped copy is returned.
    Args:
        boxes (array): boxes to flip, with x coordinates in columns 0 and 2
            of every group of 4 columns.
        im_width (int): width of the image.
    Returns:
        boxes_flipped (array): flipped boxes.
    """
    # The old right edge becomes the new left edge and vice versa.
    new_left = im_width - boxes[:, 2::4] - 1
    new_right = im_width - boxes[:, 0::4] - 1
    mirrored = boxes.copy()
    mirrored[:, 0::4] = new_left
    mirrored[:, 2::4] = new_right
    return mirrored
def crop_boxes(boxes, x_offset, y_offset):
    """
    Translate boxes into the coordinate frame of a crop whose top-left corner
    sits at (`x_offset`, `y_offset`). The array is updated in place.
    Args:
        boxes (array): boxes to crop, `num boxes` x 4.
        x_offset (int): offset on x.
        y_offset (int): offset on y.
    Returns:
        boxes (array): the shifted boxes (same object as the input).
    """
    for columns, shift in (([0, 2], x_offset), ([1, 3], y_offset)):
        boxes[:, columns] = boxes[:, columns] - shift
    return boxes
def random_crop_list(images, size, pad_size=0, order="CHW", boxes=None):
    """
    Perform the same random square crop on a list of images, optionally
    padding them first.
    Args:
        images (list): list of images to perform random crop.
        size (int): size to crop.
        pad_size (int): padding size applied to each spatial side before
            cropping.
        order (str): axis order of the images, "CHW" or "HWC".
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (list): the cropped list of images, in the same axis order
            as the input. The (padded) input list is returned as is when it
            already has the requested spatial size.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    Raises:
        NotImplementedError: if `order` is neither "CHW" nor "HWC".
    """
    if order not in ("CHW", "HWC"):
        raise NotImplementedError("Unknown order {}".format(order))

    # explicitly dealing processing per image order to avoid flipping images.
    if pad_size > 0:
        images = [
            pad_image(pad_size=pad_size, image=image, order=order)
            for image in images
        ]

    h_axis, w_axis = (1, 2) if order == "CHW" else (0, 1)
    height = images[0].shape[h_axis]
    width = images[0].shape[w_axis]
    if height == size and width == size:
        return images, boxes

    # np.random.randint excludes `high`; add 1 so that the last valid
    # offset (crop flush with the bottom/right edge) can be drawn too.
    y_offset = 0
    if height > size:
        y_offset = int(np.random.randint(0, height - size + 1))
    x_offset = 0
    if width > size:
        x_offset = int(np.random.randint(0, width - size + 1))

    if order == "CHW":
        cropped = [
            image[:, y_offset : y_offset + size, x_offset : x_offset + size]
            for image in images
        ]
    else:
        cropped = [
            image[y_offset : y_offset + size, x_offset : x_offset + size, :]
            for image in images
        ]
    assert cropped[0].shape[h_axis] == size, "Image not cropped properly"
    assert cropped[0].shape[w_axis] == size, "Image not cropped properly"

    if boxes is not None:
        boxes = [crop_boxes(proposal, x_offset, y_offset) for proposal in boxes]
    return cropped, boxes
def center_crop(size, image):
    """
    Cut a `size` x `size` window from the middle of the image.
    Args:
        size (int): size of the cropped height and width.
        image (array): the image to perform center crop, in
            `height` x `width` x `channel` layout.
    Returns:
        cropped (array): the center crop.
    """
    img_h, img_w = image.shape[0], image.shape[1]
    top = int(math.ceil((img_h - size) / 2))
    left = int(math.ceil((img_w - size) / 2))
    window = image[top : top + size, left : left + size, :]
    assert window.shape[0] == size, "Image height not cropped properly"
    assert window.shape[1] == size, "Image width not cropped properly"
    return window
# ResNet style scale jittering: randomly select the scale from
# [1/max_size, 1/min_size]
def random_scale_jitter(image, min_size, max_size):
    """
    Perform ResNet style random scale jittering: the inverse of the target
    short-side size is drawn uniformly from [1/max_size, 1/min_size], and
    the image is rescaled to the resulting size.
    Args:
        image (array): image to perform random scale.
        min_size (int): min size to scale.
        max_size (int): max size to scale.
    Returns:
        image (array): scaled image.
    """
    inv_size = np.random.uniform(1.0 / max_size, 1.0 / min_size)
    target_size = int(round(1.0 / inv_size))
    return scale(target_size, image)
def random_scale_jitter_list(images, min_size, max_size):
    """
    Perform ResNet style random scale jittering on a list of images: the
    inverse of the target short-side size is drawn uniformly from
    [1/max_size, 1/min_size]. Note that all the images share the same scale.
    Args:
        images (list): list of images to perform random scale.
        min_size (int): min size to scale.
        max_size (int): max size to scale.
    Returns:
        images (list): list of scaled images.
    """
    inv_size = np.random.uniform(1.0 / max_size, 1.0 / min_size)
    shared_size = int(round(1.0 / inv_size))
    rescaled = []
    for frame in images:
        rescaled.append(scale(shared_size, frame))
    return rescaled
def random_sized_crop(image, size, area_frac=0.08):
    """
    Perform random sized cropping on the given image. A crop covering
    `area_frac` - 100% of the image area with aspect ratio in [3/4, 4/3] is
    drawn (up to 10 attempts) and resized to `size` x `size`. If no attempt
    fits inside the image, fall back to a center crop of the short-side
    scaled image.
    Args:
        image (array): image to crop, `height` x `width` x `channel`.
        size (int): size to crop.
        area_frac (float): minimum fraction of the image area to keep.
    Returns:
        (array): cropped image.
    """
    height = image.shape[0]
    width = image.shape[1]
    area = height * width
    for _ in range(0, 10):
        target_area = np.random.uniform(area_frac, 1.0) * area
        aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
        w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
        h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
        if np.random.uniform() < 0.5:
            w, h = h, w
        if h <= height and w <= width:
            # np.random.randint excludes `high`; add 1 so the crop can also
            # sit flush with the bottom/right edge. This also covers the
            # full-size case (randint(0, 1) is always 0).
            y_offset = int(np.random.randint(0, height - h + 1))
            x_offset = int(np.random.randint(0, width - w + 1))
            cropped = image[y_offset : y_offset + h, x_offset : x_offset + w, :]
            assert (
                cropped.shape[0] == h and cropped.shape[1] == w
            ), "Wrong crop size"
            cropped = cv2.resize(
                cropped, (size, size), interpolation=cv2.INTER_LINEAR
            )
            return cropped.astype(np.float32)
    return center_crop(size, scale(size, image))
def lighting(img, alphastd, eigval, eigvec):
    """
    Perform AlexNet-style PCA jitter on the given image. The image is
    modified in place.
    Args:
        img (array): image to perform lighting jitter, indexed along its
            first axis as the channel axis; channel `idx` receives component
            `2 - idx` of the color shift.
        alphastd (float): jitter ratio for PCA jitter. 0 disables the jitter.
        eigval (array): eigenvalues for PCA jitter (3 values).
        eigvec (list): eigenvectors for PCA jitter.
    Returns:
        img (array): the jittered image.
    """
    if alphastd == 0:
        return img
    # generate alpha1, alpha2, alpha3.
    coeffs = np.random.normal(0, alphastd, size=(1, 3))
    vectors = np.array(eigvec)
    values = np.reshape(eigval, (1, 3))
    coeff_rows = np.repeat(coeffs, 3, axis=0)
    value_rows = np.repeat(values, 3, axis=0)
    shift = np.sum(vectors * coeff_rows * value_rows, axis=1)
    for ch, plane in enumerate(img):
        img[ch] = plane + shift[2 - ch]
    return img
def random_sized_crop_list(images, size, crop_area_fraction=0.08):
    """
    Perform the same random sized crop on every image of the list. A crop
    covering `crop_area_fraction` - 100% of the image area with aspect ratio
    in [3/4, 4/3] is drawn (up to 10 attempts) and each crop is resized to
    `size` x `size`. If no attempt fits inside the image, fall back to a
    center crop of the short-side scaled images.
    Args:
        images (list): images to crop, each `height` x `width` x `channel`.
            The size of the first image is used for all of them.
        size (int): size to crop.
        crop_area_fraction (float): minimum fraction of the image area to
            keep.
    Returns:
        (list): list of cropped images.
    """
    height = images[0].shape[0]
    width = images[0].shape[1]
    area = height * width
    for _ in range(0, 10):
        target_area = np.random.uniform(crop_area_fraction, 1.0) * area
        aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
        w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
        h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
        if np.random.uniform() < 0.5:
            w, h = h, w
        if h <= height and w <= width:
            # np.random.randint excludes `high`; add 1 so the crop can also
            # sit flush with the bottom/right edge. This also covers the
            # full-size case (randint(0, 1) is always 0).
            y_offset = int(np.random.randint(0, height - h + 1))
            x_offset = int(np.random.randint(0, width - w + 1))
            cropped_images = []
            for image in images:
                cropped = image[
                    y_offset : y_offset + h, x_offset : x_offset + w, :
                ]
                assert (
                    cropped.shape[0] == h and cropped.shape[1] == w
                ), "Wrong crop size"
                cropped = cv2.resize(
                    cropped, (size, size), interpolation=cv2.INTER_LINEAR
                )
                cropped_images.append(cropped.astype(np.float32))
            return cropped_images
    return [center_crop(size, scale(size, image)) for image in images]
def blend(image1, image2, alpha):
    """
    Linearly mix two images: `alpha` weights `image1` and `1 - alpha`
    weights `image2`.
    Args:
        image1 (array): first image.
        image2 (array): second image.
        alpha (float): weight of the first image.
    Returns:
        (array): the weighted sum of both images.
    """
    first_part = image1 * alpha
    second_part = image2 * (1 - alpha)
    return first_part + second_part
def grayscale(image):
    """
    Convert the image to gray scale. The luma weights are applied with
    channel 2 as red, channel 1 as green and channel 0 as blue, and the
    result is written to the first three channels of a copy of the image.
    Args:
        image (array): image to convert to gray scale. Dimension is
            `channel` x `height` x `width`.
    Returns:
        img_gray (array): image in gray scale.
    """
    # R -> 0.299, G -> 0.587, B -> 0.114.
    luma = 0.299 * image[2] + 0.587 * image[1] + 0.114 * image[0]
    gray_image = np.copy(image)
    for ch in range(3):
        gray_image[ch] = luma
    return gray_image
def saturation(var, image):
    """
    Randomly change the color saturation of the image by blending it with its
    gray-scale version. The blend weight is drawn uniformly from
    [1 - var, 1 + var].
    Args:
        var (float): maximum deviation of the blend weight from 1.
        image (array): image to perform color saturation.
    Returns:
        (array): image that performed color saturation.
    """
    gray_version = grayscale(image)
    weight = 1.0 + np.random.uniform(-var, var)
    return blend(image, gray_version, weight)
def brightness(var, image):
    """
    Randomly change the brightness of the image by blending it with an
    all-zero image. The blend weight is drawn uniformly from
    [1 - var, 1 + var].
    Args:
        var (float): maximum deviation of the blend weight from 1.
        image (array): image to perform color brightness.
    Returns:
        (array): image that performed color brightness.
    """
    black = np.zeros(image.shape).astype(image.dtype)
    weight = 1.0 + np.random.uniform(-var, var)
    return blend(image, black, weight)
def contrast(var, image):
    """
    Randomly change the contrast of the image by blending it with a constant
    image filled with the mean gray level. The blend weight is drawn
    uniformly from [1 - var, 1 + var].
    Args:
        var (float): maximum deviation of the blend weight from 1.
        image (array): image to perform color contrast.
    Returns:
        (array): image that performed color contrast.
    """
    flat_gray = grayscale(image)
    mean_level = np.mean(flat_gray[0])
    flat_gray.fill(mean_level)
    weight = 1.0 + np.random.uniform(-var, var)
    return blend(image, flat_gray, weight)
def saturation_list(var, images):
    """
    Randomly change the color saturation of every image in the list, using
    one shared blend weight drawn uniformly from [1 - var, 1 + var].
    Args:
        var (float): maximum deviation of the blend weight from 1.
        images (list): list of images to perform color saturation.
    Returns:
        (list): list of images that performed color saturation.
    """
    shared_weight = 1.0 + np.random.uniform(-var, var)
    return [
        blend(frame, grayscale(frame), shared_weight) for frame in images
    ]
def brightness_list(var, images):
    """
    Randomly change the brightness of every image in the list, using one
    shared blend weight drawn uniformly from [1 - var, 1 + var].
    Args:
        var (float): maximum deviation of the blend weight from 1.
        images (list): list of images to perform color brightness.
    Returns:
        (list): list of images that performed color brightness.
    """
    shared_weight = 1.0 + np.random.uniform(-var, var)
    brightened = []
    for frame in images:
        black = np.zeros(frame.shape).astype(frame.dtype)
        brightened.append(blend(frame, black, shared_weight))
    return brightened
def contrast_list(var, images):
    """
    Randomly change the contrast of every image in the list, using one
    shared blend weight drawn uniformly from [1 - var, 1 + var]. Each image
    is blended with a constant image filled with its own mean gray level.
    Args:
        var (float): maximum deviation of the blend weight from 1.
        images (list): list of images to perform color contrast.
    Returns:
        (list): list of images that performed color contrast.
    """
    shared_weight = 1.0 + np.random.uniform(-var, var)

    def _mean_gray(frame):
        # Gray-scale copy flattened to its mean level.
        flat = grayscale(frame)
        flat.fill(np.mean(flat[0]))
        return flat

    return [blend(frame, _mean_gray(frame), shared_weight) for frame in images]
def color_jitter(image, img_brightness=0, img_contrast=0, img_saturation=0):
    """
    Apply the enabled color jitters (brightness, contrast, saturation) to the
    image, in a randomly permuted order. A jitter is enabled when its ratio
    is non-zero.
    Args:
        image (array): image to perform color jitter.
        img_brightness (float): jitter ratio for brightness.
        img_contrast (float): jitter ratio for contrast.
        img_saturation (float): jitter ratio for saturation.
    Returns:
        image (array): the jittered image.
    """
    # Each entry applies one jitter; helpers are resolved only when called.
    steps = []
    if img_brightness != 0:
        steps.append(lambda frame: brightness(img_brightness, frame))
    if img_contrast != 0:
        steps.append(lambda frame: contrast(img_contrast, frame))
    if img_saturation != 0:
        steps.append(lambda frame: saturation(img_saturation, frame))

    if len(steps) > 0:
        order = np.random.permutation(np.arange(len(steps)))
        for pick in order:
            image = steps[pick](image)
    return image
def revert_scaled_boxes(size, boxes, img_height, img_width):
    """
    Map boxes from the scaled image back to the original image by
    multiplying them with `min(img_height, img_width) / size`.
    Args:
        size (int): short-side size of the scaled image.
        boxes (array): shape (num_boxes, 4).
        img_height (int): height of original image.
        img_width (int): width of original image.
    Returns:
        reverted_boxes (array): boxes scaled back to the original image size.
    """
    short_side = np.min([img_height, img_width])
    return boxes * (short_side / size)
================================================
FILE: build/lib/slowfast/datasets/decoder.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import math
import numpy as np
import random
import torch
import torchvision.io as io
def temporal_sampling(frames, start_idx, end_idx, num_samples):
    """
    Pick `num_samples` frames at equally spaced positions between the start
    and end index. Positions outside the video are clamped to the first or
    last frame.
    Args:
        frames (tensor): a tensor of video frames, dimension is
            `num video frames` x `channel` x `height` x `width`.
        start_idx (int): the index of the start frame.
        end_idx (int): the index of the end frame.
        num_samples (int): number of frames to sample.
    Returns:
        frames (tensor): a tensor of temporal sampled video frames, dimension
            is `num clip frames` x `channel` x `height` x `width`.
    """
    last_valid = frames.shape[0] - 1
    positions = torch.linspace(start_idx, end_idx, num_samples)
    picks = positions.clamp(0, last_valid).long()
    return frames.index_select(0, picks)
def get_start_end_idx(
    video_size, clip_size, clip_idx, num_clips, use_offset=False
):
    """
    Sample a clip of size clip_size from a video of size video_size and
    return the indices of the first and last frame of the clip. If clip_idx is
    -1, the clip is randomly sampled, otherwise uniformly split the video to
    num_clips clips, and select the start and end index of clip_idx-th video
    clip.
    Args:
        video_size (int): number of overall frames.
        clip_size (int): size of the clip to sample from the frames.
        clip_idx (int): if clip_idx is -1, perform random jitter sampling. If
            clip_idx is larger than -1, uniformly split the video to num_clips
            clips, and select the start and end index of the clip_idx-th video
            clip.
        num_clips (int): overall number of clips to uniformly sample from the
            given video for testing.
        use_offset (bool): if True, clip starts are multiples of
            `floor(delta / (num_clips - 1))` (the center clip when num_clips
            is 1); otherwise the start is `delta * clip_idx / num_clips`,
            where delta is `max(video_size - clip_size, 0)`.
    Returns:
        start_idx (int or float): the start frame index.
        end_idx (int or float): the end frame index.
    """
    # Room left for moving the clip inside the video.
    slack = max(video_size - clip_size, 0)
    if clip_idx == -1:
        # Random temporal sampling.
        first = random.uniform(0, slack)
    elif not use_offset:
        # Uniformly sample the clip with the given index.
        first = slack * clip_idx / num_clips
    elif num_clips == 1:
        # Take the center clip if num_clips is 1.
        first = math.floor(slack / 2)
    else:
        # Uniformly sample the clip with the given index.
        first = clip_idx * math.floor(slack / (num_clips - 1))
    last = first + clip_size - 1
    return first, last
def pyav_decode_stream(
    container, start_pts, end_pts, stream, stream_name, buffer_size=0
):
    """
    Decode the frames whose PTS lies in [start_pts, end_pts] with the PyAV
    decoder, plus the frames read past end_pts until the buffer count is
    reached.
    Args:
        container (container): PyAV container.
        start_pts (int): the starting Presentation TimeStamp to fetch the
            video frames.
        end_pts (int): the ending Presentation TimeStamp of the decoded frames.
        stream (stream): PyAV stream.
        stream_name (dict): a dictionary of streams. For example, {"video": 0}
            means video stream at stream index 0.
        buffer_size (int): number of additional frames to decode beyond
            end_pts. Note that the first frame beyond end_pts is always kept
            before the count is checked.
    Returns:
        result (list): list of frames decoded, sorted by PTS.
        max_pts (int): max Presentation TimeStamp seen while decoding.
    """
    # Seeking in the stream is imprecise. Thus, seek to an earlier PTS by a
    # margin pts.
    margin = 1024
    container.seek(
        max(start_pts - margin, 0),
        any_frame=False,
        backward=True,
        stream=stream,
    )

    by_pts = {}
    frames_past_end = 0
    latest_pts = 0
    for frame in container.decode(**stream_name):
        pts = frame.pts
        latest_pts = max(latest_pts, pts)
        if pts < start_pts:
            continue
        by_pts[pts] = frame
        if pts > end_pts:
            frames_past_end += 1
            if frames_past_end >= buffer_size:
                break

    ordered = [by_pts[key] for key in sorted(by_pts)]
    return ordered, latest_pts
def torchvision_decode(
    video_handle,
    sampling_rate,
    num_frames,
    clip_idx,
    video_meta,
    num_clips=10,
    target_fps=30,
    modalities=("visual",),
    max_spatial_scale=0,
    use_offset=False,
):
    """
    Decode a clip from raw video bytes with the TorchVision decoder. When
    the video meta data allows it, only the PTS range of the requested clip
    is decoded (selective decoding); otherwise, or when selective decoding
    returns nothing, the entire video is decoded. An empty `video_meta` is
    filled in from the probed video so later calls can reuse it.
    Args:
        video_handle (bytes): raw bytes of the video file.
        sampling_rate (int): frame sampling rate (interval between two sampled
            frames).
        num_frames (int): number of frames to sample.
        clip_idx (int): if clip_idx is -1, perform random temporal
            sampling. If clip_idx is larger than -1, uniformly split the
            video to num_clips clips, and select the clip_idx-th video clip.
        video_meta (dict): a dict contains VideoMetaData. Details can be found
            at `pytorch/vision/torchvision/io/_video_opt.py`. Updated in
            place when empty.
        num_clips (int): overall number of clips to uniformly sample from the
            given video.
        target_fps (int): the input video may has different fps, convert it to
            the target video fps.
        modalities (tuple): tuple of modalities to decode. Only `visual` is
            consulted here.
        max_spatial_scale (int): the maximal resolution of the spatial shorter
            edge size during decoding.
        use_offset (bool): forwarded to `get_start_end_idx` to choose how the
            clip start is placed.
    Returns:
        frames (tensor): decoded frames from the video.
        fps (float): the number of frames per second of the video.
        decode_all_video (bool): if True, the entire video was decoded.
    """
    # Convert the bytes to a tensor.
    video_tensor = torch.from_numpy(np.frombuffer(video_handle, dtype=np.uint8))

    def _read_frames(pts_range):
        # Decode the raw video with the tv decoder for the given PTS range.
        decoded, _ = io._read_video_from_memory(
            video_tensor,
            seek_frame_margin=1.0,
            read_video_stream="visual" in modalities,
            video_width=0,
            video_height=0,
            video_min_dimension=max_spatial_scale,
            video_pts_range=pts_range,
            video_timebase_numerator=video_meta["video_numerator"],
            video_timebase_denominator=video_meta["video_denominator"],
        )
        return decoded

    # (0, -1) requests the whole video.
    full_range = (0, -1)
    pts_range = full_range
    decode_all_video = True

    # The video_meta is empty, fetch the meta data from the raw video.
    if len(video_meta) == 0:
        # Tracking the meta info for selective decoding in the future.
        meta = io._probe_video_from_memory(video_tensor)
        video_timebase = meta.video_timebase
        video_meta["video_timebase"] = video_timebase
        video_meta["video_numerator"] = video_timebase.numerator
        video_meta["video_denominator"] = video_timebase.denominator
        video_meta["has_video"] = meta.has_video
        video_meta["video_duration"] = meta.video_duration
        video_meta["video_fps"] = meta.video_fps
        audio_timebase = meta.audio_timebase
        video_meta["audio_timebas"] = audio_timebase
        video_meta["audio_numerator"] = audio_timebase.numerator
        video_meta["audio_denominator"] = audio_timebase.denominator
        video_meta["has_audio"] = meta.has_audio
        video_meta["audio_duration"] = meta.audio_duration
        video_meta["audio_sample_rate"] = meta.audio_sample_rate

    fps = video_meta["video_fps"]
    selective_ok = (
        video_meta["has_video"]
        and video_meta["video_denominator"] > 0
        and video_meta["video_duration"] > 0
    )
    if selective_ok:
        # try selective decoding.
        decode_all_video = False
        clip_size = sampling_rate * num_frames / target_fps * fps
        start_idx, end_idx = get_start_end_idx(
            fps * video_meta["video_duration"],
            clip_size,
            clip_idx,
            num_clips,
            use_offset=use_offset,
        )
        # Convert frame index to pts.
        pts_per_frame = video_meta["video_denominator"] / fps
        pts_range = (
            int(start_idx * pts_per_frame),
            int(end_idx * pts_per_frame),
        )

    v_frames = _read_frames(pts_range)
    if v_frames.shape == torch.Size([0]):
        # failed selective decoding
        decode_all_video = True
        v_frames = _read_frames(full_range)
    return v_frames, fps, decode_all_video
def pyav_decode(
    container,
    sampling_rate,
    num_frames,
    clip_idx,
    num_clips=10,
    target_fps=30,
    use_offset=False,
):
    """
    Decode frames from a PyAV container, selectively when possible.

    When the first video stream exposes both a duration and a frame count,
    only the pts range covering the requested clip is decoded. When either
    piece of information is missing (duration is None, or the frame count is
    0), the entire video is decoded instead and the caller is told so through
    `decode_all_video`.

    Args:
        container (container): pyav container. It is closed by this function
            once the frames have been read.
        sampling_rate (int): frame sampling rate (interval between two sampled
            frames).
        num_frames (int): number of frames to sample.
        clip_idx (int): forwarded to `get_start_end_idx`. If clip_idx is -1,
            perform random temporal sampling. If clip_idx is larger than -1,
            uniformly split the video to num_clips clips, and select the
            clip_idx-th video clip.
        num_clips (int): overall number of clips to uniformly sample from the
            given video.
        target_fps (int): the input video may have a different fps; the clip
            length is rescaled by fps / target_fps before frame sampling.
        use_offset (bool): forwarded unchanged to `get_start_end_idx`.
    Returns:
        frames (tensor): decoded frames stacked along the first dimension, or
            None if the container has no video stream when frames are read.
        fps (float): the average frame rate reported by the video stream.
        decode_all_video (bool): If True, the entire video was decoded.
    """
    # Try to fetch the decoding information from the video head. Some videos
    # do not expose it: duration is then None and/or the frame count is 0.
    video_stream = container.streams.video[0]
    fps = float(video_stream.average_rate)
    frames_length = video_stream.frames
    duration = video_stream.duration

    if duration is None or not frames_length:
        # Without both values a pts range cannot be derived (and
        # duration / frames_length would divide by zero), so decode
        # the entire video.
        decode_all_video = True
        video_start_pts, video_end_pts = 0, math.inf
    else:
        # Perform selective decoding.
        decode_all_video = False
        start_idx, end_idx = get_start_end_idx(
            frames_length,
            sampling_rate * num_frames / target_fps * fps,
            clip_idx,
            num_clips,
            use_offset=use_offset,
        )
        # Number of pts units per frame, used to map frame indices to pts.
        timebase = duration / frames_length
        video_start_pts = int(start_idx * timebase)
        video_end_pts = int(end_idx * timebase)

    frames = None
    # If video stream was found, fetch video frames from the video.
    if container.streams.video:
        video_frames, _ = pyav_decode_stream(
            container,
            video_start_pts,
            video_end_pts,
            container.streams.video[0],
            {"video": 0},
        )
        container.close()

        frames = [frame.to_rgb().to_ndarray() for frame in video_frames]
        frames = torch.as_tensor(np.stack(frames))
    return frames, fps, decode_all_video
def decode(
    container,
    sampling_rate,
    num_frames,
    clip_idx=-1,
    num_clips=10,
    video_meta=None,
    target_fps=30,
    backend="pyav",
    max_spatial_scale=0,
    use_offset=False,
):
    """
    Decode a video with the selected backend and temporally sample a clip.

    Args:
        container: the video handle. It is forwarded to `pyav_decode` or
            `torchvision_decode` for those backends; for the `decord`
            backend it is indexed directly as a sequence of frames.
        sampling_rate (int): frame sampling rate (interval between two
            sampled frames).
        num_frames (int): number of frames to sample.
        clip_idx (int): -1 requests random temporal sampling; a value larger
            than -1 selects the clip_idx-th of num_clips uniformly split
            clips.
        num_clips (int): overall number of clips to uniformly sample from
            the given video.
        video_meta (dict): video meta data, only forwarded to the
            `torchvision` backend.
        target_fps (int): frame rate the clip length is expressed in; used
            by the `pyav` and `torchvision` backends.
        backend (str): one of `pyav`, `torchvision` or `decord`.
        max_spatial_scale (int): only forwarded to the `torchvision`
            backend.
        use_offset (bool): forwarded unchanged to `get_start_end_idx`.
    Returns:
        frames (tensor): the sampled frames, or None when decoding failed,
            produced no frames, or the backend is unknown.
    """
    assert clip_idx >= -1, "Not valied clip_idx {}".format(clip_idx)

    # Any failure while decoding (including an unknown backend) is reported
    # on stdout and turned into a None result.
    try:
        if backend == "pyav":
            frames, fps, decode_all_video = pyav_decode(
                container,
                sampling_rate,
                num_frames,
                clip_idx,
                num_clips,
                target_fps,
                use_offset=use_offset,
            )
        elif backend == "torchvision":
            frames, fps, decode_all_video = torchvision_decode(
                container,
                sampling_rate,
                num_frames,
                clip_idx,
                video_meta,
                num_clips,
                target_fps,
                ("visual",),
                max_spatial_scale,
                use_offset=use_offset,
            )
        elif backend == "decord":
            frames = container
        else:
            raise NotImplementedError(
                "Unknown decoding backend {}".format(backend)
            )
    except Exception as e:
        print("Failed to decode by {} with exception: {}".format(backend, e))
        return None

    if backend == "decord":
        if frames is None:
            return None
        # The clip span is measured directly in source frames.
        span = sampling_rate * num_frames
        first, last = get_start_end_idx(
            len(frames),
            span,
            clip_idx,
            num_clips,
            use_offset=use_offset,
        )
        positions = torch.linspace(first, last, num_frames)
        positions = torch.clamp(positions, 0, len(frames) - 1).long()
        return torch.stack([frames[pos.item()] for pos in positions])

    # pyav / torchvision: nothing usable was decoded.
    if frames is None or frames.size(0) == 0:
        return None

    span = sampling_rate * num_frames / target_fps * fps
    # After selective decoding the returned frames already cover only the
    # requested clip, so it is treated as clip 0 of 1.
    first, last = get_start_end_idx(
        frames.shape[0],
        span,
        clip_idx if decode_all_video else 0,
        num_clips if decode_all_video else 1,
        use_offset=use_offset,
    )
    # Perform temporal sampling from the decoded video.
    return temporal_sampling(frames, first, last, num_frames)
================================================
FILE: build/lib/slowfast/datasets/imagenet.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
import json
import numpy as np
import os
import random
import re
import torch
import torch.utils.data
from PIL import Image
from torchvision import transforms as transforms_tv
import slowfast.datasets.transform as transform
import slowfast.utils.logging as logging
# import cv2
from slowfast.utils.env import pathmgr
from .build import DATASET_REGISTRY
from .transform import transforms_imagenet_train
logger = logging.get_logger(__name__)
@DATASET_REGISTRY.register()
class Imagenet(torch.utils.data.Dataset):
    """
    ImageNet dataset.

    The image database (a list of {"im_path", "class"} records) is either
    built by listing the split directory or loaded from a pre-computed
    `<mode>.json` file, depending on `cfg.DATA.PATH_TO_PRELOAD_IMDB`.
    """

    def __init__(self, cfg, mode, num_retries=10):
        """
        Args:
            cfg (CfgNode): configs.
            mode (string): one of `train`, `val` or `test`.
            num_retries (int): how many images to try in `__getitem__`
                before giving up.
        """
        self.num_retries = num_retries
        self.cfg = cfg
        self.mode = mode
        self.data_path = cfg.DATA.PATH_TO_DATA_DIR
        assert mode in [
            "train",
            "val",
            "test",
        ], "Split '{}' not supported for ImageNet".format(mode)
        logger.info("Constructing ImageNet {}...".format(mode))
        if cfg.DATA.PATH_TO_PRELOAD_IMDB == "":
            self._construct_imdb()
        else:
            self._load_imdb()

    def _load_imdb(self):
        """Loads the imdb from `<PATH_TO_PRELOAD_IMDB>/<mode>.json`."""
        split_path = os.path.join(
            self.cfg.DATA.PATH_TO_PRELOAD_IMDB, f"{self.mode}.json"
        )
        with pathmgr.open(split_path, "r") as f:
            data = f.read()
        self._imdb = json.loads(data)

    def _construct_imdb(self):
        """Constructs the imdb by listing the split directory."""
        # Compile the split data path
        split_path = os.path.join(self.data_path, self.mode)
        logger.info("{} data path: {}".format(self.mode, split_path))
        # Images are stored per class in subdirs (format: n<number>)
        split_files = pathmgr.ls(split_path)
        self._class_ids = sorted(
            f for f in split_files if re.match(r"^n[0-9]+$", f)
        )
        # Map ImageNet class ids to contiguous ids
        self._class_id_cont_id = {v: i for i, v in enumerate(self._class_ids)}
        # Construct the image db
        self._imdb = []
        for class_id in self._class_ids:
            cont_id = self._class_id_cont_id[class_id]
            im_dir = os.path.join(split_path, class_id)
            for im_name in pathmgr.ls(im_dir):
                im_path = os.path.join(im_dir, im_name)
                self._imdb.append({"im_path": im_path, "class": cont_id})
        logger.info("Number of images: {}".format(len(self._imdb)))
        logger.info("Number of classes: {}".format(len(self._class_ids)))

    def load_image(self, im_path):
        """Prepares the image for network input with format of CHW RGB float"""
        with pathmgr.open(im_path, "rb") as f:
            with Image.open(f) as im:
                im = im.convert("RGB")
        # Scale pixel values to [0, 1].
        im = torch.from_numpy(np.array(im).astype(np.float32) / 255.0)
        # H W C to C H W
        im = im.permute([2, 0, 1])
        return im

    def _prepare_im_res(self, im_path):
        """Loads an image and applies the resnet style augmentation."""
        im = self.load_image(im_path)
        # Train and test setups differ
        train_size, test_size = (
            self.cfg.DATA.TRAIN_CROP_SIZE,
            self.cfg.DATA.TEST_CROP_SIZE,
        )
        if self.mode == "train":
            # For training use random_sized_crop, horizontal_flip, augment, lighting
            im = transform.random_sized_crop_img(
                im,
                train_size,
                jitter_scale=self.cfg.DATA.TRAIN_JITTER_SCALES_RELATIVE,
                jitter_aspect=self.cfg.DATA.TRAIN_JITTER_ASPECT_RELATIVE,
            )
            im, _ = transform.horizontal_flip(prob=0.5, images=im)
            # im = transforms.augment(im, cfg.TRAIN.AUGMENT)
            im = transform.lighting_jitter(
                im,
                0.1,
                self.cfg.DATA.TRAIN_PCA_EIGVAL,
                self.cfg.DATA.TRAIN_PCA_EIGVEC,
            )
        else:
            # For testing use scale and center crop
            im, _ = transform.uniform_crop(
                im, test_size, spatial_idx=1, scale_size=train_size
            )
        # For training and testing use color normalization
        im = transform.color_normalization(
            im, self.cfg.DATA.MEAN, self.cfg.DATA.STD
        )
        # Convert HWC/RGB/float to CHW/BGR/float format
        # im = np.ascontiguousarray(im[:, :, ::-1].transpose([2, 0, 1]))
        return im

    def _prepare_im_tf(self, im_path):
        """Loads an image and applies the transform pipeline used when
        `cfg.AUG.ENABLE` is set."""
        with pathmgr.open(im_path, "rb") as f:
            with Image.open(f) as im:
                im = im.convert("RGB")
        # Convert HWC/BGR/int to HWC/RGB/float format for applying transforms
        train_size, test_size = (
            self.cfg.DATA.TRAIN_CROP_SIZE,
            self.cfg.DATA.TEST_CROP_SIZE,
        )

        if self.mode == "train":
            aug_transform = transforms_imagenet_train(
                img_size=(train_size, train_size),
                color_jitter=self.cfg.AUG.COLOR_JITTER,
                auto_augment=self.cfg.AUG.AA_TYPE,
                interpolation=self.cfg.AUG.INTERPOLATION,
                re_prob=self.cfg.AUG.RE_PROB,
                re_mode=self.cfg.AUG.RE_MODE,
                re_count=self.cfg.AUG.RE_COUNT,
                mean=self.cfg.DATA.MEAN,
                std=self.cfg.DATA.STD,
            )
        else:
            t = []
            size = int((256 / 224) * test_size)
            t.append(
                transforms_tv.Resize(
                    size, interpolation=3
                ),  # to maintain same ratio w.r.t. 224 images
            )
            t.append(transforms_tv.CenterCrop(test_size))
            t.append(transforms_tv.ToTensor())
            t.append(
                transforms_tv.Normalize(self.cfg.DATA.MEAN, self.cfg.DATA.STD)
            )
            aug_transform = transforms_tv.Compose(t)
        im = aug_transform(im)
        return im

    def __load__(self, index):
        """
        Loads and prepares the image at `index`.

        Returns:
            a single prepared image, a list of `cfg.AUG.NUM_SAMPLE` prepared
            images (training with repeated augmentation), or None if
            anything went wrong while loading.
        """
        try:
            # Load the image
            im_path = self._imdb[index]["im_path"]
            # Prepare the image for training / testing
            if self.cfg.AUG.ENABLE:
                if self.mode == "train" and self.cfg.AUG.NUM_SAMPLE > 1:
                    im = []
                    for _ in range(self.cfg.AUG.NUM_SAMPLE):
                        crop = self._prepare_im_tf(im_path)
                        im.append(crop)
                    return im
                else:
                    im = self._prepare_im_tf(im_path)
                    return im
            else:
                im = self._prepare_im_res(im_path)
                return im
        except Exception:
            # Best effort: the caller retries with another image.
            return None

    def __getitem__(self, index):
        """
        Returns the prepared image(s) and label for `index`, falling back to
        randomly chosen images when loading fails.

        Raises:
            RuntimeError: if no image could be loaded within
                `self.num_retries` attempts.
        """
        # if the current image is corrupted, load a different image.
        for _ in range(self.num_retries):
            im = self.__load__(index)
            if im is not None:
                break
            # Data corrupted, retry with a different image.
            index = random.randint(0, len(self._imdb) - 1)
        else:
            # Fail loudly here instead of handing a None image to the
            # collate function.
            raise RuntimeError(
                "Failed to fetch image after {} retries.".format(
                    self.num_retries
                )
            )
        # Retrieve the label
        label = self._imdb[index]["class"]
        if isinstance(im, list):
            label = [label for _ in range(len(im))]
            dummy = [torch.Tensor() for _ in range(len(im))]
            return im, label, dummy, dummy, {}
        else:
            dummy = torch.Tensor()
            return [im], label, dummy, dummy, {}

    def __len__(self):
        """Returns the number of records in the imdb."""
        return len(self._imdb)
================================================
FILE: build/lib/slowfast/datasets/kinetics.py
================================================
#!/usr/bin/env python3
# modified from https://github.com/facebookresearch/SlowFast
import os
import random
import torch
import torch.utils.data
from iopath.common.file_io import g_pathmgr
from torchvision import transforms
import slowfast.utils.logging as logging
from . import decoder as decoder
from . import utils as utils
from . import video_container as container
from .build import DATASET_REGISTRY
from .random_erasing import RandomErasing
from .transform import create_random_augment
logger = logging.get_logger(__name__)
@DATASET_REGISTRY.register()
class Kinetics(torch.utils.data.Dataset):
    """
    Kinetics video loader. Construct the Kinetics video loader, then sample
    clips from the videos. For training and validation, a single clip is
    randomly sampled from every video with random cropping, scaling, and
    flipping. For testing, multiple clips are uniformly sampled from every
    video with uniform cropping. For uniform cropping, we take the left, center,
    and right crop if the width is larger than height, or take top, center, and
    bottom crop if the height is larger than the width.
    """

    def __init__(self, cfg, mode, num_retries=10):
        """
        Construct the Kinetics video loader with a given csv file. The format of
        the csv file is:
        ```
        path_to_video_1 label_1
        path_to_video_2 label_2
        ...
        path_to_video_N label_N
        ```
        Args:
            cfg (CfgNode): configs.
            mode (string): Options includes `train`, `val`, or `test` mode.
                For the train and val mode, the data loader will take data
                from the train or val set, and sample one clip per video.
                For the test mode, the data loader will take data from test set,
                and sample multiple clips per video.
            num_retries (int): number of retries.
        """
        # Only support train, val, and test mode.
        assert mode in [
            "train",
            "val",
            "test",
        ], "Split '{}' not supported for Kinetics".format(mode)
        self.mode = mode
        self.cfg = cfg

        self._video_meta = {}
        self._num_retries = num_retries
        # For training or validation mode, one single clip is sampled from every
        # video. For testing, NUM_ENSEMBLE_VIEWS clips are sampled from every
        # video. For every clip, NUM_SPATIAL_CROPS is cropped spatially from
        # the frames.
        if self.mode in ["train", "val"]:
            self._num_clips = 1
        elif self.mode in ["test"]:
            self._num_clips = (
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS
            )

        logger.info("Constructing Kinetics {}...".format(mode))
        self._construct_loader()
        self.aug = False
        self.rand_erase = False
        self.use_temporal_gradient = False
        self.temporal_gradient_rate = 0.0

        # Augmentation (and random erasing) is only used for training.
        if self.mode == "train" and self.cfg.AUG.ENABLE:
            self.aug = True
            if self.cfg.AUG.RE_PROB > 0:
                self.rand_erase = True

    def _construct_loader(self):
        """
        Construct the video loader: read `<mode>.csv` and record, for every
        video, `self._num_clips` entries of path, label and clip index.
        """
        path_to_file = os.path.join(
            self.cfg.DATA.PATH_TO_DATA_DIR, "{}.csv".format(self.mode)
        )
        assert g_pathmgr.exists(path_to_file), "{} dir not found".format(
            path_to_file
        )

        self._path_to_videos = []
        self._labels = []
        self._spatial_temporal_idx = []
        with g_pathmgr.open(path_to_file, "r") as f:
            for clip_idx, path_label in enumerate(f.read().splitlines()):
                fields = path_label.split(self.cfg.DATA.PATH_LABEL_SEPARATOR)
                assert len(fields) == 2
                path, label = fields
                for idx in range(self._num_clips):
                    self._path_to_videos.append(
                        os.path.join(self.cfg.DATA.PATH_PREFIX, path)
                    )
                    self._labels.append(int(label))
                    self._spatial_temporal_idx.append(idx)
                    self._video_meta[clip_idx * self._num_clips + idx] = {}
        # Report the split through self.mode: this class defines no
        # `_split_idx` attribute, so formatting it would raise
        # AttributeError instead of showing this message.
        assert (
            len(self._path_to_videos) > 0
        ), "Failed to load Kinetics split {} from {}".format(
            self.mode, path_to_file
        )
        logger.info(
            "Constructing kinetics dataloader (size: {}) from {}".format(
                len(self._path_to_videos), path_to_file
            )
        )

    def __getitem__(self, index):
        """
        Given the video index, return the list of frames, label, and video
        index if the video can be fetched and decoded successfully, otherwise
        repeatedly find a random video that can be decoded as a replacement.
        Args:
            index (int): the video index provided by the pytorch sampler. When
                short cycle is used it is a tuple of
                (index, short_cycle_idx).
        Returns:
            frames (tensor): the frames of sampled from the video. The dimension
                is `channel` x `num frames` x `height` x `width`.
            label (int): the label of the current video.
            index (int): if the video provided by pytorch sampler can be
                decoded, then return the index of the video. If not, return the
                index of the video replacement that can be decoded.
            (dict): an empty dict.
            When training with `cfg.AUG.NUM_SAMPLE > 1`, frames, label and
            index are lists with one entry per augmented sample.
        Raises:
            RuntimeError: if no video could be fetched within
                `self._num_retries` attempts.
        """
        short_cycle_idx = None
        # When short cycle is used, input index is a tuple.
        if isinstance(index, tuple):
            index, short_cycle_idx = index

        if self.mode in ["train", "val"]:
            # -1 indicates random sampling.
            temporal_sample_index = -1
            spatial_sample_index = -1
            min_scale = self.cfg.DATA.TRAIN_JITTER_SCALES[0]
            max_scale = self.cfg.DATA.TRAIN_JITTER_SCALES[1]
            crop_size = self.cfg.DATA.TRAIN_CROP_SIZE
            if short_cycle_idx in [0, 1]:
                crop_size = int(
                    round(
                        self.cfg.MULTIGRID.SHORT_CYCLE_FACTORS[short_cycle_idx]
                        * self.cfg.MULTIGRID.DEFAULT_S
                    )
                )
            if self.cfg.MULTIGRID.DEFAULT_S > 0:
                # Decreasing the scale is equivalent to using a larger "span"
                # in a sampling grid.
                min_scale = int(
                    round(
                        float(min_scale)
                        * crop_size
                        / self.cfg.MULTIGRID.DEFAULT_S
                    )
                )
        elif self.mode in ["test"]:
            temporal_sample_index = (
                self._spatial_temporal_idx[index]
                // self.cfg.TEST.NUM_SPATIAL_CROPS
            )
            # spatial_sample_index is in [0, 1, 2]. Corresponding to left,
            # center, or right if width is larger than height, and top, middle,
            # or bottom if height is larger than width.
            spatial_sample_index = (
                (
                    self._spatial_temporal_idx[index]
                    % self.cfg.TEST.NUM_SPATIAL_CROPS
                )
                if self.cfg.TEST.NUM_SPATIAL_CROPS > 1
                else 1
            )
            min_scale, max_scale, crop_size = (
                [self.cfg.DATA.TEST_CROP_SIZE] * 3
                if self.cfg.TEST.NUM_SPATIAL_CROPS > 1
                else [self.cfg.DATA.TRAIN_JITTER_SCALES[0]] * 2
                + [self.cfg.DATA.TEST_CROP_SIZE]
            )
            # The testing is deterministic and no jitter should be performed.
            # min_scale, max_scale, and crop_size are expect to be the same.
            assert len({min_scale, max_scale}) == 1
        else:
            raise NotImplementedError(
                "Does not support {} mode".format(self.mode)
            )
        sampling_rate = utils.get_random_sampling_rate(
            self.cfg.MULTIGRID.LONG_CYCLE_SAMPLING_RATE,
            self.cfg.DATA.SAMPLING_RATE,
        )
        # Try to decode and sample a clip from a video. If the video can not be
        # decoded, repeatedly find a random video replacement that can be decoded.
        for i_try in range(self._num_retries):
            video_container = None
            try:
                video_container = container.get_video_container(
                    self._path_to_videos[index],
                    self.cfg.DATA_LOADER.ENABLE_MULTI_THREAD_DECODE,
                    self.cfg.DATA.DECODING_BACKEND,
                )
            except Exception as e:
                logger.info(
                    "Failed to load video from {} with error {}".format(
                        self._path_to_videos[index], e
                    )
                )
            # Select a random video if the current video was not able to access.
            if video_container is None:
                logger.warning(
                    "Failed to meta load video idx {} from {}; trial {}".format(
                        index, self._path_to_videos[index], i_try
                    )
                )
                # The same video is retried for the first half of the
                # attempts; test mode never switches to another video.
                if self.mode not in ["test"] and i_try > self._num_retries // 2:
                    # let's try another one
                    index = random.randint(0, len(self._path_to_videos) - 1)
                continue

            # Decode video. Meta info is used to perform selective decoding.
            frames = decoder.decode(
                video_container,
                sampling_rate,
                self.cfg.DATA.NUM_FRAMES,
                temporal_sample_index,
                self.cfg.TEST.NUM_ENSEMBLE_VIEWS,
                video_meta=self._video_meta[index],
                target_fps=self.cfg.DATA.TARGET_FPS,
                backend=self.cfg.DATA.DECODING_BACKEND,
                max_spatial_scale=min_scale,
                use_offset=self.cfg.DATA.USE_OFFSET_SAMPLING,
            )

            # If decoding failed (wrong format, video is too short, and etc),
            # select another video.
            if frames is None:
                logger.warning(
                    "Failed to decode video idx {} from {}; trial {}".format(
                        index, self._path_to_videos[index], i_try
                    )
                )
                if self.mode not in ["test"] and i_try > self._num_retries // 2:
                    # let's try another one
                    index = random.randint(0, len(self._path_to_videos) - 1)
                continue

            if self.aug:
                if self.cfg.AUG.NUM_SAMPLE > 1:
                    # Repeated augmentation: several independently augmented
                    # samples of the same decoded clip.
                    frame_list = []
                    label_list = []
                    index_list = []
                    for _ in range(self.cfg.AUG.NUM_SAMPLE):
                        new_frames = self._aug_frame(
                            frames,
                            spatial_sample_index,
                            min_scale,
                            max_scale,
                            crop_size,
                        )
                        label = self._labels[index]
                        new_frames = utils.pack_pathway_output(
                            self.cfg, new_frames
                        )
                        frame_list.append(new_frames)
                        label_list.append(label)
                        index_list.append(index)
                    return frame_list, label_list, index_list, {}

                else:
                    frames = self._aug_frame(
                        frames,
                        spatial_sample_index,
                        min_scale,
                        max_scale,
                        crop_size,
                    )

            else:
                frames = utils.tensor_normalize(
                    frames, self.cfg.DATA.MEAN, self.cfg.DATA.STD
                )
                # T H W C -> C T H W.
                frames = frames.permute(3, 0, 1, 2)
                # Perform data augmentation.
                frames = utils.spatial_sampling(
                    frames,
                    spatial_idx=spatial_sample_index,
                    min_scale=min_scale,
                    max_scale=max_scale,
                    crop_size=crop_size,
                    random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
                    inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
                )

            label = self._labels[index]
            frames = utils.pack_pathway_output(self.cfg, frames)
            return frames, label, index, {}
        else:
            raise RuntimeError(
                "Failed to fetch video after {} retries.".format(
                    self._num_retries
                )
            )

    def _aug_frame(
        self,
        frames,
        spatial_sample_index,
        min_scale,
        max_scale,
        crop_size,
    ):
        """
        Apply random augmentation, normalization, spatial sampling and
        (optionally) random erasing to a decoded clip.
        Args:
            frames (tensor): decoded clip, treated here as T x H x W x C.
            spatial_sample_index (int): forwarded to `utils.spatial_sampling`
                as `spatial_idx`.
            min_scale (int): forwarded to `utils.spatial_sampling`.
            max_scale (int): forwarded to `utils.spatial_sampling`.
            crop_size (int): forwarded to `utils.spatial_sampling`.
        Returns:
            frames (tensor): the augmented clip as returned by
                `utils.spatial_sampling` (and random erasing, if enabled).
        """
        aug_transform = create_random_augment(
            input_size=(frames.size(1), frames.size(2)),
            auto_augment=self.cfg.AUG.AA_TYPE,
            interpolation=self.cfg.AUG.INTERPOLATION,
        )
        # T H W C -> T C H W.
        frames = frames.permute(0, 3, 1, 2)
        # The augmentation operates on a list of PIL images.
        list_img = self._frame_to_list_img(frames)
        list_img = aug_transform(list_img)
        frames = self._list_img_to_frames(list_img)
        # T C H W -> T H W C.
        frames = frames.permute(0, 2, 3, 1)

        frames = utils.tensor_normalize(
            frames, self.cfg.DATA.MEAN, self.cfg.DATA.STD
        )
        # T H W C -> C T H W.
        frames = frames.permute(3, 0, 1, 2)
        # Perform data augmentation.
        scl, asp = (
            self.cfg.DATA.TRAIN_JITTER_SCALES_RELATIVE,
            self.cfg.DATA.TRAIN_JITTER_ASPECT_RELATIVE,
        )
        # Relative scale / aspect jitter is only used for training and only
        # when configured.
        relative_scales = (
            None if (self.mode not in ["train"] or len(scl) == 0) else scl
        )
        relative_aspect = (
            None if (self.mode not in ["train"] or len(asp) == 0) else asp
        )
        frames = utils.spatial_sampling(
            frames,
            spatial_idx=spatial_sample_index,
            min_scale=min_scale,
            max_scale=max_scale,
            crop_size=crop_size,
            random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
            inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
            aspect_ratio=relative_aspect,
            scale=relative_scales,
            motion_shift=self.cfg.DATA.TRAIN_JITTER_MOTION_SHIFT
            if self.mode in ["train"]
            else False,
        )

        if self.rand_erase:
            erase_transform = RandomErasing(
                self.cfg.AUG.RE_PROB,
                mode=self.cfg.AUG.RE_MODE,
                max_count=self.cfg.AUG.RE_COUNT,
                num_splits=self.cfg.AUG.RE_COUNT,
                device="cpu",
            )
            # Swap the first two dimensions for the erase transform, then
            # swap them back.
            frames = frames.permute(1, 0, 2, 3)
            frames = erase_transform(frames)
            frames = frames.permute(1, 0, 2, 3)

        return frames

    def _frame_to_list_img(self, frames):
        """Converts each entry along the first dimension to a PIL image."""
        img_list = [
            transforms.ToPILImage()(frames[i]) for i in range(frames.size(0))
        ]
        return img_list

    def _list_img_to_frames(self, img_list):
        """Converts a list of PIL images back to one stacked tensor."""
        img_list = [transforms.ToTensor()(img) for img in img_list]
        return torch.stack(img_list)

    def __len__(self):
        """
        Returns:
            (int): the number of videos in the dataset.
        """
        return self.num_videos

    @property
    def num_videos(self):
        """
        Returns:
            (int): the number of videos in the dataset.
        """
        return len(self._path_to_videos)
================================================
FILE: build/lib/slowfast/datasets/loader.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Data loader."""
import itertools
import numpy as np
from functools import partial
import torch
from torch.utils.data._utils.collate import default_collate
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler
from slowfast.datasets.multigrid_helper import ShortCycleBatchSampler
from . import utils as utils
from .build import build_dataset
def multiple_samples_collate(batch, fold=False):
    """
    Collate function for repeated augmentation, where every instance of the
    batch carries a list of samples rather than a single one.
    Args:
        batch (tuple or list): data batch to collate; each instance is an
            (inputs, labels, video_idx, extra_data) tuple whose first three
            entries are per-sample lists.
        fold (bool): if True, the collated inputs are wrapped in a
            one-element list.
    Returns:
        (tuple): collated data batch.
    """
    per_inputs, per_labels, per_video_idx, per_extra = zip(*batch)

    # Flatten the per-instance sample lists into one flat list each, then
    # collate every field.
    collated_inputs = default_collate(
        list(itertools.chain.from_iterable(per_inputs))
    )
    collated_labels = default_collate(
        list(itertools.chain.from_iterable(per_labels))
    )
    collated_video_idx = default_collate(
        list(itertools.chain.from_iterable(per_video_idx))
    )
    collated_extra = default_collate(per_extra)

    if fold:
        collated_inputs = [collated_inputs]
    return collated_inputs, collated_labels, collated_video_idx, collated_extra
def detection_collate(batch):
    """
    Collate function for the detection task. Bounding boxes, labels and
    metadata of the different samples are concatenated along the first
    dimension instead of being stacked into a batch-size dimension.
    Args:
        batch (tuple or list): data batch to collate.
    Returns:
        (tuple): collated detection data batch.
    """
    inputs, labels, video_idx, extra_data = zip(*batch)
    inputs = default_collate(inputs)
    video_idx = default_collate(video_idx)
    labels = torch.tensor(np.concatenate(labels, axis=0)).float()

    collated_extra_data = {}
    for key in extra_data[0].keys():
        per_sample = [sample[key] for sample in extra_data]
        if key in ("boxes", "ori_boxes"):
            # Prepend the sample's position in the batch as an extra first
            # column, so boxes stay attributable after concatenation.
            tagged = []
            for sample_idx, boxes in enumerate(per_sample):
                tag_column = np.full((boxes.shape[0], 1), float(sample_idx))
                tagged.append(np.concatenate([tag_column, boxes], axis=1))
            merged = np.concatenate(tagged, axis=0)
            collated_extra_data[key] = torch.tensor(merged).float()
        elif key == "metadata":
            flat_metadata = list(itertools.chain(*per_sample))
            collated_extra_data[key] = torch.tensor(flat_metadata).view(-1, 2)
        else:
            collated_extra_data[key] = default_collate(per_sample)

    return inputs, labels, video_idx, collated_extra_data
def construct_loader(cfg, split, is_precise_bn=False):
    """
    Constructs the data loader for the given dataset.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        split (str): the split of the data loader. Options include `train`,
            `val`, and `test`.
        is_precise_bn (bool): if True, the short-cycle batch sampler is not
            used even when `cfg.MULTIGRID.SHORT_CYCLE` is enabled.
    Returns:
        (torch.utils.data.DataLoader): the constructed data loader.
    """
    assert split in ["train", "val", "test"]
    if split in ["train"]:
        dataset_name = cfg.TRAIN.DATASET
        batch_size = int(cfg.TRAIN.BATCH_SIZE / max(1, cfg.NUM_GPUS))
        shuffle = True
        drop_last = True
    elif split in ["val"]:
        # Validation reuses the training dataset name and batch size.
        dataset_name = cfg.TRAIN.DATASET
        batch_size = int(cfg.TRAIN.BATCH_SIZE / max(1, cfg.NUM_GPUS))
        shuffle = False
        drop_last = False
    elif split in ["test"]:
        dataset_name = cfg.TEST.DATASET
        batch_size = int(cfg.TEST.BATCH_SIZE / max(1, cfg.NUM_GPUS))
        shuffle = False
        drop_last = False

    # DataLoader rejects persistent_workers=True when num_workers is 0, so
    # only keep workers alive when there are workers to keep.
    persistent_workers = cfg.DATA_LOADER.NUM_WORKERS > 0

    # Construct the dataset
    dataset = build_dataset(dataset_name, cfg, split)

    if isinstance(dataset, torch.utils.data.IterableDataset):
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=drop_last,
            collate_fn=detection_collate if cfg.DETECTION.ENABLE else None,
            worker_init_fn=utils.loader_worker_init_fn(dataset),
            persistent_workers=persistent_workers,
        )
    else:
        if (
            cfg.MULTIGRID.SHORT_CYCLE
            and split in ["train"]
            and not is_precise_bn
        ):
            # Create a sampler for multi-process training
            sampler = utils.create_sampler(dataset, shuffle, cfg)
            batch_sampler = ShortCycleBatchSampler(
                sampler, batch_size=batch_size, drop_last=drop_last, cfg=cfg
            )
            # Create a loader
            loader = torch.utils.data.DataLoader(
                dataset,
                batch_sampler=batch_sampler,
                num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                worker_init_fn=utils.loader_worker_init_fn(dataset),
                persistent_workers=persistent_workers,
            )
        else:
            # Create a sampler for multi-process training
            sampler = utils.create_sampler(dataset, shuffle, cfg)
            # Create a loader
            if cfg.DETECTION.ENABLE:
                collate_func = detection_collate
            elif cfg.AUG.NUM_SAMPLE > 1 and split in ["train"]:
                # Repeated augmentation: every instance holds several samples.
                collate_func = partial(
                    multiple_samples_collate, fold="imagenet" in dataset_name
                )
            else:
                collate_func = None

            loader = torch.utils.data.DataLoader(
                dataset,
                batch_size=batch_size,
                # shuffle and sampler are mutually exclusive in DataLoader.
                shuffle=(False if sampler else shuffle),
                sampler=sampler,
                num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                drop_last=drop_last,
                collate_fn=collate_func,
                worker_init_fn=utils.loader_worker_init_fn(dataset),
                persistent_workers=persistent_workers,
            )
    return loader
def shuffle_dataset(loader, cur_epoch):
    """
    Shuffles the data by advancing the epoch of the loader's sampler.
    Args:
        loader (loader): data loader to perform shuffle.
        cur_epoch (int): number of the current epoch.
    """
    iterable_kind = torch.utils.data.dataloader._DatasetKind.Iterable
    if loader._dataset_kind == iterable_kind:
        # Iterable datasets are expected to carry their own sampler.
        if not hasattr(loader.dataset, "sampler"):
            raise RuntimeError(
                "Unknown sampler for IterableDataset when shuffling dataset"
            )
        sampler = loader.dataset.sampler
    elif isinstance(loader.batch_sampler, ShortCycleBatchSampler):
        # The short-cycle batch sampler wraps the actual sampler.
        sampler = loader.batch_sampler.sampler
    else:
        sampler = loader.sampler

    assert isinstance(
        sampler, (RandomSampler, DistributedSampler)
    ), "Sampler type '{}' not supported".format(type(sampler))

    # RandomSampler handles shuffling automatically; DistributedSampler
    # shuffles data based on epoch.
    if isinstance(sampler, DistributedSampler):
        sampler.set_epoch(cur_epoch)
================================================
FILE: build/lib/slowfast/datasets/mixup.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
This implementation is based on
https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/mixup.py,
published under an Apache License 2.0.
COMMENT FROM ORIGINAL:
Mixup and Cutmix
Papers:
mixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412)
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features (https://arxiv.org/abs/1905.04899) # NOQA
Code Reference:
CutMix: https://github.com/clovaai/CutMix-PyTorch
Hacked together by / Copyright 2020 Ross Wightman
"""
import numpy as np
import torch
def convert_to_one_hot(targets, num_classes, on_value=1.0, off_value=0.0):
    """
    Turns class indices into (possibly smoothed) one-hot rows.
    Args:
        targets (tensor): class labels; flattened to one index per row.
        num_classes (int): total number of classes (row width).
        on_value (float): value written at the ground truth class.
        off_value (float): value of every other entry; non-zero values
            implement label smoothing.
    Returns:
        (tensor): a `len(targets)` x `num_classes` tensor on the same device
            as `targets`.
    """
    class_column = targets.long().view(-1, 1)
    encoded = torch.full(
        (class_column.size()[0], num_classes),
        off_value,
        device=class_column.device,
    )
    # Write on_value at each row's class index, in place.
    encoded.scatter_(1, class_column, on_value)
    return encoded
def mixup_target(target, num_classes, lam=1.0, smoothing=0.0):
    """
    Builds the soft target of a mixed batch: the smoothed one-hot encoding
    of the labels blended with that of the batch-reversed labels.
    Args:
        target (tensor): class labels.
        num_classes (int): total number of classes.
        lam (float): lambda value for mixup/cutmix; weight of the original
            labels, the reversed labels get `1 - lam`.
        smoothing (float): label smoothing value.
    Returns:
        (tensor): the blended target of shape `len(target)` x `num_classes`.
    """
    off_value = smoothing / num_classes
    on_value = 1.0 - smoothing + off_value

    def _encode(labels):
        return convert_to_one_hot(
            labels,
            num_classes,
            on_value=on_value,
            off_value=off_value,
        )

    straight = _encode(target)
    # The mixed-in partner of sample i is sample (batch - 1 - i).
    reversed_batch = _encode(target.flip(0))
    return straight * lam + reversed_batch * (1.0 - lam)
def rand_bbox(img_shape, lam, margin=0.0, count=None):
    """
    Generates a random bbox whose side lengths are `sqrt(1 - lam)` times the
    image sides, clipped to the image.
    Args:
        img_shape (tuple): Image shape as tuple; the last two entries are
            read as height and width.
        lam (float): Cutmix lambda value
        margin (float): Percentage of bbox dimension to enforce as margin
            (reduce amount of box outside image)
        count (int): Number of bbox to generate
    Returns:
        (tuple): `(yl, yh, xl, xh)` box bounds.
    """
    side_ratio = np.sqrt(1 - lam)
    height, width = img_shape[-2:]
    box_h = int(height * side_ratio)
    box_w = int(width * side_ratio)
    pad_y = int(margin * box_h)
    pad_x = int(margin * box_w)

    # Draw the box centre: y first, then x.
    center_y = np.random.randint(0 + pad_y, height - pad_y, size=count)
    center_x = np.random.randint(0 + pad_x, width - pad_x, size=count)

    half_h = box_h // 2
    half_w = box_w // 2
    return (
        np.clip(center_y - half_h, 0, height),
        np.clip(center_y + half_h, 0, height),
        np.clip(center_x - half_w, 0, width),
        np.clip(center_x + half_w, 0, width),
    )
def get_cutmix_bbox(img_shape, lam, correct_lam=True, count=None):
    """
    Generates the box coordinates for cutmix.
    Args:
        img_shape (tuple): Image shape as tuple
        lam (float): Cutmix lambda value
        correct_lam (bool): Apply lambda correction when cutmix bbox clipped by
            image borders.
        count (int): Number of bbox to generate
    Returns:
        (tuple): the `(yl, yu, xl, xu)` box bounds and the (possibly
            corrected) lambda.
    """
    top, bottom, left, right = rand_bbox(img_shape, lam, count=count)
    box = (top, bottom, left, right)
    if not correct_lam:
        return box, lam
    # Recompute lambda from the area the box actually covers.
    covered_area = (bottom - top) * (right - left)
    image_area = float(img_shape[-2] * img_shape[-1])
    return box, 1.0 - covered_area / image_area
class MixUp:
    """
    Apply mixup and/or cutmix for videos at batch level.
    mixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412)
    CutMix: Regularization Strategy to Train Strong Classifiers with Localizable
        Features (https://arxiv.org/abs/1905.04899)
    """

    def __init__(
        self,
        mixup_alpha=1.0,
        cutmix_alpha=0.0,
        mix_prob=1.0,
        switch_prob=0.5,
        correct_lam=True,
        label_smoothing=0.1,
        num_classes=1000,
    ):
        """
        Args:
            mixup_alpha (float): Mixup alpha value.
            cutmix_alpha (float): Cutmix alpha value.
            mix_prob (float): Probability of applying mixup or cutmix.
            switch_prob (float): Probability of switching to cutmix instead of
                mixup when both are active.
            correct_lam (bool): Apply lambda correction when cutmix bbox
                clipped by image borders.
            label_smoothing (float): Apply label smoothing to the mixed target
                tensor. If label_smoothing is not used, set it to 0.
            num_classes (int): Number of classes for target.
        """
        self.mixup_alpha = mixup_alpha
        self.cutmix_alpha = cutmix_alpha
        self.mix_prob = mix_prob
        self.switch_prob = switch_prob
        self.label_smoothing = label_smoothing
        self.num_classes = num_classes
        self.correct_lam = correct_lam

    def _get_mixup_params(self):
        """
        Draws the mixing parameters for one batch.
        Returns:
            lam (float): mixing weight; 1.0 means "do not mix".
            use_cutmix (bool): True to apply cutmix, False to apply mixup.
        """
        lam = 1.0
        use_cutmix = False
        if np.random.rand() < self.mix_prob:
            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
                # Both enabled: pick one of them for this batch.
                use_cutmix = np.random.rand() < self.switch_prob
                lam_mix = (
                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
                    if use_cutmix
                    else np.random.beta(self.mixup_alpha, self.mixup_alpha)
                )
            elif self.mixup_alpha > 0.0:
                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)
            elif self.cutmix_alpha > 0.0:
                use_cutmix = True
                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
            else:
                # Neither mixup nor cutmix is enabled: there is no lambda to
                # draw, so report "no mixing" instead of reading an unbound
                # lam_mix.
                return lam, use_cutmix
            lam = float(lam_mix)
        return lam, use_cutmix

    def _mix_batch(self, x):
        """
        Mixes the batch `x` in place with its batch-reversed copy.
        Args:
            x (tensor): the input batch; modified in place.
        Returns:
            lam (float): the mixing weight that was applied (1.0 if the batch
                was left untouched).
        """
        lam, use_cutmix = self._get_mixup_params()
        if lam == 1.0:
            return 1.0
        if use_cutmix:
            (yl, yh, xl, xh), lam = get_cutmix_bbox(
                x.shape,
                lam,
                correct_lam=self.correct_lam,
            )
            # Paste the box region of the reversed batch into the batch.
            x[..., yl:yh, xl:xh] = x.flip(0)[..., yl:yh, xl:xh]
        else:
            x_flipped = x.flip(0).mul_(1.0 - lam)
            x.mul_(lam).add_(x_flipped)
        return lam

    def __call__(self, x, target):
        """
        Args:
            x (tensor): the input batch; mixed in place.
            target (tensor): class labels of the batch.
        Returns:
            x (tensor): the mixed batch.
            target (tensor): the mixed (and label-smoothed) target.
        """
        assert len(x) > 1, "Batch size should be greater than 1 for mixup."
        lam = self._mix_batch(x)
        target = mixup_target(
            target, self.num_classes, lam, self.label_smoothing
        )
        return x, target
================================================
FILE: build/lib/slowfast/datasets/multigrid_helper.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Helper functions for multigrid training."""
import numpy as np
import torch
from torch.utils.data.sampler import Sampler
TORCH_MAJOR = int(torch.__version__.split(".")[0])
TORCH_MINOR = int(torch.__version__.split(".")[1])
# Compare (major, minor) as a tuple. Testing the two numbers independently
# (`major >= 1 and minor >= 8`) wrongly rejects releases such as 2.0, whose
# minor component is below 8, and sends them to the legacy `torch._six`
# import below.
if (TORCH_MAJOR, TORCH_MINOR) >= (1, 8):
    _int_classes = int
else:
    from torch._six import int_classes as _int_classes
class ShortCycleBatchSampler(Sampler):
    """
    Batch sampler that rotates through three batch sizes ("short cycle").
    See paper "A Multigrid Method for Efficiently Training Video Models",
    Wu et al., 2019 (https://arxiv.org/abs/1912.00998) for details.
    """

    def __init__(self, sampler, batch_size, drop_last, cfg):
        """
        Args:
            sampler (Sampler): source of sample indices.
            batch_size (int): base batch size; must be a positive non-bool int.
            drop_last (bool): whether to drop a trailing incomplete batch.
            cfg: config providing DATA.TRAIN_CROP_SIZE, MULTIGRID.DEFAULT_S and
                MULTIGRID.SHORT_CYCLE_FACTORS (the first two factors are used).
        Raises:
            ValueError: if any of `sampler`, `batch_size` or `drop_last` has an
                unexpected type or value.
        """
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        batch_size_ok = (
            isinstance(batch_size, _int_classes)
            and not isinstance(batch_size, bool)
            and batch_size > 0
        )
        if not batch_size_ok:
            raise ValueError(
                "batch_size should be a positive integer value, "
                "but got batch_size={}".format(batch_size)
            )
        if not isinstance(drop_last, bool):
            raise ValueError(
                "drop_last should be a boolean value, but got "
                "drop_last={}".format(drop_last)
            )
        self.sampler = sampler
        self.drop_last = drop_last

        # One batch-size multiplier per short-cycle factor: the squared ratio
        # between the training crop size and the scaled default size.
        multipliers = []
        for factor in cfg.MULTIGRID.SHORT_CYCLE_FACTORS:
            ratio = float(cfg.DATA.TRAIN_CROP_SIZE) / (
                factor * cfg.MULTIGRID.DEFAULT_S
            )
            multipliers.append(int(round(ratio ** 2)))

        self.batch_sizes = [
            batch_size * multipliers[0],
            batch_size * multipliers[1],
            batch_size,
        ]

    def __iter__(self):
        """
        Yield batches of `(index, cycle_position)` pairs, where the cycle
        position is the number of completed batches modulo 3.
        """
        completed = 0
        target_size = self.batch_sizes[0]
        pending = []
        for sample_idx in self.sampler:
            pending.append((sample_idx, completed % 3))
            if len(pending) != target_size:
                continue
            yield pending
            completed += 1
            target_size = self.batch_sizes[completed % 3]
            pending = []
        # Emit the leftover partial batch unless the caller asked to drop it.
        if pending and not self.drop_last:
            yield pending

    def __len__(self):
        """
        Estimate the number of batches from the mean of the three batch sizes.
        """
        mean_size = sum(self.batch_sizes) / 3.0
        rounding = np.floor if self.drop_last else np.ceil
        return int(rounding(len(self.sampler) / mean_size))
================================================
FILE: build/lib/slowfast/datasets/ptv_datasets.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import functools
import os
from typing import Dict
import torch
from torch.utils.data import (
DistributedSampler,
RandomSampler,
SequentialSampler,
)
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
NormalizeVideo,
RandomCropVideo,
RandomHorizontalFlipVideo,
)
import slowfast.utils.logging as logging
from pytorchvideo.data import (
Charades,
LabeledVideoDataset,
SSv2,
make_clip_sampler,
)
from pytorchvideo.data.labeled_video_paths import LabeledVideoPaths
from pytorchvideo.transforms import (
ApplyTransformToKey,
RandomShortSideScale,
ShortSideScale,
UniformCropVideo,
UniformTemporalSubsample,
)
from . import utils as utils
from .build import DATASET_REGISTRY
logger = logging.get_logger(__name__)
class PTVDatasetWrapper(torch.utils.data.IterableDataset):
    """
    Thin iterable wrapper around a PyTorchVideo dataset.
    """

    def __init__(self, num_videos, clips_per_video, crops_per_clip, dataset):
        """
        Store the wrapped dataset and its bookkeeping counts.
        Args:
            num_videos (int): number of videos in the dataset.
            clips_per_video (int): number of clips per video in the dataset.
            crops_per_clip (int): number of crops per clip in the dataset.
            dataset (torch.utils.data.IterableDataset): a PyTorchVideo dataset.
        """
        self.dataset = dataset
        self._num_videos = num_videos
        self._clips_per_video = clips_per_video
        self._crops_per_clip = crops_per_clip

    def __next__(self):
        """
        Fetch the next item by delegating to the wrapped dataset.
        """
        item = self.dataset.__next__()
        return item

    @property
    def sampler(self):
        """
        Returns:
            the `video_sampler` attribute of the wrapped dataset.
        """
        wrapped = self.dataset
        return wrapped.video_sampler

    def __len__(self):
        """
        Returns:
            (int): sampler length multiplied by clips per video and crops per
                clip.
        """
        sampled_videos = len(self.sampler)
        return sampled_videos * self._clips_per_video * self._crops_per_clip

    @property
    def num_videos(self):
        """
        Returns:
            (int): number of videos multiplied by clips per video and crops
                per clip, i.e. the total clip count.
        """
        total = self._num_videos * self._clips_per_video
        return total * self._crops_per_clip

    def __iter__(self):
        """
        The wrapper is its own iterator.
        """
        return self
class PackPathway(torch.nn.Module):
    """
    Transform that hands the video frames to `utils.pack_pathway_output`,
    which is expected to return a list of tensors, one per pathway.
    """

    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode): configs, forwarded on every call.
        """
        super().__init__()
        self.cfg = cfg

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (Tensor): video frames to pack.
        Returns:
            the value returned by `utils.pack_pathway_output(cfg, x)`.
        """
        packed = utils.pack_pathway_output(self.cfg, x)
        return packed
class DictToTuple(torch.nn.Module):
    """
    Transform that flattens a sample dict into the tuple layout used by the
    PySlowFast datasets: (video, label, index, extra_dict).
    """

    def __init__(self, num_clips, num_crops):
        """
        Args:
            num_clips (int): number of clips per video.
            num_crops (int): number of crops per clip.
        """
        super().__init__()
        self._num_clips = num_clips
        self._num_crops = num_crops

    def forward(self, x: Dict[str, torch.Tensor]):
        """
        Args:
            x (dict): sample with the keys "video_index", "clip_index",
                "aug_index", "video" and "label".
        Returns:
            tuple: `(video, label, index, {})` where `index` combines the
                video, clip and crop indices into one flat number.
        """
        video_offset = x["video_index"] * self._num_clips * self._num_crops
        clip_offset = x["clip_index"] * self._num_crops
        flat_index = video_offset + clip_offset + x["aug_index"]
        return x["video"], x["label"], flat_index, {}
def div255(x):
    """
    Divide the clip values by 255, mapping the [0, 255] range onto [0, 1].
    Args:
        x (Tensor): A tensor of the clip's RGB frames with shape:
            (channel, time, height, width).
    Returns:
        x (Tensor): the input divided by 255.
    """
    scaled = x / 255.0
    return scaled
@DATASET_REGISTRY.register()
def Ptvkinetics(cfg, mode):
    """
    Build the PyTorchVideo-backed Kinetics dataset from a csv file located at
    `<cfg.DATA.PATH_TO_DATA_DIR>/<mode>.csv`. The format of the csv file is:
    ```
    path_to_video_1 label_1
    path_to_video_2 label_2
    ...
    path_to_video_N label_N
    ```
    For `train` and `val` mode, a single clip is randomly sampled from every
    video with random scaling, random cropping and (if enabled) random
    flipping. For `test` mode, a constant number of clips per video is sampled
    and uniform cropping is applied.
    Args:
        cfg (CfgNode): configs.
        mode (string): Options includes `train`, `val`, or `test` mode.
            For the train and val mode, the data loader will take data
            from the train or val set, and sample one clip per video.
            For the test mode, the data loader will take data from test set,
            and sample multiple clips per video.
    Returns:
        PTVDatasetWrapper: wrapper around the constructed LabeledVideoDataset.
    """
    # Only support train, val, and test mode.
    assert mode in ("train", "val", "test"), "Split '{}' not supported".format(
        mode
    )

    logger.info("Constructing Ptvkinetics {}...".format(mode))

    clip_duration = (
        cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE / cfg.DATA.TARGET_FPS
    )
    path_to_file = os.path.join(
        cfg.DATA.PATH_TO_DATA_DIR, "{}.csv".format(mode)
    )
    labeled_video_paths = LabeledVideoPaths.from_path(path_to_file)
    num_videos = len(labeled_video_paths)
    labeled_video_paths.path_prefix = cfg.DATA.PATH_PREFIX
    logger.info(
        "Constructing kinetics dataloader (size: {}) from {}".format(
            num_videos, path_to_file
        )
    )

    single_clip_mode = mode in ("train", "val")
    if single_clip_mode:
        num_clips = 1
        num_crops = 1

        # Per-video pipeline: subsample, normalize, jitter, crop, flip, pack.
        video_ops = [
            UniformTemporalSubsample(cfg.DATA.NUM_FRAMES),
            Lambda(div255),
            NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
            RandomShortSideScale(
                min_size=cfg.DATA.TRAIN_JITTER_SCALES[0],
                max_size=cfg.DATA.TRAIN_JITTER_SCALES[1],
            ),
            RandomCropVideo(cfg.DATA.TRAIN_CROP_SIZE),
        ]
        if cfg.DATA.RANDOM_FLIP:
            video_ops.append(RandomHorizontalFlipVideo(p=0.5))
        video_ops.append(PackPathway(cfg))

        transform = Compose(
            [
                ApplyTransformToKey(key="video", transform=Compose(video_ops)),
                DictToTuple(num_clips, num_crops),
            ]
        )

        clip_sampler = make_clip_sampler("random", clip_duration)
        if cfg.NUM_GPUS > 1:
            video_sampler = DistributedSampler
        elif mode == "train":
            video_sampler = RandomSampler
        else:
            video_sampler = SequentialSampler
    else:
        num_clips = cfg.TEST.NUM_ENSEMBLE_VIEWS
        num_crops = cfg.TEST.NUM_SPATIAL_CROPS

        # Per-video pipeline applied before the uniform crop.
        video_ops = [
            UniformTemporalSubsample(cfg.DATA.NUM_FRAMES),
            Lambda(div255),
            NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
            ShortSideScale(size=cfg.DATA.TRAIN_JITTER_SCALES[0]),
        ]
        sample_ops = [
            ApplyTransformToKey(key="video", transform=Compose(video_ops)),
            UniformCropVideo(size=cfg.DATA.TEST_CROP_SIZE),
            ApplyTransformToKey(key="video", transform=PackPathway(cfg)),
            DictToTuple(num_clips, num_crops),
        ]
        transform = Compose(sample_ops)

        clip_sampler = make_clip_sampler(
            "constant_clips_per_video",
            clip_duration,
            num_clips,
            num_crops,
        )
        if cfg.NUM_GPUS > 1:
            video_sampler = DistributedSampler
        else:
            video_sampler = SequentialSampler

    ptv_dataset = LabeledVideoDataset(
        labeled_video_paths=labeled_video_paths,
        clip_sampler=clip_sampler,
        video_sampler=video_sampler,
        transform=transform,
        decode_audio=False,
        decoder=cfg.DATA.DECODING_BACKEND,
    )
    return PTVDatasetWrapper(
        num_videos=num_videos,
        clips_per_video=num_clips,
        crops_per_clip=num_crops,
        dataset=ptv_dataset,
    )
def process_charades_label(x, mode, num_classes):
    """
    Replace the label of a Charades clip with a binary class vector. In
    `train` mode the entries of x["label"] are aggregated via
    `utils.aggregate_labels`; in any other mode x["video_label"] is used.
    Args:
        x (dict): a video clip including label index.
        mode (string): Options includes `train`, `val`, or `test` mode.
        num_classes (int): Number of classes in the dataset.
    Returns:
        x (dict): the same dict, with x["label"] overwritten by a tensor built
            from `utils.as_binary_vector(label, num_classes)`.
    """
    if mode == "train":
        label = utils.aggregate_labels(x["label"])
    else:
        label = x["video_label"]
    binary_label = utils.as_binary_vector(label, num_classes)
    x["label"] = torch.as_tensor(binary_label)
    return x
def rgb2bgr(x):
    """
    Convert clip frames from RGB mode to BGR mode by reversing the order of
    the first three entries along the leading (channel) dimension.
    Args:
        x (Tensor): A tensor of the clip's RGB frames with shape:
            (channel, time, height, width).
    Returns:
        x (Tensor): Converted tensor
    """
    reversed_channels = [2, 1, 0]
    return x[reversed_channels, ...]
@DATASET_REGISTRY.register()
def Ptvcharades(cfg, mode):
    """
    Build the PyTorchVideo-backed Charades dataset.
    Load Charades data (frame paths, labels, etc. ) to Charades Dataset object.
    The dataset could be downloaded from Charades official website
    (https://allenai.org/plato/charades/).
    Please see datasets/DATASET.md for more information about the data format.
    For `train` and `val` mode, a single clip is randomly sampled from every
    video with random scaling, random cropping and (if enabled) random
    flipping. For `test` mode, a constant number of clips per video is sampled
    and uniform cropping is applied.
    Args:
        cfg (CfgNode): configs.
        mode (string): Options includes `train`, `val`, or `test` mode.
            For the train and val mode, the data loader will take data
            from the train or val set, and sample one clip per video.
            For the test mode, the data loader will take data from test set,
            and sample multiple clips per video.
    Returns:
        PTVDatasetWrapper: wrapper around the constructed Charades dataset.
    """
    # Only support train, val, and test mode.
    assert mode in ("train", "val", "test"), "Split '{}' not supported".format(
        mode
    )

    logger.info("Constructing Ptvcharades {}...".format(mode))

    frame_span = (cfg.DATA.NUM_FRAMES - 1) * cfg.DATA.SAMPLING_RATE + 1
    clip_duration = frame_span / cfg.DATA.TARGET_FPS

    def _label_transform():
        # Sample-level transform turning the clip label into a binary vector.
        return Lambda(
            functools.partial(
                process_charades_label,
                mode=mode,
                num_classes=cfg.MODEL.NUM_CLASSES,
            )
        )

    single_clip_mode = mode in ("train", "val")
    if single_clip_mode:
        num_clips = 1
        num_crops = 1

        # Per-video pipeline: normalize, jitter, crop, channel swap, flip, pack.
        video_ops = [
            Lambda(div255),
            NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
            RandomShortSideScale(
                min_size=cfg.DATA.TRAIN_JITTER_SCALES[0],
                max_size=cfg.DATA.TRAIN_JITTER_SCALES[1],
            ),
            RandomCropVideo(cfg.DATA.TRAIN_CROP_SIZE),
            Lambda(rgb2bgr),
        ]
        if cfg.DATA.RANDOM_FLIP:
            video_ops.append(RandomHorizontalFlipVideo(p=0.5))
        video_ops.append(PackPathway(cfg))

        transform = Compose(
            [
                ApplyTransformToKey(key="video", transform=Compose(video_ops)),
                _label_transform(),
                DictToTuple(num_clips, num_crops),
            ]
        )

        clip_sampler = make_clip_sampler("random", clip_duration)
        if cfg.NUM_GPUS > 1:
            video_sampler = DistributedSampler
        elif mode == "train":
            video_sampler = RandomSampler
        else:
            video_sampler = SequentialSampler
    else:
        num_clips = cfg.TEST.NUM_ENSEMBLE_VIEWS
        num_crops = cfg.TEST.NUM_SPATIAL_CROPS

        # Per-video pipeline applied before the uniform crop.
        pre_crop_ops = [
            Lambda(div255),
            NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
            ShortSideScale(size=cfg.DATA.TEST_CROP_SIZE),
        ]
        sample_ops = [
            ApplyTransformToKey(key="video", transform=Compose(pre_crop_ops)),
            UniformCropVideo(size=cfg.DATA.TEST_CROP_SIZE),
            _label_transform(),
            ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [Lambda(rgb2bgr), PackPathway(cfg)],
                ),
            ),
            DictToTuple(num_clips, num_crops),
        ]
        transform = Compose(sample_ops)

        clip_sampler = make_clip_sampler(
            "constant_clips_per_video",
            clip_duration,
            num_clips,
            num_crops,
        )
        if cfg.NUM_GPUS > 1:
            video_sampler = DistributedSampler
        else:
            video_sampler = SequentialSampler

    data_path = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "{}.csv".format(mode))
    dataset = Charades(
        data_path=data_path,
        clip_sampler=clip_sampler,
        video_sampler=video_sampler,
        transform=transform,
        video_path_prefix=cfg.DATA.PATH_PREFIX,
        frames_per_clip=cfg.DATA.NUM_FRAMES,
    )

    logger.info(
        "Constructing charades dataloader (size: {}) from {}".format(
            len(dataset._path_to_videos), data_path
        )
    )

    video_count = len(dataset._path_to_videos)
    return PTVDatasetWrapper(
        num_videos=video_count,
        clips_per_video=num_clips,
        crops_per_clip=num_crops,
        dataset=dataset,
    )
@DATASET_REGISTRY.register()
def Ptvssv2(cfg, mode):
"""
Construct PyTorchVideo Something-Something v2 SSv2 video loader.
Load SSv2 data (frame paths, labels, etc. ) to SSv2 Dataset object.
The dataset could be downloaded from Chrades official website
(https://20bn.com/datasets/something-something).
Please see datasets/DATASET.md for more information about the data format.
For training and validation, a single clip is randomly sampled from every
video with random cropping and scaling. For testing, multiple clips are
uniformaly sampled from every video with uniform cropping. For uniform cropping,
we take the left, center, and right crop if the width is larger than height,
or take top, center, and bottom crop if the height is larger than the width.
Args:
cfg (CfgNode): configs.
mode (string): Options includes `train`, `val`, or `test` mode.
"""
# Only support train, val, and test mode.
assert mode in [
"train",
"val",
"test",
], "Split '{}' not supported".format(mode)
logger.info("Constructing Ptvcharades {}...".format(mode))
if mode in ["train", "val"]:
num_clips = 1
num_crops = 1
transform = Compose(
[
ApplyTransformToKey(
key="video",
transform=Compose(
[
Lambda(div255),
NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
RandomShortSideScale(
min_size=cfg.DATA.TRAIN_JITTER_SCALES[0],
max_size=cfg.DATA.TRAIN_JITTER_SCALES[1],
),
RandomCropVideo(cfg.DATA.TRAIN_CROP_SIZE),
Lambda(rgb2bgr),
]
+ (
[RandomHorizontalFlipVideo(p=0.5)]
if cfg.DATA.RANDOM_FLIP
else []
)
+ [PackPathway(cfg)]
),
),
DictToTuple(num_clips, num_crops),
]
)
clip_sampler = make_clip_sampler(
"constant_clips_per_video",
1, # Put arbitrary duration as ssv2 always needs full video clip.
num_clips,
num_crops,
)
if cfg.NUM_GPUS > 1:
video_sampler = DistributedSampler
else:
video_sampler = (
RandomSampler if mode == "train" else SequentialSampler
)
else:
assert cfg.TEST.NUM_ENSEMBLE_VIEWS == 1
num_clips = cfg.TEST.NUM_ENSEMBLE_VIEWS
num_crops = cfg.TEST.NUM_SPATIAL_CROPS
transform = Compose(
[
ApplyTransformToKey(
key="video",
transform=Compose(
[
Lambda(div255),
NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
ShortSideScale(size=cfg.DATA.TEST_CROP_SIZE),
]
),
),
UniformCropVideo(size=cfg.DATA.TEST_CROP_SIZE),
ApplyTransformToKey(
key="video",
transform=Compose(
[Lambda(rgb2bgr), PackPathway(cfg)],
),
),
DictToTuple(num_clips, num_crops),
]
)
clip_sampler = make_clip_sampler(
"constant_clips_per_video",
1, # Put arbitrary duration as ssv2 always needs full video clip.
num_clips,
num_crops,
)
video_sampler = (
DistributedSampler if cfg.NUM_GPUS > 1 else SequentialSampler
)
label_name_file = os.path.join(
cfg.DATA.PATH_TO_DATA_DIR, "something-something-v2-labels.json"
)
video_label_file = os.path.join(
cfg.DATA.PATH_TO_DATA_DIR,
"something-something-v2-{}.json".format(
"train" if mode == "train" else "validation"
),
)
data_path = os.path.join(
gitextract_p8oeuhyn/
├── DATASET.md
├── INSTALL.md
├── LICENSE
├── README.md
├── VISUALIZATION_TOOLS.md
├── build/
│ └── lib/
│ └── slowfast/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── custom_config.py
│ │ └── defaults.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── ava_dataset.py
│ │ ├── ava_helper.py
│ │ ├── build.py
│ │ ├── charades.py
│ │ ├── cv2_transform.py
│ │ ├── decoder.py
│ │ ├── imagenet.py
│ │ ├── kinetics.py
│ │ ├── loader.py
│ │ ├── mixup.py
│ │ ├── multigrid_helper.py
│ │ ├── ptv_datasets.py
│ │ ├── rand_augment.py
│ │ ├── random_erasing.py
│ │ ├── ssv2.py
│ │ ├── sth.py
│ │ ├── transform.py
│ │ ├── utils.py
│ │ └── video_container.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── batchnorm_helper.py
│ │ ├── build.py
│ │ ├── common.py
│ │ ├── contrastive.py
│ │ ├── custom_video_model_builder.py
│ │ ├── head_helper.py
│ │ ├── losses.py
│ │ ├── morphmlp.py
│ │ ├── morphmlp_32.py
│ │ ├── nonlocal_helper.py
│ │ ├── operators.py
│ │ ├── optimizer.py
│ │ ├── ptv_model_builder.py
│ │ ├── resnet_helper.py
│ │ ├── stem_helper.py
│ │ ├── utils.py
│ │ └── video_model_builder.py
│ ├── site.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── ava_eval_helper.py
│ │ ├── ava_evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── label_map_util.py
│ │ │ ├── metrics.py
│ │ │ ├── np_box_list.py
│ │ │ ├── np_box_list_ops.py
│ │ │ ├── np_box_mask_list.py
│ │ │ ├── np_box_mask_list_ops.py
│ │ │ ├── np_box_ops.py
│ │ │ ├── np_mask_ops.py
│ │ │ ├── object_detection_evaluation.py
│ │ │ ├── per_image_evaluation.py
│ │ │ └── standard_fields.py
│ │ ├── benchmark.py
│ │ ├── bn_helper.py
│ │ ├── c2_model_loading.py
│ │ ├── checkpoint.py
│ │ ├── checkpoint_amp.py
│ │ ├── distributed.py
│ │ ├── env.py
│ │ ├── logging.py
│ │ ├── lr_policy.py
│ │ ├── meters.py
│ │ ├── metrics.py
│ │ ├── misc.py
│ │ ├── multigrid.py
│ │ ├── multiprocessing.py
│ │ ├── parser.py
│ │ └── weight_init_helper.py
│ └── visualization/
│ ├── __init__.py
│ ├── async_predictor.py
│ ├── ava_demo_precomputed_boxes.py
│ ├── demo_loader.py
│ ├── gradcam_utils.py
│ ├── prediction_vis.py
│ ├── predictor.py
│ ├── tensorboard_vis.py
│ ├── utils.py
│ └── video_visualizer.py
├── configs/
│ ├── AVA/
│ │ ├── SLOWFAST_32x2_R50_SHORT.yaml
│ │ ├── SLOW_8x8_R50_SHORT.yaml
│ │ └── c2/
│ │ ├── SLOWFAST_32x2_R101_50_50.yaml
│ │ ├── SLOWFAST_32x2_R101_50_50_v2.1.yaml
│ │ ├── SLOWFAST_32x2_R50.yaml
│ │ ├── SLOWFAST_64x2_R101_50_50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ ├── Charades/
│ │ ├── SLOWFAST_16x8_R50.yaml
│ │ ├── SLOWFAST_16x8_R50_multigrid.yaml
│ │ └── pytorchvideo/
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ ├── ImageNet/
│ │ ├── MVIT_B_16_CONV.yaml
│ │ └── RES_R50.yaml
│ ├── K400/
│ │ ├── K400_MLP_B16x4.yaml
│ │ ├── K400_MLP_B32x4.yaml
│ │ ├── K400_MLP_S16x4.yaml
│ │ └── K400_MLP_S32x4.yaml
│ ├── Kinetics/
│ │ ├── C2D_8x8_R50.yaml
│ │ ├── C2D_8x8_R50_IN1K.yaml
│ │ ├── C2D_NLN_8x8_R50.yaml
│ │ ├── C2D_NLN_8x8_R50_IN1K.yaml
│ │ ├── I3D_8x8_R101.yaml
│ │ ├── I3D_8x8_R50.yaml
│ │ ├── I3D_8x8_R50_IN1K.yaml
│ │ ├── I3D_NLN_8x8_R101.yaml
│ │ ├── I3D_NLN_8x8_R50.yaml
│ │ ├── I3D_NLN_8x8_R50_IN1K.yaml
│ │ ├── MVIT_B_16x4_CONV.yaml
│ │ ├── MVIT_B_32x3_CONV.yaml
│ │ ├── MVITv2_B_32x3.yaml
│ │ ├── MVITv2_L_40x3_test.yaml
│ │ ├── MVITv2_S_16x4.yaml
│ │ ├── SLOWFAST_4x16_R50.yaml
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ ├── SLOWFAST_8x8_R50_stepwise.yaml
│ │ ├── SLOWFAST_8x8_R50_stepwise_multigrid.yaml
│ │ ├── SLOWFAST_NLN_4x16_R50.yaml
│ │ ├── SLOWFAST_NLN_8x8_R50.yaml
│ │ ├── SLOW_4x16_R50.yaml
│ │ ├── SLOW_8x8_R50.yaml
│ │ ├── SLOW_NLN_4x16_R50.yaml
│ │ ├── SLOW_NLN_8x8_R50.yaml
│ │ ├── X3D_L.yaml
│ │ ├── X3D_M.yaml
│ │ ├── X3D_S.yaml
│ │ ├── X3D_XS.yaml
│ │ ├── c2/
│ │ │ ├── C2D_NOPOOL_8x8_R50.yaml
│ │ │ ├── I3D_8x8_R50.yaml
│ │ │ ├── I3D_NLN_8x8_R50.yaml
│ │ │ ├── SLOWFAST_16x8_R101_50_50.yaml
│ │ │ ├── SLOWFAST_4x16_R50.yaml
│ │ │ ├── SLOWFAST_8x8_R101_101_101.yaml
│ │ │ ├── SLOWFAST_8x8_R101_50_101.yaml
│ │ │ ├── SLOWFAST_8x8_R101_50_50.yaml
│ │ │ ├── SLOWFAST_8x8_R50.yaml
│ │ │ ├── SLOWFAST_NLN_16x8_R101_50_50.yaml
│ │ │ ├── SLOW_4x16_R50.yaml
│ │ │ └── SLOW_8x8_R50.yaml
│ │ └── pytorchvideo/
│ │ ├── C2D_8x8_R50.yaml
│ │ ├── CSN_32x2_R101.yaml
│ │ ├── I3D_8x8_R50.yaml
│ │ ├── MVIT_B_16x4_CONV.yaml
│ │ ├── R2PLUS1D_16x4_R50.yaml
│ │ ├── SLOWFAST_16x8_R101_50_50.yaml
│ │ ├── SLOWFAST_4x16_R50.yaml
│ │ ├── SLOWFAST_8x8_R101.yaml
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ ├── SLOW_4x16_R50.yaml
│ │ ├── SLOW_8x8_R50.yaml
│ │ ├── X3D_L.yaml
│ │ ├── X3D_M.yaml
│ │ ├── X3D_S.yaml
│ │ └── X3D_XS.yaml
│ ├── SSV1/
│ │ ├── SSV1_MLP_B16.yaml
│ │ ├── SSV1_MLP_B32.yaml
│ │ ├── SSV1_MLP_S16.yaml
│ │ └── SSV1_MLP_S32.yaml
│ ├── SSV2/
│ │ ├── SSV2_MLP_B16.yaml
│ │ ├── SSV2_MLP_B32.yaml
│ │ ├── SSV2_MLP_S16.yaml
│ │ └── SSV2_MLP_S32.yaml
│ ├── SSv2/
│ │ ├── SLOWFAST_16x8_R50.yaml
│ │ ├── SLOWFAST_16x8_R50_multigrid.yaml
│ │ └── pytorchvideo/
│ │ ├── SLOWFAST_8x8_R50.yaml
│ │ └── SLOW_8x8_R50.yaml
│ └── ssl/
│ ├── BYOL_SlowR50_8x8.yaml
│ ├── MoCo_SlowR50_8x8.yaml
│ ├── SimCLR_SlowR50_8x8.yaml
│ ├── SwAV_Slow_R50_8x8.yaml
│ ├── finetune_SSv2_Slow_R50_syn0.yaml
│ ├── finetune_SSv2_Slow_R50_syn8.yaml
│ ├── finetune_ava_Slow_R50_syn0.yaml
│ ├── finetune_ava_Slow_R50_syn8.yaml
│ ├── finetune_ucf_Slow_R50_syn0.yaml
│ ├── finetune_ucf_Slow_R50_syn8.yaml
│ ├── linear_k400_Slow_8x8_R50_syn0.yaml
│ └── linear_k400_Slow_8x8_R50_syn8.yaml
├── data_list/
│ ├── k400/
│ │ ├── kinetics_400_categroies.txt
│ │ ├── test.csv
│ │ ├── train.csv
│ │ └── val.csv
│ ├── sthv1/
│ │ ├── category.txt
│ │ ├── somesomev1_rgb_train_split.txt
│ │ └── somesomev1_rgb_validation_split.txt
│ └── sthv2/
│ ├── category.txt
│ ├── somesomev2_rgb_test_split.txt
│ ├── somesomev2_rgb_train_split.txt
│ └── somesomev2_rgb_validation_split.txt
├── demo/
│ ├── AVA/
│ │ └── SLOWFAST_32x2_R101_50_50.yaml
│ └── Kinetics/
│ └── SLOWFAST_8x8_R50.yaml
├── linter.sh
├── mlp_images/
│ └── IMAGE.md
├── pretrained/
│ └── note.md
├── projects/
│ ├── multigrid/
│ │ └── README.md
│ ├── mvit/
│ │ └── README.md
│ ├── mvitv2/
│ │ └── README.md
│ ├── pytorchvideo/
│ │ └── README.md
│ ├── ssl/
│ │ └── README.md
│ └── x3d/
│ └── README.md
├── setup.cfg
├── setup.py
├── slowfast/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── custom_config.py
│ │ └── defaults.py
│ ├── datasets/
│ │ ├── DATASET.md
│ │ ├── __init__.py
│ │ ├── ava_dataset.py
│ │ ├── ava_helper.py
│ │ ├── build.py
│ │ ├── charades.py
│ │ ├── cv2_transform.py
│ │ ├── decoder.py
│ │ ├── imagenet.py
│ │ ├── kinetics.py
│ │ ├── loader.py
│ │ ├── mixup.py
│ │ ├── multigrid_helper.py
│ │ ├── ptv_datasets.py
│ │ ├── rand_augment.py
│ │ ├── random_erasing.py
│ │ ├── ssv2.py
│ │ ├── sth.py
│ │ ├── transform.py
│ │ ├── utils.py
│ │ └── video_container.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── batchnorm_helper.py
│ │ ├── build.py
│ │ ├── common.py
│ │ ├── contrastive.py
│ │ ├── custom_video_model_builder.py
│ │ ├── head_helper.py
│ │ ├── losses.py
│ │ ├── morphmlp.py
│ │ ├── morphmlp_32.py
│ │ ├── nonlocal_helper.py
│ │ ├── operators.py
│ │ ├── optimizer.py
│ │ ├── ptv_model_builder.py
│ │ ├── resnet_helper.py
│ │ ├── stem_helper.py
│ │ ├── utils.py
│ │ └── video_model_builder.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── ava_eval_helper.py
│ │ ├── ava_evaluation/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt
│ │ │ ├── label_map_util.py
│ │ │ ├── metrics.py
│ │ │ ├── np_box_list.py
│ │ │ ├── np_box_list_ops.py
│ │ │ ├── np_box_mask_list.py
│ │ │ ├── np_box_mask_list_ops.py
│ │ │ ├── np_box_ops.py
│ │ │ ├── np_mask_ops.py
│ │ │ ├── object_detection_evaluation.py
│ │ │ ├── per_image_evaluation.py
│ │ │ └── standard_fields.py
│ │ ├── benchmark.py
│ │ ├── bn_helper.py
│ │ ├── c2_model_loading.py
│ │ ├── checkpoint.py
│ │ ├── checkpoint_amp.py
│ │ ├── distributed.py
│ │ ├── env.py
│ │ ├── logging.py
│ │ ├── lr_policy.py
│ │ ├── meters.py
│ │ ├── metrics.py
│ │ ├── misc.py
│ │ ├── multigrid.py
│ │ ├── multiprocessing.py
│ │ ├── parser.py
│ │ └── weight_init_helper.py
│ └── visualization/
│ ├── __init__.py
│ ├── async_predictor.py
│ ├── ava_demo_precomputed_boxes.py
│ ├── demo_loader.py
│ ├── gradcam_utils.py
│ ├── prediction_vis.py
│ ├── predictor.py
│ ├── tensorboard_vis.py
│ ├── utils.py
│ └── video_visualizer.py
└── tools/
├── benchmark.py
├── demo_net.py
├── run_net.py
├── test_net.py
├── train_net.py
└── visualization.py
SYMBOL INDEX (1723 symbols across 157 files)
FILE: build/lib/slowfast/config/custom_config.py
function add_custom_config (line 7) | def add_custom_config(_C):
FILE: build/lib/slowfast/config/defaults.py
function assert_and_infer_cfg (line 990) | def assert_and_infer_cfg(cfg):
function get_cfg (line 1018) | def get_cfg():
FILE: build/lib/slowfast/datasets/ava_dataset.py
class Ava (line 18) | class Ava(torch.utils.data.Dataset):
method __init__ (line 23) | def __init__(self, cfg, split):
method _load_data (line 49) | def _load_data(self, cfg):
method print_summary (line 87) | def print_summary(self):
method __len__ (line 98) | def __len__(self):
method num_videos (line 106) | def num_videos(self):
method _images_and_boxes_preprocessing_cv2 (line 113) | def _images_and_boxes_preprocessing_cv2(self, imgs, boxes):
method _images_and_boxes_preprocessing (line 245) | def _images_and_boxes_preprocessing(self, imgs, boxes):
method __getitem__ (line 352) | def __getitem__(self, idx):
FILE: build/lib/slowfast/datasets/ava_helper.py
function load_image_lists (line 16) | def load_image_lists(cfg, is_train):
function load_boxes_and_labels (line 69) | def load_boxes_and_labels(cfg, mode):
function get_keyframe_data (line 117) | def get_keyframe_data(boxes_and_labels):
function get_num_boxes_used (line 163) | def get_num_boxes_used(keyframe_indices, keyframe_boxes_and_labels):
function parse_bboxes_file (line 182) | def parse_bboxes_file(
FILE: build/lib/slowfast/datasets/build.py
function build_dataset (line 15) | def build_dataset(dataset_name, cfg, split):
FILE: build/lib/slowfast/datasets/charades.py
class Charades (line 20) | class Charades(torch.utils.data.Dataset):
method __init__ (line 31) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 74) | def _construct_loader(self):
method get_seq_frames (line 113) | def get_seq_frames(self, index):
method __getitem__ (line 154) | def __getitem__(self, index):
method __len__ (line 249) | def __len__(self):
method num_videos (line 257) | def num_videos(self):
FILE: build/lib/slowfast/datasets/cv2_transform.py
function clip_boxes_to_image (line 9) | def clip_boxes_to_image(boxes, height, width):
function random_short_side_scale_jitter_list (line 29) | def random_short_side_scale_jitter_list(images, min_size, max_size, boxe...
function scale (line 77) | def scale(size, image):
function scale_boxes (line 106) | def scale_boxes(size, boxes, height, width):
function horizontal_flip_list (line 134) | def horizontal_flip_list(prob, images, order="CHW", boxes=None):
function spatial_shift_crop_list (line 166) | def spatial_shift_crop_list(size, images, spatial_shift_pos, boxes=None):
function CHW2HWC (line 216) | def CHW2HWC(image):
function HWC2CHW (line 228) | def HWC2CHW(image):
function color_jitter_list (line 240) | def color_jitter_list(
function lighting_list (line 273) | def lighting_list(imgs, alphastd, eigval, eigvec, alpha=None):
function color_normalization (line 302) | def color_normalization(image, mean, stddev):
function pad_image (line 319) | def pad_image(image, pad_size, order="CHW"):
function horizontal_flip (line 344) | def horizontal_flip(prob, image, order="CHW"):
function flip_boxes (line 365) | def flip_boxes(boxes, im_width):
function crop_boxes (line 381) | def crop_boxes(boxes, x_offset, y_offset):
function random_crop_list (line 394) | def random_crop_list(images, size, pad_size=0, order="CHW", boxes=None):
function center_crop (line 458) | def center_crop(size, image):
function random_scale_jitter (line 477) | def random_scale_jitter(image, min_size, max_size):
function random_scale_jitter_list (line 495) | def random_scale_jitter_list(images, min_size, max_size):
function random_sized_crop (line 513) | def random_sized_crop(image, size, area_frac=0.08):
function lighting (line 556) | def lighting(img, alphastd, eigval, eigvec):
function random_sized_crop_list (line 582) | def random_sized_crop_list(images, size, crop_area_fraction=0.08):
function blend (line 632) | def blend(image1, image2, alpha):
function grayscale (line 636) | def grayscale(image):
function saturation (line 654) | def saturation(var, image):
function brightness (line 668) | def brightness(var, image):
function contrast (line 682) | def contrast(var, image):
function saturation_list (line 697) | def saturation_list(var, images):
function brightness_list (line 715) | def brightness_list(var, images):
function contrast_list (line 733) | def contrast_list(var, images):
function color_jitter (line 752) | def color_jitter(image, img_brightness=0, img_contrast=0, img_saturation...
function revert_scaled_boxes (line 783) | def revert_scaled_boxes(size, boxes, img_height, img_width):
FILE: build/lib/slowfast/datasets/decoder.py
function temporal_sampling (line 11) | def temporal_sampling(frames, start_idx, end_idx, num_samples):
function get_start_end_idx (line 31) | def get_start_end_idx(
function pyav_decode_stream (line 72) | def pyav_decode_stream(
function torchvision_decode (line 114) | def torchvision_decode(
function pyav_decode (line 229) | def pyav_decode(
function decode (line 305) | def decode(
FILE: build/lib/slowfast/datasets/imagenet.py
class Imagenet (line 26) | class Imagenet(torch.utils.data.Dataset):
method __init__ (line 29) | def __init__(self, cfg, mode, num_retries=10):
method _load_imdb (line 45) | def _load_imdb(self):
method _construct_imdb (line 53) | def _construct_imdb(self):
method load_image (line 76) | def load_image(self, im_path):
method _prepare_im_res (line 86) | def _prepare_im_res(self, im_path):
method _prepare_im_tf (line 123) | def _prepare_im_tf(self, im_path):
method __load__ (line 162) | def __load__(self, index):
method __getitem__ (line 183) | def __getitem__(self, index):
method __len__ (line 202) | def __len__(self):
FILE: build/lib/slowfast/datasets/kinetics.py
class Kinetics (line 24) | class Kinetics(torch.utils.data.Dataset):
method __init__ (line 35) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 88) | def _construct_loader(self):
method __getitem__ (line 129) | def __getitem__(self, index):
method _aug_frame (line 320) | def _aug_frame(
method _frame_to_list_img (line 385) | def _frame_to_list_img(self, frames):
method _list_img_to_frames (line 391) | def _list_img_to_frames(self, img_list):
method __len__ (line 395) | def __len__(self):
method num_videos (line 403) | def num_videos(self):
FILE: build/lib/slowfast/datasets/loader.py
function multiple_samples_collate (line 20) | def multiple_samples_collate(batch, fold=False):
function detection_collate (line 48) | def detection_collate(batch):
function construct_loader (line 85) | def construct_loader(cfg, split, is_precise_bn=False):
function shuffle_dataset (line 173) | def shuffle_dataset(loader, cur_epoch):
FILE: build/lib/slowfast/datasets/mixup.py
function convert_to_one_hot (line 22) | def convert_to_one_hot(targets, num_classes, on_value=1.0, off_value=0.0):
function mixup_target (line 40) | def mixup_target(target, num_classes, lam=1.0, smoothing=0.0):
function rand_bbox (line 67) | def rand_bbox(img_shape, lam, margin=0.0, count=None):
function get_cutmix_bbox (line 90) | def get_cutmix_bbox(img_shape, lam, correct_lam=True, count=None):
class MixUp (line 109) | class MixUp:
method __init__ (line 117) | def __init__(
method _get_mixup_params (line 148) | def _get_mixup_params(self):
method _mix_batch (line 167) | def _mix_batch(self, x):
method __call__ (line 183) | def __call__(self, x, target):
FILE: build/lib/slowfast/datasets/multigrid_helper.py
class ShortCycleBatchSampler (line 19) | class ShortCycleBatchSampler(Sampler):
method __init__ (line 26) | def __init__(self, sampler, batch_size, drop_last, cfg):
method __iter__ (line 68) | def __iter__(self):
method __len__ (line 82) | def __len__(self):
FILE: build/lib/slowfast/datasets/ptv_datasets.py
class PTVDatasetWrapper (line 42) | class PTVDatasetWrapper(torch.utils.data.IterableDataset):
method __init__ (line 47) | def __init__(self, num_videos, clips_per_video, crops_per_clip, dataset):
method __next__ (line 61) | def __next__(self):
method sampler (line 68) | def sampler(self):
method __len__ (line 75) | def __len__(self):
method num_videos (line 83) | def num_videos(self):
method __iter__ (line 90) | def __iter__(self):
class PackPathway (line 94) | class PackPathway(torch.nn.Module):
method __init__ (line 100) | def __init__(self, cfg):
method forward (line 104) | def forward(self, x: torch.Tensor):
class DictToTuple (line 108) | class DictToTuple(torch.nn.Module):
method __init__ (line 114) | def __init__(self, num_clips, num_crops):
method forward (line 119) | def forward(self, x: Dict[str, torch.Tensor]):
function div255 (line 129) | def div255(x):
function Ptvkinetics (line 143) | def Ptvkinetics(cfg, mode):
function process_charades_label (line 275) | def process_charades_label(x, mode, num_classes):
function rgb2bgr (line 298) | def rgb2bgr(x):
function Ptvcharades (line 312) | def Ptvcharades(cfg, mode):
function Ptvssv2 (line 455) | def Ptvssv2(cfg, mode):
FILE: build/lib/slowfast/datasets/rand_augment.py
function _interpolation (line 52) | def _interpolation(kwargs):
function _check_args_tf (line 60) | def _check_args_tf(kwargs):
function shear_x (line 66) | def shear_x(img, factor, **kwargs):
function shear_y (line 73) | def shear_y(img, factor, **kwargs):
function translate_x_rel (line 80) | def translate_x_rel(img, pct, **kwargs):
function translate_y_rel (line 88) | def translate_y_rel(img, pct, **kwargs):
function translate_x_abs (line 96) | def translate_x_abs(img, pixels, **kwargs):
function translate_y_abs (line 103) | def translate_y_abs(img, pixels, **kwargs):
function rotate (line 110) | def rotate(img, degrees, **kwargs):
function auto_contrast (line 144) | def auto_contrast(img, **__):
function invert (line 148) | def invert(img, **__):
function equalize (line 152) | def equalize(img, **__):
function solarize (line 156) | def solarize(img, thresh, **__):
function solarize_add (line 160) | def solarize_add(img, add, thresh=128, **__):
function posterize (line 175) | def posterize(img, bits_to_keep, **__):
function contrast (line 181) | def contrast(img, factor, **__):
function color (line 185) | def color(img, factor, **__):
function brightness (line 189) | def brightness(img, factor, **__):
function sharpness (line 193) | def sharpness(img, factor, **__):
function _randomly_negate (line 197) | def _randomly_negate(v):
function _rotate_level_to_arg (line 202) | def _rotate_level_to_arg(level, _hparams):
function _enhance_level_to_arg (line 209) | def _enhance_level_to_arg(level, _hparams):
function _enhance_increasing_level_to_arg (line 214) | def _enhance_increasing_level_to_arg(level, _hparams):
function _shear_level_to_arg (line 222) | def _shear_level_to_arg(level, _hparams):
function _translate_abs_level_to_arg (line 229) | def _translate_abs_level_to_arg(level, hparams):
function _translate_rel_level_to_arg (line 236) | def _translate_rel_level_to_arg(level, hparams):
function _posterize_level_to_arg (line 244) | def _posterize_level_to_arg(level, _hparams):
function _posterize_increasing_level_to_arg (line 251) | def _posterize_increasing_level_to_arg(level, hparams):
function _posterize_original_level_to_arg (line 258) | def _posterize_original_level_to_arg(level, _hparams):
function _solarize_level_to_arg (line 265) | def _solarize_level_to_arg(level, _hparams):
function _solarize_increasing_level_to_arg (line 271) | def _solarize_increasing_level_to_arg(level, _hparams):
function _solarize_add_level_to_arg (line 277) | def _solarize_add_level_to_arg(level, _hparams):
class AugmentOp (line 339) | class AugmentOp:
method __init__ (line 344) | def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
method __call__ (line 366) | def __call__(self, img_list):
function _select_rand_weights (line 446) | def _select_rand_weights(weight_idx=0, transforms=None):
function rand_augment_ops (line 455) | def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
class RandAugment (line 464) | class RandAugment:
method __init__ (line 465) | def __init__(self, ops, num_layers=2, choice_weights=None):
method __call__ (line 470) | def __call__(self, img):
function rand_augment_transform (line 483) | def rand_augment_transform(config_str, hparams):
FILE: build/lib/slowfast/datasets/random_erasing.py
function _get_pixels (line 18) | def _get_pixels(
class RandomErasing (line 34) | class RandomErasing:
method __init__ (line 53) | def __init__(
method _erase (line 87) | def _erase(self, img, chan, img_h, img_w, dtype):
method _erase_cube (line 116) | def _erase_cube(
method __call__ (line 158) | def __call__(self, input):
FILE: build/lib/slowfast/datasets/ssv2.py
class Ssv2 (line 22) | class Ssv2(torch.utils.data.Dataset):
method __init__ (line 33) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 73) | def _construct_loader(self):
method get_seq_frames (line 159) | def get_seq_frames(self, index):
method __getitem__ (line 182) | def __getitem__(self, index):
method __len__ (line 272) | def __len__(self):
method num_videos (line 280) | def num_videos(self):
FILE: build/lib/slowfast/datasets/sth.py
class Sth (line 26) | class Sth(torch.utils.data.Dataset):
method __init__ (line 37) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 86) | def _construct_loader(self):
method get_seq_frames (line 141) | def get_seq_frames(self, index, temporal_sample_index):
method __getitem__ (line 170) | def __getitem__(self, index):
method _aug_frame (line 307) | def _aug_frame(
method _frame_to_list_img (line 372) | def _frame_to_list_img(self, frames):
method _list_img_to_frames (line 378) | def _list_img_to_frames(self, img_list):
method __len__ (line 382) | def __len__(self):
method num_videos (line 390) | def num_videos(self):
FILE: build/lib/slowfast/datasets/transform.py
function _pil_interp (line 33) | def _pil_interp(method):
function random_short_side_scale_jitter (line 47) | def random_short_side_scale_jitter(
function crop_boxes (line 104) | def crop_boxes(boxes, x_offset, y_offset):
function random_crop (line 123) | def random_crop(images, size, boxes=None):
function horizontal_flip (line 159) | def horizontal_flip(prob, images, boxes=None):
function uniform_crop (line 194) | def uniform_crop(images, size, spatial_idx, boxes=None, scale_size=None):
function clip_boxes_to_image (line 257) | def clip_boxes_to_image(boxes, height, width):
function blend (line 279) | def blend(images1, images2, alpha):
function grayscale (line 295) | def grayscale(images):
function color_jitter (line 317) | def color_jitter(images, img_brightness=0, img_contrast=0, img_saturatio...
function brightness_jitter (line 352) | def brightness_jitter(var, images):
function contrast_jitter (line 371) | def contrast_jitter(var, images):
function saturation_jitter (line 391) | def saturation_jitter(var, images):
function lighting_jitter (line 410) | def lighting_jitter(images, alphastd, eigval, eigvec):
function color_normalization (line 458) | def color_normalization(images, mean, stddev):
function _get_param_spatial_crop (line 502) | def _get_param_spatial_crop(
function random_resized_crop (line 544) | def random_resized_crop(
function random_resized_crop_with_shift (line 579) | def random_resized_crop_with_shift(
function create_random_augment (line 624) | def create_random_augment(
function random_sized_crop_img (line 660) | def random_sized_crop_img(
class RandomResizedCropAndInterpolation (line 695) | class RandomResizedCropAndInterpolation:
method __init__ (line 708) | def __init__(
method get_params (line 730) | def get_params(img, scale, ratio):
method __call__ (line 770) | def __call__(self, img):
method __repr__ (line 784) | def __repr__(self):
function transforms_imagenet_train (line 802) | def transforms_imagenet_train(
function temporal_difference (line 903) | def temporal_difference(
function color_jitter_video_ssl (line 929) | def color_jitter_video_ssl(
function augment_raw_frames (line 980) | def augment_raw_frames(frames, time_diff_prob=0.0, gaussian_prob=0.0):
class GaussianBlur (line 1003) | class GaussianBlur(object):
method __init__ (line 1006) | def __init__(self, sigma=[0.1, 2.0]):
method __call__ (line 1009) | def __call__(self, x):
class GaussianBlurVideo (line 1018) | class GaussianBlurVideo(object):
method __init__ (line 1019) | def __init__(
method __call__ (line 1025) | def __call__(self, frames):
FILE: build/lib/slowfast/datasets/utils.py
function retry_load_images (line 20) | def retry_load_images(image_paths, retry=10, backend="pytorch"):
function get_sequence (line 51) | def get_sequence(center_idx, half_len, sample_rate, num_frames):
function pack_pathway_output (line 74) | def pack_pathway_output(cfg, frames):
function spatial_sampling (line 110) | def spatial_sampling(
function as_binary_vector (line 184) | def as_binary_vector(labels, num_classes):
function aggregate_labels (line 200) | def aggregate_labels(label_list):
function convert_to_video_level_labels (line 215) | def convert_to_video_level_labels(labels):
function load_image_lists (line 231) | def load_image_lists(frame_list_file, prefix="", return_list=False):
function tensor_normalize (line 276) | def tensor_normalize(tensor, mean, std, func=None):
function get_random_sampling_rate (line 298) | def get_random_sampling_rate(long_cycle_sampling_rate, sampling_rate):
function revert_tensor_normalize (line 310) | def revert_tensor_normalize(tensor, mean, std):
function create_sampler (line 327) | def create_sampler(dataset, shuffle, cfg):
function loader_worker_init_fn (line 344) | def loader_worker_init_fn(dataset):
FILE: build/lib/slowfast/datasets/video_container.py
function get_video_container (line 9) | def get_video_container(path_to_vid, multi_thread_decode=False, backend=...
FILE: build/lib/slowfast/models/attention.py
function attention_pool (line 14) | def attention_pool(tensor, pool, thw_shape, has_cls_embed=True, norm=None):
function get_rel_pos (line 51) | def get_rel_pos(rel_pos, d):
function cal_rel_pos_spatial (line 67) | def cal_rel_pos_spatial(
function cal_rel_pos_temporal (line 120) | def cal_rel_pos_temporal(attn, q, has_cls_embed, q_shape, k_shape, rel_p...
class MultiScaleAttention (line 162) | class MultiScaleAttention(nn.Module):
method __init__ (line 163) | def __init__(
method forward (line 307) | def forward(self, x, thw_shape):
class MultiScaleBlock (line 445) | class MultiScaleBlock(nn.Module):
method __init__ (line 446) | def __init__(
method forward (line 533) | def forward(self, x, thw_shape):
FILE: build/lib/slowfast/models/batchnorm_helper.py
function get_norm (line 16) | def get_norm(cfg):
class SubBatchNorm3d (line 40) | class SubBatchNorm3d(nn.Module):
method __init__ (line 51) | def __init__(self, num_splits, **args):
method _get_aggregated_mean_std (line 72) | def _get_aggregated_mean_std(self, means, stds, n):
method aggregate_stats (line 87) | def aggregate_stats(self):
method forward (line 101) | def forward(self, x):
FILE: build/lib/slowfast/models/build.py
function build_model (line 18) | def build_model(cfg, gpu_id=None):
FILE: build/lib/slowfast/models/common.py
class Mlp (line 7) | class Mlp(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 26) | def forward(self, x):
class Permute (line 37) | class Permute(nn.Module):
method __init__ (line 38) | def __init__(self, dims):
method forward (line 42) | def forward(self, x):
function drop_path (line 46) | def drop_path(x, drop_prob: float = 0.0, training: bool = False):
class DropPath (line 62) | class DropPath(nn.Module):
method __init__ (line 65) | def __init__(self, drop_prob=None):
method forward (line 69) | def forward(self, x):
FILE: build/lib/slowfast/models/contrastive.py
class ContrastiveModel (line 34) | class ContrastiveModel(nn.Module):
method __init__ (line 39) | def __init__(self, cfg):
method knn_mem_update (line 134) | def knn_mem_update(self, q_knn, index):
method init_knn_labels (line 145) | def init_knn_labels(self, train_loader):
method _update_history (line 161) | def _update_history(self):
method _batch_shuffle (line 177) | def _batch_shuffle(self, x):
method _batch_unshuffle (line 219) | def _batch_unshuffle(self, x, idx_restore):
method eval_knn (line 235) | def eval_knn(self, q_knn, knn_k=200):
method sim_loss (line 245) | def sim_loss(self, q, k):
method momentum_anneal_cosine (line 254) | def momentum_anneal_cosine(self, epoch_exact):
method _dequeue_and_enqueue (line 266) | def _dequeue_and_enqueue(self, keys, extra_keys=None):
method batch_clips (line 297) | def batch_clips(self, clips):
method compute_key_feat (line 311) | def compute_key_feat(
method forward (line 375) | def forward(
method _simclr_precompute_pos_neg_mask_multi (line 808) | def _simclr_precompute_pos_neg_mask_multi(self):
method run_swav_encoder_q (line 850) | def run_swav_encoder_q(self, im):
method get_code (line 858) | def get_code(self, out):
method run_swav_orig_encoder_q (line 867) | def run_swav_orig_encoder_q(self, x):
method sinkhorn (line 875) | def sinkhorn(self, Q, iters):
method distributed_sinkhorn (line 891) | def distributed_sinkhorn(self, Q, nmb_iters):
method KLDivLoss (line 914) | def KLDivLoss(self, out, code):
function l2_loss (line 921) | def l2_loss(x, y):
class Normalize (line 925) | class Normalize(nn.Module):
method __init__ (line 926) | def __init__(self, power=2, dim=1):
method forward (line 931) | def forward(self, x):
class Memory (line 939) | class Memory(nn.Module):
method __init__ (line 940) | def __init__(self, length, duration, dim, cfg):
method resize (line 955) | def resize(self, length, duration, dim):
method get (line 968) | def get(self, ind, time, interp=False):
method update (line 993) | def update(self, mem, momentum, ind, time, interp=False):
method forward (line 1040) | def forward(self, inputs):
class Memory1D (line 1044) | class Memory1D(nn.Module):
method __init__ (line 1045) | def __init__(self, length, duration, dim, cfg):
method get (line 1059) | def get(self, ind, time, interp=False):
method update (line 1071) | def update(self, mem, momentum, ind, time, interp=False):
function cancel_swav_gradients (line 1085) | def cancel_swav_gradients(model, cfg, epoch_exact):
FILE: build/lib/slowfast/models/head_helper.py
class ResNetRoIHead (line 16) | class ResNetRoIHead(nn.Module):
method __init__ (line 21) | def __init__(
method forward (line 114) | def forward(self, inputs, bboxes):
class MLPHead (line 146) | class MLPHead(nn.Module):
method __init__ (line 147) | def __init__(
method forward (line 188) | def forward(self, x):
class ResNetBasicHead (line 197) | class ResNetBasicHead(nn.Module):
method __init__ (line 206) | def __init__(
method forward (line 309) | def forward(self, inputs):
class X3DHead (line 357) | class X3DHead(nn.Module):
method __init__ (line 366) | def __init__(
method _construct_head (line 416) | def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):
method forward (line 467) | def forward(self, inputs):
class TransformerBasicHead (line 497) | class TransformerBasicHead(nn.Module):
method __init__ (line 502) | def __init__(
method forward (line 556) | def forward(self, x):
FILE: build/lib/slowfast/models/losses.py
class ContrastiveLoss (line 16) | class ContrastiveLoss(nn.Module):
method __init__ (line 17) | def __init__(self, reduction="mean"):
method forward (line 21) | def forward(self, inputs, dummy_labels=None):
function get_loss_func (line 38) | def get_loss_func(loss_name):
FILE: build/lib/slowfast/models/morphmlp.py
function conv_3xnxn (line 12) | def conv_3xnxn(inp, oup, kernel_size=3, stride=3):
function conv_1xnxn (line 16) | def conv_1xnxn(inp, oup, kernel_size=3, stride=3):
class Mlp (line 19) | class Mlp(nn.Module):
method __init__ (line 20) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 29) | def forward(self, x):
class MorphFC_S2 (line 38) | class MorphFC_S2(nn.Module):
method __init__ (line 39) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 51) | def forward(self, x):
class MorphFC_S (line 115) | class MorphFC_S(nn.Module):
method __init__ (line 116) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 130) | def forward(self, x):
class MorphFC_T (line 156) | class MorphFC_T(nn.Module):
method __init__ (line 157) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 168) | def forward(self, x):
class PermutatorBlock (line 185) | class PermutatorBlock(nn.Module):
method __init__ (line 186) | def __init__(self, dim, segment_dim, mlp_ratio=4., qkv_bias=False, qk_...
method forward (line 203) | def forward(self, x):
class PatchEmbed (line 210) | class PatchEmbed(nn.Module):
method __init__ (line 214) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 222) | def forward(self, x):
class Downsample (line 230) | class Downsample(nn.Module):
method __init__ (line 234) | def __init__(self, in_embed_dim, out_embed_dim, patch_size):
method forward (line 239) | def forward(self, x):
class MorphMLP (line 246) | class MorphMLP(nn.Module):
method __init__ (line 250) | def __init__(self, cfg):
method _init_weights (line 350) | def _init_weights(self, m):
method get_pretrained_model (line 359) | def get_pretrained_model(self, cfg):
method forward_features (line 369) | def forward_features(self, x):
method forward (line 389) | def forward(self, x):
FILE: build/lib/slowfast/models/morphmlp_32.py
function conv_3xnxn (line 10) | def conv_3xnxn(inp, oup, kernel_size=3, stride=3):
function conv_1xnxn (line 14) | def conv_1xnxn(inp, oup, kernel_size=3, stride=3):
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 18) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 27) | def forward(self, x):
class MorphFC_S2 (line 36) | class MorphFC_S2(nn.Module):
method __init__ (line 37) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 49) | def forward(self, x):
class MorphFC_S (line 113) | class MorphFC_S(nn.Module):
method __init__ (line 114) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 128) | def forward(self, x):
class MorphFC_T (line 154) | class MorphFC_T(nn.Module):
method __init__ (line 155) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 169) | def forward(self, x):
class PermutatorBlock (line 186) | class PermutatorBlock(nn.Module):
method __init__ (line 187) | def __init__(self, dim, segment_dim, mlp_ratio=4., qkv_bias=False, qk_...
method forward (line 204) | def forward(self, x):
class PatchEmbed (line 211) | class PatchEmbed(nn.Module):
method __init__ (line 215) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 223) | def forward(self, x):
class Downsample (line 231) | class Downsample(nn.Module):
method __init__ (line 235) | def __init__(self, in_embed_dim, out_embed_dim, patch_size):
method forward (line 240) | def forward(self, x):
class MorphMLP_32 (line 247) | class MorphMLP_32(nn.Module):
method __init__ (line 251) | def __init__(self, cfg):
method _init_weights (line 348) | def _init_weights(self, m):
method get_pretrained_model (line 357) | def get_pretrained_model(self, cfg):
method forward_features (line 367) | def forward_features(self, x):
method forward (line 387) | def forward(self, x):
FILE: build/lib/slowfast/models/nonlocal_helper.py
class Nonlocal (line 10) | class Nonlocal(nn.Module):
method __init__ (line 20) | def __init__(
method _construct_nonlocal (line 67) | def _construct_nonlocal(
method forward (line 105) | def forward(self, x):
FILE: build/lib/slowfast/models/operators.py
class SE (line 11) | class SE(nn.Module):
method _round_width (line 14) | def _round_width(self, width, multiplier, min_width=8, divisor=8):
method __init__ (line 35) | def __init__(self, dim_in, ratio, relu_act=True):
method forward (line 53) | def forward(self, x):
FILE: build/lib/slowfast/models/optimizer.py
function construct_optimizer (line 11) | def construct_optimizer(model, cfg):
function get_epoch_lr (line 101) | def get_epoch_lr(cur_epoch, cfg):
function set_lr (line 112) | def set_lr(optimizer, new_lr):
FILE: build/lib/slowfast/models/ptv_model_builder.py
function get_head_act (line 37) | def get_head_act(act_func):
class PTVResNet (line 59) | class PTVResNet(nn.Module):
method __init__ (line 64) | def __init__(self, cfg):
method _construct_network (line 90) | def _construct_network(self, cfg):
method forward (line 209) | def forward(self, x, bboxes=None):
class PTVSlowFast (line 225) | class PTVSlowFast(nn.Module):
method __init__ (line 226) | def __init__(self, cfg):
method _construct_network (line 247) | def _construct_network(self, cfg):
method forward (line 410) | def forward(self, x, bboxes=None):
class PTVX3D (line 425) | class PTVX3D(nn.Module):
method __init__ (line 426) | def __init__(self, cfg):
method _construct_network (line 449) | def _construct_network(self, cfg):
method forward (line 505) | def forward(self, x, bboxes=None):
class PTVCSN (line 518) | class PTVCSN(nn.Module):
method __init__ (line 523) | def __init__(self, cfg):
method _construct_network (line 540) | def _construct_network(self, cfg):
method forward (line 590) | def forward(self, x, bboxes=None):
class PTVR2plus1D (line 603) | class PTVR2plus1D(nn.Module):
method __init__ (line 608) | def __init__(self, cfg):
method _construct_network (line 625) | def _construct_network(self, cfg):
method forward (line 691) | def forward(self, x, bboxes=None):
class PTVMViT (line 704) | class PTVMViT(nn.Module):
method __init__ (line 709) | def __init__(self, cfg):
method _construct_network (line 726) | def _construct_network(self, cfg):
method forward (line 770) | def forward(self, x, bboxes=None):
FILE: build/lib/slowfast/models/resnet_helper.py
function get_trans_func (line 14) | def get_trans_func(name):
class BasicTransform (line 29) | class BasicTransform(nn.Module):
method __init__ (line 34) | def __init__(
method _construct (line 77) | def _construct(self, dim_in, dim_out, stride, dilation, norm_module):
method forward (line 110) | def forward(self, x):
class X3DTransform (line 120) | class X3DTransform(nn.Module):
method __init__ (line 127) | def __init__(
method _construct (line 190) | def _construct(
method forward (line 255) | def forward(self, x):
class BottleneckTransform (line 261) | class BottleneckTransform(nn.Module):
method __init__ (line 267) | def __init__(
method _construct (line 321) | def _construct(
method forward (line 379) | def forward(self, x):
class ResBlock (line 397) | class ResBlock(nn.Module):
method __init__ (line 402) | def __init__(
method _construct (line 470) | def _construct(
method forward (line 514) | def forward(self, x):
class ResStage (line 526) | class ResStage(nn.Module):
method __init__ (line 537) | def __init__(
method _construct (line 652) | def _construct(
method forward (line 701) | def forward(self, inputs):
FILE: build/lib/slowfast/models/stem_helper.py
function get_stem_func (line 9) | def get_stem_func(name):
class VideoModelStem (line 20) | class VideoModelStem(nn.Module):
method __init__ (line 26) | def __init__(
method _construct_stem (line 98) | def _construct_stem(self, dim_in, dim_out, norm_module, stem_func_name):
method forward (line 115) | def forward(self, x):
class ResNetBasicStem (line 127) | class ResNetBasicStem(nn.Module):
method __init__ (line 134) | def __init__(
method _construct_stem (line 181) | def _construct_stem(self, dim_in, dim_out, norm_module):
method forward (line 198) | def forward(self, x):
class X3DStem (line 206) | class X3DStem(nn.Module):
method __init__ (line 213) | def __init__(
method _construct_stem (line 260) | def _construct_stem(self, dim_in, dim_out, norm_module):
method forward (line 284) | def forward(self, x):
class PatchEmbed (line 292) | class PatchEmbed(nn.Module):
method __init__ (line 297) | def __init__(
method forward (line 319) | def forward(self, x):
FILE: build/lib/slowfast/models/utils.py
function round_width (line 8) | def round_width(width, multiplier, min_width=1, divisor=1, verbose=False):
function validate_checkpoint_wrapper_import (line 24) | def validate_checkpoint_wrapper_import(checkpoint_wrapper):
FILE: build/lib/slowfast/models/video_model_builder.py
class FuseFastToSlow (line 105) | class FuseFastToSlow(nn.Module):
method __init__ (line 112) | def __init__(
method forward (line 155) | def forward(self, x):
class SlowFast (line 166) | class SlowFast(nn.Module):
method __init__ (line 175) | def __init__(self, cfg):
method _construct_network (line 196) | def _construct_network(self, cfg):
method forward (line 418) | def forward(self, x, bboxes=None):
class ResNet (line 440) | class ResNet(nn.Module):
method __init__ (line 454) | def __init__(self, cfg):
method _construct_network (line 475) | def _construct_network(self, cfg):
method forward (line 638) | def forward(self, x, bboxes=None):
class X3D (line 657) | class X3D(nn.Module):
method __init__ (line 666) | def __init__(self, cfg):
method _round_repeats (line 704) | def _round_repeats(self, repeats, multiplier):
method _construct_network (line 711) | def _construct_network(self, cfg):
method forward (line 796) | def forward(self, x, bboxes=None):
class MViT (line 803) | class MViT(nn.Module):
method __init__ (line 815) | def __init__(self, cfg):
method _init_weights (line 1027) | def _init_weights(self, m):
method no_weight_decay (line 1037) | def no_weight_decay(self):
method _get_pos_embed (line 1060) | def _get_pos_embed(self, pos_embed, bcthw):
method forward (line 1084) | def forward(self, x):
FILE: build/lib/slowfast/site.py
function __boot (line 1) | def __boot():
FILE: build/lib/slowfast/utils/ava_eval_helper.py
function make_image_key (line 49) | def make_image_key(video_id, timestamp):
function read_csv (line 54) | def read_csv(csv_file, class_whitelist=None, load_score=False):
function read_exclusions (line 91) | def read_exclusions(exclusions_file):
function read_labelmap (line 109) | def read_labelmap(labelmap_file):
function evaluate_ava_from_files (line 127) | def evaluate_ava_from_files(labelmap, groundtruth, detections, exclusions):
function evaluate_ava (line 137) | def evaluate_ava(
function run_evaluation (line 174) | def run_evaluation(
function get_ava_eval_data (line 251) | def get_ava_eval_data(
function write_results (line 290) | def write_results(detections, filename):
FILE: build/lib/slowfast/utils/ava_evaluation/label_map_util.py
function _validate_label_map (line 29) | def _validate_label_map(label_map):
function create_category_index (line 43) | def create_category_index(categories):
function get_max_label_map_index (line 62) | def get_max_label_map_index(label_map):
function convert_label_map_to_categories (line 74) | def convert_label_map_to_categories(
function load_labelmap (line 129) | def load_labelmap(path):
function get_label_map_dict (line 148) | def get_label_map_dict(label_map_path, use_display_name=False):
function create_category_index_from_labelmap (line 168) | def create_category_index_from_labelmap(label_map_path):
function create_class_agnostic_category_index (line 185) | def create_class_agnostic_category_index():
FILE: build/lib/slowfast/utils/ava_evaluation/metrics.py
function compute_precision_recall (line 21) | def compute_precision_recall(scores, labels, num_gt):
function compute_average_precision (line 74) | def compute_average_precision(precision, recall):
function compute_cor_loc (line 128) | def compute_cor_loc(
FILE: build/lib/slowfast/utils/ava_evaluation/np_box_list.py
class BoxList (line 27) | class BoxList(object):
method __init__ (line 39) | def __init__(self, data):
method num_boxes (line 64) | def num_boxes(self):
method get_extra_fields (line 68) | def get_extra_fields(self):
method has_field (line 72) | def has_field(self, field):
method add_field (line 75) | def add_field(self, field, field_data):
method get (line 92) | def get(self):
method get_field (line 100) | def get_field(self, field):
method get_coordinates (line 116) | def get_coordinates(self):
method _is_valid_boxes (line 129) | def _is_valid_boxes(self, data):
FILE: build/lib/slowfast/utils/ava_evaluation/np_box_list_ops.py
class SortOrder (line 33) | class SortOrder(object):
function area (line 45) | def area(boxlist):
function intersection (line 58) | def intersection(boxlist1, boxlist2):
function iou (line 71) | def iou(boxlist1, boxlist2):
function ioa (line 84) | def ioa(boxlist1, boxlist2):
function gather (line 101) | def gather(boxlist, indices, fields=None):
function sort_by_field (line 136) | def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
function non_max_suppression (line 167) | def non_max_suppression(
function multi_class_non_max_suppression (line 242) | def multi_class_non_max_suppression(
function scale (line 321) | def scale(boxlist, y_scale, x_scale):
function clip_to_window (line 349) | def clip_to_window(boxlist, window):
function prune_non_overlapping_boxes (line 385) | def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
function prune_outside_window (line 410) | def prune_outside_window(boxlist, window):
function concatenate (line 448) | def concatenate(boxlists, fields=None):
function filter_scores_greater_than (line 503) | def filter_scores_greater_than(boxlist, thresh):
function change_coordinate_frame (line 539) | def change_coordinate_frame(boxlist, window):
function _copy_extra_fields (line 572) | def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
function _update_valid_indices_by_removing_high_iou_boxes (line 589) | def _update_valid_indices_by_removing_high_iou_boxes(
FILE: build/lib/slowfast/utils/ava_evaluation/np_box_mask_list.py
class BoxMaskList (line 29) | class BoxMaskList(np_box_list.BoxList):
method __init__ (line 37) | def __init__(self, box_data, mask_data):
method get_masks (line 67) | def get_masks(self):
FILE: build/lib/slowfast/utils/ava_evaluation/np_box_mask_list_ops.py
function box_list_to_box_mask_list (line 33) | def box_list_to_box_mask_list(boxlist):
function area (line 57) | def area(box_mask_list):
function intersection (line 69) | def intersection(box_mask_list1, box_mask_list2):
function iou (line 84) | def iou(box_mask_list1, box_mask_list2):
function ioa (line 99) | def ioa(box_mask_list1, box_mask_list2):
function gather (line 118) | def gather(box_mask_list, indices, fields=None):
function sort_by_field (line 151) | def sort_by_field(
function non_max_suppression (line 174) | def non_max_suppression(
function multi_class_non_max_suppression (line 255) | def multi_class_non_max_suppression(
function prune_non_overlapping_masks (line 337) | def prune_non_overlapping_masks(box_mask_list1, box_mask_list2, minoverl...
function concatenate (line 365) | def concatenate(box_mask_lists, fields=None):
function filter_scores_greater_than (line 395) | def filter_scores_greater_than(box_mask_list, thresh):
FILE: build/lib/slowfast/utils/ava_evaluation/np_box_ops.py
function area (line 31) | def area(boxes):
function intersection (line 43) | def intersection(boxes1, boxes2):
function iou (line 71) | def iou(boxes1, boxes2):
function ioa (line 92) | def ioa(boxes1, boxes2):
FILE: build/lib/slowfast/utils/ava_evaluation/np_mask_ops.py
function area (line 33) | def area(masks):
function intersection (line 51) | def intersection(masks1, masks2):
function iou (line 79) | def iou(masks1, masks2):
function ioa (line 107) | def ioa(masks1, masks2):
FILE: build/lib/slowfast/utils/ava_evaluation/object_detection_evaluation.py
class DetectionEvaluator (line 44) | class DetectionEvaluator(object):
method __init__ (line 64) | def __init__(self, categories):
method add_single_ground_truth_image_info (line 75) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
method add_single_detected_image_info (line 86) | def add_single_detected_image_info(self, image_id, detections_dict):
method evaluate (line 97) | def evaluate(self):
method clear (line 102) | def clear(self):
class ObjectDetectionEvaluator (line 107) | class ObjectDetectionEvaluator(DetectionEvaluator):
method __init__ (line 110) | def __init__(
method add_single_ground_truth_image_info (line 158) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
method add_single_detected_image_info (line 236) | def add_single_detected_image_info(self, image_id, detections_dict):
method evaluate (line 287) | def evaluate(self):
method clear (line 346) | def clear(self):
class PascalDetectionEvaluator (line 357) | class PascalDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 360) | def __init__(self, categories, matching_iou_threshold=0.5):
class WeightedPascalDetectionEvaluator (line 370) | class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 384) | def __init__(self, categories, matching_iou_threshold=0.5):
class PascalInstanceSegmentationEvaluator (line 394) | class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
method __init__ (line 397) | def __init__(self, categories, matching_iou_threshold=0.5):
class WeightedPascalInstanceSegmentationEvaluator (line 408) | class WeightedPascalInstanceSegmentationEvaluator(ObjectDetectionEvaluat...
method __init__ (line 422) | def __init__(self, categories, matching_iou_threshold=0.5):
class OpenImagesDetectionEvaluator (line 433) | class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 440) | def __init__(
method add_single_ground_truth_image_info (line 460) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
class ObjectDetectionEvaluation (line 534) | class ObjectDetectionEvaluation(object):
method __init__ (line 537) | def __init__(
method _initialize_detections (line 569) | def _initialize_detections(self):
method clear_detections (line 580) | def clear_detections(self):
method add_single_ground_truth_image_info (line 583) | def add_single_ground_truth_image_info(
method add_single_detected_image_info (line 640) | def add_single_detected_image_info(
method _update_ground_truth_statistics (line 727) | def _update_ground_truth_statistics(
method evaluate (line 760) | def evaluate(self):
FILE: build/lib/slowfast/utils/ava_evaluation/per_image_evaluation.py
class PerImageEvaluation (line 39) | class PerImageEvaluation(object):
method __init__ (line 42) | def __init__(self, num_groundtruth_classes, matching_iou_threshold=0.5):
method compute_object_detection_metrics (line 53) | def compute_object_detection_metrics(
method _compute_tp_fp (line 128) | def _compute_tp_fp(
method _get_overlaps_and_scores_box_mode (line 223) | def _get_overlaps_and_scores_box_mode(
method _compute_tp_fp_for_single_class (line 261) | def _compute_tp_fp_for_single_class(
method _get_ith_class_arrays (line 354) | def _get_ith_class_arrays(
method _remove_invalid_boxes (line 411) | def _remove_invalid_boxes(
FILE: build/lib/slowfast/utils/ava_evaluation/standard_fields.py
class InputDataFields (line 35) | class InputDataFields(object):
class DetectionResultFields (line 100) | class DetectionResultFields(object):
class BoxListFields (line 126) | class BoxListFields(object):
class TfExampleFields (line 152) | class TfExampleFields(object):
FILE: build/lib/slowfast/utils/benchmark.py
function benchmark_data_loading (line 20) | def benchmark_data_loading(cfg):
FILE: build/lib/slowfast/utils/bn_helper.py
function compute_and_update_bn_stats (line 11) | def compute_and_update_bn_stats(model, data_loader, num_batches=200):
FILE: build/lib/slowfast/utils/c2_model_loading.py
function get_name_convert_func (line 9) | def get_name_convert_func():
FILE: build/lib/slowfast/utils/checkpoint.py
function make_checkpoint_dir (line 21) | def make_checkpoint_dir(path_to_job):
function get_checkpoint_dir (line 37) | def get_checkpoint_dir(path_to_job):
function get_path_to_checkpoint (line 46) | def get_path_to_checkpoint(path_to_job, epoch):
function get_last_checkpoint (line 57) | def get_last_checkpoint(path_to_job):
function has_checkpoint (line 73) | def has_checkpoint(path_to_job):
function is_checkpoint_epoch (line 84) | def is_checkpoint_epoch(cfg, cur_epoch, multigrid_schedule=None):
function save_checkpoint (line 107) | def save_checkpoint(path_to_job, model, optimizer, loss_scaler, epoch, c...
function inflate_weight (line 141) | def inflate_weight(state_dict_2d, state_dict_3d):
function load_checkpoint (line 180) | def load_checkpoint(
function sub_to_normal_bn (line 353) | def sub_to_normal_bn(sd):
function c2_normal_to_sub_bn (line 394) | def c2_normal_to_sub_bn(key, model_keys):
function normal_to_sub_bn (line 414) | def normal_to_sub_bn(checkpoint_sd, model_sd):
function load_test_checkpoint (line 455) | def load_test_checkpoint(cfg, model):
function load_train_checkpoint (line 495) | def load_train_checkpoint(cfg, model, optimizer, loss_scaler):
FILE: build/lib/slowfast/utils/checkpoint_amp.py
function make_checkpoint_dir (line 21) | def make_checkpoint_dir(path_to_job):
function get_checkpoint_dir (line 37) | def get_checkpoint_dir(path_to_job):
function get_path_to_checkpoint (line 46) | def get_path_to_checkpoint(path_to_job, epoch):
function get_last_checkpoint (line 57) | def get_last_checkpoint(path_to_job):
function has_checkpoint (line 73) | def has_checkpoint(path_to_job):
function is_checkpoint_epoch (line 84) | def is_checkpoint_epoch(cfg, cur_epoch, multigrid_schedule=None):
function save_checkpoint (line 107) | def save_checkpoint(path_to_job, model, optimizer, loss_scaler, epoch, c...
function inflate_weight (line 141) | def inflate_weight(state_dict_2d, state_dict_3d):
function load_checkpoint (line 180) | def load_checkpoint(
function sub_to_normal_bn (line 353) | def sub_to_normal_bn(sd):
function c2_normal_to_sub_bn (line 394) | def c2_normal_to_sub_bn(key, model_keys):
function normal_to_sub_bn (line 414) | def normal_to_sub_bn(checkpoint_sd, model_sd):
function load_test_checkpoint (line 455) | def load_test_checkpoint(cfg, model):
function load_train_checkpoint (line 495) | def load_train_checkpoint(cfg, model, optimizer, loss_scaler):
FILE: build/lib/slowfast/utils/distributed.py
function all_gather (line 22) | def all_gather(tensors):
function all_reduce (line 44) | def all_reduce(tensors, average=True):
function init_process_group (line 63) | def init_process_group(
function is_master_proc (line 101) | def is_master_proc(num_gpus=8):
function is_root_proc (line 111) | def is_root_proc():
function get_rank (line 121) | def get_rank():
function synchronize (line 132) | def synchronize():
function _get_global_gloo_group (line 148) | def _get_global_gloo_group():
function _serialize_to_tensor (line 161) | def _serialize_to_tensor(data, group):
function _pad_to_largest_tensor (line 189) | def _pad_to_largest_tensor(tensor, group):
function all_gather_unaligned (line 225) | def all_gather_unaligned(data, group=None):
class GatherLayer (line 264) | class GatherLayer(torch.autograd.Function):
method forward (line 268) | def forward(ctx, input):
method backward (line 275) | def backward(ctx, *grads):
class AllGatherWithGradient (line 282) | class AllGatherWithGradient(torch.autograd.Function):
method forward (line 286) | def forward(ctx, input):
method backward (line 294) | def backward(ctx, grad_output):
FILE: build/lib/slowfast/utils/env.py
function setup_environment (line 13) | def setup_environment():
FILE: build/lib/slowfast/utils/logging.py
function _suppress_print (line 19) | def _suppress_print():
function _cached_log_stream (line 31) | def _cached_log_stream(filename):
function setup_logging (line 40) | def setup_logging(output_dir=None):
function get_logger (line 77) | def get_logger(name):
function log_json_stats (line 87) | def log_json_stats(stats, output_dir=None):
FILE: build/lib/slowfast/utils/lr_policy.py
function get_lr_at_epoch (line 9) | def get_lr_at_epoch(cfg, cur_epoch):
function lr_func_cosine (line 30) | def lr_func_cosine(cfg, cur_epoch):
function lr_func_steps_with_relative_lrs (line 56) | def lr_func_steps_with_relative_lrs(cfg, cur_epoch):
function get_step_index (line 69) | def get_step_index(cfg, cur_epoch):
function get_lr_func (line 84) | def get_lr_func(lr_policy):
FILE: build/lib/slowfast/utils/meters.py
function get_ava_mini_groundtruth (line 28) | def get_ava_mini_groundtruth(full_groundtruth):
class AVAMeter (line 46) | class AVAMeter(object):
method __init__ (line 51) | def __init__(self, overall_iters, cfg, mode):
method log_iter_stats (line 86) | def log_iter_stats(self, cur_epoch, cur_iter):
method iter_tic (line 138) | def iter_tic(self):
method iter_toc (line 145) | def iter_toc(self):
method data_toc (line 152) | def data_toc(self):
method reset (line 156) | def reset(self):
method update_stats (line 166) | def update_stats(self, preds, ori_boxes, metadata, loss=None, lr=None):
method finalize_metrics (line 185) | def finalize_metrics(self, log=True):
method log_epoch_stats (line 212) | def log_epoch_stats(self, cur_epoch):
class TestMeter (line 231) | class TestMeter(object):
method __init__ (line 239) | def __init__(
method reset (line 287) | def reset(self):
method update_stats (line 297) | def update_stats(self, preds, labels, clip_ids):
method log_iter_stats (line 332) | def log_iter_stats(self, cur_iter):
method iter_tic (line 348) | def iter_tic(self):
method iter_toc (line 355) | def iter_toc(self):
method data_toc (line 362) | def data_toc(self):
method finalize_metrics (line 366) | def finalize_metrics(self, ks=(1, 5)):
class ScalarMeter (line 407) | class ScalarMeter(object):
method __init__ (line 414) | def __init__(self, window_size):
method reset (line 423) | def reset(self):
method add_value (line 431) | def add_value(self, value):
method get_win_median (line 439) | def get_win_median(self):
method get_win_avg (line 445) | def get_win_avg(self):
method get_global_avg (line 451) | def get_global_avg(self):
class TrainMeter (line 458) | class TrainMeter(object):
method __init__ (line 463) | def __init__(self, epoch_iters, cfg):
method reset (line 487) | def reset(self):
method iter_tic (line 500) | def iter_tic(self):
method iter_toc (line 507) | def iter_toc(self):
method data_toc (line 514) | def data_toc(self):
method update_stats (line 518) | def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
method log_iter_stats (line 541) | def log_iter_stats(self, cur_epoch, cur_iter):
method log_epoch_stats (line 571) | def log_epoch_stats(self, cur_epoch):
class ValMeter (line 602) | class ValMeter(object):
method __init__ (line 607) | def __init__(self, max_iter, cfg):
method reset (line 632) | def reset(self):
method iter_tic (line 645) | def iter_tic(self):
method iter_toc (line 652) | def iter_toc(self):
method data_toc (line 659) | def data_toc(self):
method update_stats (line 663) | def update_stats(self, top1_err, top5_err, mb_size):
method update_predictions (line 677) | def update_predictions(self, preds, labels):
method log_iter_stats (line 688) | def log_iter_stats(self, cur_epoch, cur_iter):
method log_epoch_stats (line 712) | def log_epoch_stats(self, cur_epoch):
function get_map (line 744) | def get_map(preds, labels):
class EpochTimer (line 771) | class EpochTimer:
method __init__ (line 776) | def __init__(self) -> None:
method reset (line 781) | def reset(self) -> None:
method epoch_tic (line 788) | def epoch_tic(self):
method epoch_toc (line 794) | def epoch_toc(self):
method last_epoch_time (line 801) | def last_epoch_time(self):
method avg_epoch_time (line 809) | def avg_epoch_time(self):
method median_epoch_time (line 817) | def median_epoch_time(self):
FILE: build/lib/slowfast/utils/metrics.py
function topks_correct (line 9) | def topks_correct(preds, labels, ks):
function topk_errors (line 43) | def topk_errors(preds, labels, ks):
function topk_accuracies (line 55) | def topk_accuracies(preds, labels, ks):
FILE: build/lib/slowfast/utils/misc.py
function check_nan_losses (line 26) | def check_nan_losses(loss):
function params_count (line 36) | def params_count(model, ignore_bn=False):
function gpu_mem_usage (line 53) | def gpu_mem_usage():
function cpu_mem_usage (line 64) | def cpu_mem_usage():
function _get_model_analysis_input (line 78) | def _get_model_analysis_input(cfg, use_train_input):
function get_model_stats (line 137) | def get_model_stats(model, cfg, mode, use_train_input):
function log_model_info (line 172) | def log_model_info(model, cfg, use_train_input=True):
function is_eval_epoch (line 200) | def is_eval_epoch(cfg, cur_epoch, multigrid_schedule):
function plot_input (line 224) | def plot_input(tensor, bboxes=(), texts=(), path="./tmp_vis.png"):
function frozen_bn_stats (line 254) | def frozen_bn_stats(model):
function aggregate_sub_bn_stats (line 265) | def aggregate_sub_bn_stats(module):
function launch_job (line 283) | def launch_job(cfg, init_method, func, daemon=False):
function get_class_names (line 314) | def get_class_names(path, parent_path=None, subset_path=None):
FILE: build/lib/slowfast/utils/multigrid.py
class MultigridSchedule (line 13) | class MultigridSchedule(object):
method init_multigrid (line 18) | def init_multigrid(self, cfg):
method update_long_cycle (line 63) | def update_long_cycle(self, cfg, cur_epoch):
method get_long_cycle_schedule (line 123) | def get_long_cycle_schedule(self, cfg):
function print_schedule (line 215) | def print_schedule(schedule):
function get_current_long_cycle_shape (line 224) | def get_current_long_cycle_shape(schedule, epoch):
FILE: build/lib/slowfast/utils/multiprocessing.py
function run (line 9) | def run(
FILE: build/lib/slowfast/utils/parser.py
function parse_args (line 13) | def parse_args():
function load_config (line 67) | def load_config(args):
FILE: build/lib/slowfast/utils/weight_init_helper.py
function init_weights (line 10) | def init_weights(
FILE: build/lib/slowfast/visualization/async_predictor.py
class AsycnActionPredictor (line 17) | class AsycnActionPredictor:
class _Predictor (line 18) | class _Predictor(mp.Process):
method __init__ (line 19) | def __init__(self, cfg, task_queue, result_queue, gpu_id=None):
method run (line 41) | def run(self):
method __init__ (line 54) | def __init__(self, cfg, result_queue=None):
method put (line 78) | def put(self, task):
method get (line 88) | def get(self):
method __call__ (line 106) | def __call__(self, task):
method shutdown (line 110) | def shutdown(self):
method result_available (line 115) | def result_available(self):
method default_buffer_size (line 122) | def default_buffer_size(self):
class AsyncVis (line 126) | class AsyncVis:
class _VisWorker (line 127) | class _VisWorker(mp.Process):
method __init__ (line 128) | def __init__(self, video_vis, task_queue, result_queue):
method run (line 141) | def run(self):
method __init__ (line 154) | def __init__(self, video_vis, n_workers=None):
method put (line 183) | def put(self, task):
method get (line 193) | def get(self):
method __call__ (line 213) | def __call__(self, task):
method shutdown (line 220) | def shutdown(self):
method result_available (line 225) | def result_available(self):
method default_buffer_size (line 229) | def default_buffer_size(self):
class _StopToken (line 233) | class _StopToken:
class AsyncDemo (line 237) | class AsyncDemo:
method __init__ (line 242) | def __init__(self, cfg, async_vis):
method put (line 254) | def put(self, task):
method get (line 264) | def get(self):
function draw_predictions (line 276) | def draw_predictions(task, video_vis):
FILE: build/lib/slowfast/visualization/ava_demo_precomputed_boxes.py
class AVAVisualizerWithPrecomputedBox (line 24) | class AVAVisualizerWithPrecomputedBox:
method __init__ (line 30) | def __init__(self, cfg):
method get_output_file (line 77) | def get_output_file(self, path):
method get_input_clip (line 91) | def get_input_clip(self, keyframe_idx):
method get_predictions (line 124) | def get_predictions(self):
method draw_video (line 193) | def draw_video(self):
method __call__ (line 313) | def __call__(self):
method display (line 317) | def display(self, frame):
method _get_keyframe_clip (line 327) | def _get_keyframe_clip(self, keyframe_idx):
method _get_frame_range (line 339) | def _get_frame_range(self, start_idx, num_frames):
function merge_pred_gt_boxes (line 362) | def merge_pred_gt_boxes(pred_dict, gt_dict=None):
function load_boxes_labels (line 390) | def load_boxes_labels(cfg, video_name, fps, img_width, img_height):
FILE: build/lib/slowfast/visualization/demo_loader.py
class VideoManager (line 17) | class VideoManager:
method __init__ (line 22) | def __init__(self, cfg):
method __iter__ (line 67) | def __iter__(self):
method __next__ (line 70) | def __next__(self):
method get_output_file (line 100) | def get_output_file(self, path, fps=30):
method display (line 115) | def display(self, task):
method clean (line 130) | def clean(self):
method start (line 140) | def start(self):
method join (line 143) | def join(self):
class ThreadVideoManager (line 147) | class ThreadVideoManager:
method __init__ (line 153) | def __init__(self, cfg):
method get_output_file (line 211) | def get_output_file(self, path, fps=30):
method __iter__ (line 226) | def __iter__(self):
method put_fn (line 229) | def put_fn(self):
method __next__ (line 268) | def __next__(self):
method get_fn (line 295) | def get_fn(self):
method display (line 325) | def display(self, task):
method start (line 335) | def start(self):
method join (line 350) | def join(self):
method clean (line 353) | def clean(self):
FILE: build/lib/slowfast/visualization/gradcam_utils.py
class GradCAM (line 12) | class GradCAM:
method __init__ (line 19) | def __init__(
method _register_single_hook (line 45) | def _register_single_hook(self, layer_name):
method _register_hooks (line 63) | def _register_hooks(self):
method _calculate_localization_map (line 70) | def _calculate_localization_map(self, inputs, labels=None):
method __call__ (line 142) | def __call__(self, inputs, labels=None, alpha=0.5):
FILE: build/lib/slowfast/visualization/prediction_vis.py
class WrongPredictionVis (line 16) | class WrongPredictionVis:
method __init__ (line 22) | def __init__(self, cfg):
method _pick_wrong_preds (line 46) | def _pick_wrong_preds(self, labels, preds):
method visualize_vid (line 72) | def visualize_vid(self, video_input, labels, preds, batch_idx):
method wrong_class_prediction (line 122) | def wrong_class_prediction(self):
method clean (line 131) | def clean(self):
FILE: build/lib/slowfast/visualization/predictor.py
class Predictor (line 20) | class Predictor:
method __init__ (line 25) | def __init__(self, cfg, gpu_id=None):
method __call__ (line 49) | def __call__(self, task):
class ActionPredictor (line 119) | class ActionPredictor:
method __init__ (line 124) | def __init__(self, cfg, async_vis=None, gpu_id=None):
method put (line 135) | def put(self, task):
method get (line 146) | def get(self):
class Detectron2Predictor (line 158) | class Detectron2Predictor:
method __init__ (line 164) | def __init__(self, cfg, gpu_id=None):
method __call__ (line 189) | def __call__(self, task):
FILE: build/lib/slowfast/visualization/tensorboard_vis.py
class TensorboardWriter (line 20) | class TensorboardWriter(object):
method __init__ (line 25) | def __init__(self, cfg):
method add_scalars (line 90) | def add_scalars(self, data_dict, global_step=None):
method plot_eval (line 101) | def plot_eval(self, preds, labels, global_step=None):
method add_video (line 171) | def add_video(self, vid_tensor, tag="Video Input", global_step=None, f...
method plot_weights_and_activations (line 183) | def plot_weights_and_activations(
method flush (line 227) | def flush(self):
method close (line 230) | def close(self):
function add_confusion_matrix (line 235) | def add_confusion_matrix(
function plot_hist (line 280) | def plot_hist(
function add_ndim_array (line 332) | def add_ndim_array(
function add_heatmap (line 408) | def add_heatmap(tensor):
FILE: build/lib/slowfast/visualization/utils.py
function get_confusion_matrix (line 16) | def get_confusion_matrix(preds, labels, num_classes, normalize="true"):
function plot_confusion_matrix (line 48) | def plot_confusion_matrix(cmtx, num_classes, class_names=None, figsize=N...
function plot_topk_histogram (line 92) | def plot_topk_histogram(tag, array, k=10, class_names=None, figsize=None):
class GetWeightAndActivation (line 158) | class GetWeightAndActivation:
method __init__ (line 163) | def __init__(self, model, layers):
method _get_layer (line 178) | def _get_layer(self, layer_name):
method _register_single_hook (line 191) | def _register_single_hook(self, layer_name):
method _register_hooks (line 204) | def _register_hooks(self):
method get_activations (line 211) | def get_activations(self, input, bboxes=None):
method get_weights (line 236) | def get_weights(self):
function get_indexing (line 255) | def get_indexing(string):
function process_layer_index_data (line 278) | def process_layer_index_data(layer_ls, layer_name_prefix=""):
function process_cv2_inputs (line 304) | def process_cv2_inputs(frames, cfg):
function get_layer (line 325) | def get_layer(model, layer_name):
class TaskInfo (line 343) | class TaskInfo:
method __init__ (line 344) | def __init__(self):
method add_frames (line 355) | def add_frames(self, idx, frames):
method add_bboxes (line 365) | def add_bboxes(self, bboxes):
method add_action_preds (line 371) | def add_action_preds(self, preds):
FILE: build/lib/slowfast/visualization/video_visualizer.py
function _create_text_labels (line 18) | def _create_text_labels(classes, scores, class_names, ground_truth=False):
class ImgVisualizer (line 45) | class ImgVisualizer(Visualizer):
method __init__ (line 46) | def __init__(self, img_rgb, meta, **kwargs):
method draw_text (line 61) | def draw_text(
method draw_multiple_text (line 109) | def draw_multiple_text(
method draw_multiple_text_upward (line 184) | def draw_multiple_text_upward(
method draw_multiple_text_downward (line 237) | def draw_multiple_text_downward(
method _align_x_coordinate (line 290) | def _align_x_coordinate(self, box_coordinate):
method _align_y_top (line 312) | def _align_y_top(self, box_coordinate, num_text, textbox_width):
method _align_y_bottom (line 330) | def _align_y_bottom(self, box_coordinate, num_text, textbox_width):
class VideoVisualizer (line 349) | class VideoVisualizer:
method __init__ (line 350) | def __init__(
method _get_color (line 396) | def _get_color(self, class_id):
method draw_one_frame (line 404) | def draw_one_frame(
method draw_clip_range (line 514) | def draw_clip_range(
method draw_clip (line 568) | def draw_clip(
method _adjust_frames_type (line 635) | def _adjust_frames_type(self, frames):
method _get_thres_array (line 657) | def _get_thres_array(self, common_class_names=None):
FILE: slowfast/config/custom_config.py
function add_custom_config (line 7) | def add_custom_config(_C):
FILE: slowfast/config/defaults.py
function assert_and_infer_cfg (line 990) | def assert_and_infer_cfg(cfg):
function get_cfg (line 1018) | def get_cfg():
FILE: slowfast/datasets/ava_dataset.py
class Ava (line 18) | class Ava(torch.utils.data.Dataset):
method __init__ (line 23) | def __init__(self, cfg, split):
method _load_data (line 49) | def _load_data(self, cfg):
method print_summary (line 87) | def print_summary(self):
method __len__ (line 98) | def __len__(self):
method num_videos (line 106) | def num_videos(self):
method _images_and_boxes_preprocessing_cv2 (line 113) | def _images_and_boxes_preprocessing_cv2(self, imgs, boxes):
method _images_and_boxes_preprocessing (line 245) | def _images_and_boxes_preprocessing(self, imgs, boxes):
method __getitem__ (line 352) | def __getitem__(self, idx):
FILE: slowfast/datasets/ava_helper.py
function load_image_lists (line 16) | def load_image_lists(cfg, is_train):
function load_boxes_and_labels (line 69) | def load_boxes_and_labels(cfg, mode):
function get_keyframe_data (line 117) | def get_keyframe_data(boxes_and_labels):
function get_num_boxes_used (line 163) | def get_num_boxes_used(keyframe_indices, keyframe_boxes_and_labels):
function parse_bboxes_file (line 182) | def parse_bboxes_file(
FILE: slowfast/datasets/build.py
function build_dataset (line 15) | def build_dataset(dataset_name, cfg, split):
FILE: slowfast/datasets/charades.py
class Charades (line 20) | class Charades(torch.utils.data.Dataset):
method __init__ (line 31) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 74) | def _construct_loader(self):
method get_seq_frames (line 113) | def get_seq_frames(self, index):
method __getitem__ (line 154) | def __getitem__(self, index):
method __len__ (line 249) | def __len__(self):
method num_videos (line 257) | def num_videos(self):
FILE: slowfast/datasets/cv2_transform.py
function clip_boxes_to_image (line 9) | def clip_boxes_to_image(boxes, height, width):
function random_short_side_scale_jitter_list (line 29) | def random_short_side_scale_jitter_list(images, min_size, max_size, boxe...
function scale (line 77) | def scale(size, image):
function scale_boxes (line 106) | def scale_boxes(size, boxes, height, width):
function horizontal_flip_list (line 134) | def horizontal_flip_list(prob, images, order="CHW", boxes=None):
function spatial_shift_crop_list (line 166) | def spatial_shift_crop_list(size, images, spatial_shift_pos, boxes=None):
function CHW2HWC (line 216) | def CHW2HWC(image):
function HWC2CHW (line 228) | def HWC2CHW(image):
function color_jitter_list (line 240) | def color_jitter_list(
function lighting_list (line 273) | def lighting_list(imgs, alphastd, eigval, eigvec, alpha=None):
function color_normalization (line 302) | def color_normalization(image, mean, stddev):
function pad_image (line 319) | def pad_image(image, pad_size, order="CHW"):
function horizontal_flip (line 344) | def horizontal_flip(prob, image, order="CHW"):
function flip_boxes (line 365) | def flip_boxes(boxes, im_width):
function crop_boxes (line 381) | def crop_boxes(boxes, x_offset, y_offset):
function random_crop_list (line 394) | def random_crop_list(images, size, pad_size=0, order="CHW", boxes=None):
function center_crop (line 458) | def center_crop(size, image):
function random_scale_jitter (line 477) | def random_scale_jitter(image, min_size, max_size):
function random_scale_jitter_list (line 495) | def random_scale_jitter_list(images, min_size, max_size):
function random_sized_crop (line 513) | def random_sized_crop(image, size, area_frac=0.08):
function lighting (line 556) | def lighting(img, alphastd, eigval, eigvec):
function random_sized_crop_list (line 582) | def random_sized_crop_list(images, size, crop_area_fraction=0.08):
function blend (line 632) | def blend(image1, image2, alpha):
function grayscale (line 636) | def grayscale(image):
function saturation (line 654) | def saturation(var, image):
function brightness (line 668) | def brightness(var, image):
function contrast (line 682) | def contrast(var, image):
function saturation_list (line 697) | def saturation_list(var, images):
function brightness_list (line 715) | def brightness_list(var, images):
function contrast_list (line 733) | def contrast_list(var, images):
function color_jitter (line 752) | def color_jitter(image, img_brightness=0, img_contrast=0, img_saturation...
function revert_scaled_boxes (line 783) | def revert_scaled_boxes(size, boxes, img_height, img_width):
FILE: slowfast/datasets/decoder.py
function temporal_sampling (line 11) | def temporal_sampling(frames, start_idx, end_idx, num_samples):
function get_start_end_idx (line 31) | def get_start_end_idx(
function pyav_decode_stream (line 72) | def pyav_decode_stream(
function torchvision_decode (line 114) | def torchvision_decode(
function pyav_decode (line 229) | def pyav_decode(
function decode (line 305) | def decode(
FILE: slowfast/datasets/imagenet.py
class Imagenet (line 26) | class Imagenet(torch.utils.data.Dataset):
method __init__ (line 29) | def __init__(self, cfg, mode, num_retries=10):
method _load_imdb (line 45) | def _load_imdb(self):
method _construct_imdb (line 53) | def _construct_imdb(self):
method load_image (line 76) | def load_image(self, im_path):
method _prepare_im_res (line 86) | def _prepare_im_res(self, im_path):
method _prepare_im_tf (line 123) | def _prepare_im_tf(self, im_path):
method __load__ (line 162) | def __load__(self, index):
method __getitem__ (line 183) | def __getitem__(self, index):
method __len__ (line 202) | def __len__(self):
FILE: slowfast/datasets/kinetics.py
class Kinetics (line 24) | class Kinetics(torch.utils.data.Dataset):
method __init__ (line 35) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 88) | def _construct_loader(self):
method __getitem__ (line 129) | def __getitem__(self, index):
method _aug_frame (line 320) | def _aug_frame(
method _frame_to_list_img (line 385) | def _frame_to_list_img(self, frames):
method _list_img_to_frames (line 391) | def _list_img_to_frames(self, img_list):
method __len__ (line 395) | def __len__(self):
method num_videos (line 403) | def num_videos(self):
FILE: slowfast/datasets/loader.py
function multiple_samples_collate (line 20) | def multiple_samples_collate(batch, fold=False):
function detection_collate (line 48) | def detection_collate(batch):
function construct_loader (line 85) | def construct_loader(cfg, split, is_precise_bn=False):
function shuffle_dataset (line 173) | def shuffle_dataset(loader, cur_epoch):
FILE: slowfast/datasets/mixup.py
function convert_to_one_hot (line 22) | def convert_to_one_hot(targets, num_classes, on_value=1.0, off_value=0.0):
function mixup_target (line 40) | def mixup_target(target, num_classes, lam=1.0, smoothing=0.0):
function rand_bbox (line 67) | def rand_bbox(img_shape, lam, margin=0.0, count=None):
function get_cutmix_bbox (line 90) | def get_cutmix_bbox(img_shape, lam, correct_lam=True, count=None):
class MixUp (line 109) | class MixUp:
method __init__ (line 117) | def __init__(
method _get_mixup_params (line 148) | def _get_mixup_params(self):
method _mix_batch (line 167) | def _mix_batch(self, x):
method __call__ (line 183) | def __call__(self, x, target):
FILE: slowfast/datasets/multigrid_helper.py
class ShortCycleBatchSampler (line 19) | class ShortCycleBatchSampler(Sampler):
method __init__ (line 26) | def __init__(self, sampler, batch_size, drop_last, cfg):
method __iter__ (line 68) | def __iter__(self):
method __len__ (line 82) | def __len__(self):
FILE: slowfast/datasets/ptv_datasets.py
class PTVDatasetWrapper (line 42) | class PTVDatasetWrapper(torch.utils.data.IterableDataset):
method __init__ (line 47) | def __init__(self, num_videos, clips_per_video, crops_per_clip, dataset):
method __next__ (line 61) | def __next__(self):
method sampler (line 68) | def sampler(self):
method __len__ (line 75) | def __len__(self):
method num_videos (line 83) | def num_videos(self):
method __iter__ (line 90) | def __iter__(self):
class PackPathway (line 94) | class PackPathway(torch.nn.Module):
method __init__ (line 100) | def __init__(self, cfg):
method forward (line 104) | def forward(self, x: torch.Tensor):
class DictToTuple (line 108) | class DictToTuple(torch.nn.Module):
method __init__ (line 114) | def __init__(self, num_clips, num_crops):
method forward (line 119) | def forward(self, x: Dict[str, torch.Tensor]):
function div255 (line 129) | def div255(x):
function Ptvkinetics (line 143) | def Ptvkinetics(cfg, mode):
function process_charades_label (line 275) | def process_charades_label(x, mode, num_classes):
function rgb2bgr (line 298) | def rgb2bgr(x):
function Ptvcharades (line 312) | def Ptvcharades(cfg, mode):
function Ptvssv2 (line 455) | def Ptvssv2(cfg, mode):
FILE: slowfast/datasets/rand_augment.py
function _interpolation (line 52) | def _interpolation(kwargs):
function _check_args_tf (line 60) | def _check_args_tf(kwargs):
function shear_x (line 66) | def shear_x(img, factor, **kwargs):
function shear_y (line 73) | def shear_y(img, factor, **kwargs):
function translate_x_rel (line 80) | def translate_x_rel(img, pct, **kwargs):
function translate_y_rel (line 88) | def translate_y_rel(img, pct, **kwargs):
function translate_x_abs (line 96) | def translate_x_abs(img, pixels, **kwargs):
function translate_y_abs (line 103) | def translate_y_abs(img, pixels, **kwargs):
function rotate (line 110) | def rotate(img, degrees, **kwargs):
function auto_contrast (line 144) | def auto_contrast(img, **__):
function invert (line 148) | def invert(img, **__):
function equalize (line 152) | def equalize(img, **__):
function solarize (line 156) | def solarize(img, thresh, **__):
function solarize_add (line 160) | def solarize_add(img, add, thresh=128, **__):
function posterize (line 175) | def posterize(img, bits_to_keep, **__):
function contrast (line 181) | def contrast(img, factor, **__):
function color (line 185) | def color(img, factor, **__):
function brightness (line 189) | def brightness(img, factor, **__):
function sharpness (line 193) | def sharpness(img, factor, **__):
function _randomly_negate (line 197) | def _randomly_negate(v):
function _rotate_level_to_arg (line 202) | def _rotate_level_to_arg(level, _hparams):
function _enhance_level_to_arg (line 209) | def _enhance_level_to_arg(level, _hparams):
function _enhance_increasing_level_to_arg (line 214) | def _enhance_increasing_level_to_arg(level, _hparams):
function _shear_level_to_arg (line 222) | def _shear_level_to_arg(level, _hparams):
function _translate_abs_level_to_arg (line 229) | def _translate_abs_level_to_arg(level, hparams):
function _translate_rel_level_to_arg (line 236) | def _translate_rel_level_to_arg(level, hparams):
function _posterize_level_to_arg (line 244) | def _posterize_level_to_arg(level, _hparams):
function _posterize_increasing_level_to_arg (line 251) | def _posterize_increasing_level_to_arg(level, hparams):
function _posterize_original_level_to_arg (line 258) | def _posterize_original_level_to_arg(level, _hparams):
function _solarize_level_to_arg (line 265) | def _solarize_level_to_arg(level, _hparams):
function _solarize_increasing_level_to_arg (line 271) | def _solarize_increasing_level_to_arg(level, _hparams):
function _solarize_add_level_to_arg (line 277) | def _solarize_add_level_to_arg(level, _hparams):
class AugmentOp (line 339) | class AugmentOp:
method __init__ (line 344) | def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
method __call__ (line 366) | def __call__(self, img_list):
function _select_rand_weights (line 446) | def _select_rand_weights(weight_idx=0, transforms=None):
function rand_augment_ops (line 455) | def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
class RandAugment (line 464) | class RandAugment:
method __init__ (line 465) | def __init__(self, ops, num_layers=2, choice_weights=None):
method __call__ (line 470) | def __call__(self, img):
function rand_augment_transform (line 483) | def rand_augment_transform(config_str, hparams):
FILE: slowfast/datasets/random_erasing.py
function _get_pixels (line 18) | def _get_pixels(
class RandomErasing (line 34) | class RandomErasing:
method __init__ (line 53) | def __init__(
method _erase (line 87) | def _erase(self, img, chan, img_h, img_w, dtype):
method _erase_cube (line 116) | def _erase_cube(
method __call__ (line 158) | def __call__(self, input):
FILE: slowfast/datasets/ssv2.py
class Ssv2 (line 22) | class Ssv2(torch.utils.data.Dataset):
method __init__ (line 33) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 73) | def _construct_loader(self):
method get_seq_frames (line 159) | def get_seq_frames(self, index):
method __getitem__ (line 182) | def __getitem__(self, index):
method __len__ (line 272) | def __len__(self):
method num_videos (line 280) | def num_videos(self):
FILE: slowfast/datasets/sth.py
class Sth (line 26) | class Sth(torch.utils.data.Dataset):
method __init__ (line 37) | def __init__(self, cfg, mode, num_retries=10):
method _construct_loader (line 86) | def _construct_loader(self):
method get_seq_frames (line 141) | def get_seq_frames(self, index, temporal_sample_index):
method __getitem__ (line 170) | def __getitem__(self, index):
method _aug_frame (line 307) | def _aug_frame(
method _frame_to_list_img (line 372) | def _frame_to_list_img(self, frames):
method _list_img_to_frames (line 378) | def _list_img_to_frames(self, img_list):
method __len__ (line 382) | def __len__(self):
method num_videos (line 390) | def num_videos(self):
FILE: slowfast/datasets/transform.py
function _pil_interp (line 33) | def _pil_interp(method):
function random_short_side_scale_jitter (line 47) | def random_short_side_scale_jitter(
function crop_boxes (line 104) | def crop_boxes(boxes, x_offset, y_offset):
function random_crop (line 123) | def random_crop(images, size, boxes=None):
function horizontal_flip (line 159) | def horizontal_flip(prob, images, boxes=None):
function uniform_crop (line 194) | def uniform_crop(images, size, spatial_idx, boxes=None, scale_size=None):
function clip_boxes_to_image (line 257) | def clip_boxes_to_image(boxes, height, width):
function blend (line 279) | def blend(images1, images2, alpha):
function grayscale (line 295) | def grayscale(images):
function color_jitter (line 317) | def color_jitter(images, img_brightness=0, img_contrast=0, img_saturatio...
function brightness_jitter (line 352) | def brightness_jitter(var, images):
function contrast_jitter (line 371) | def contrast_jitter(var, images):
function saturation_jitter (line 391) | def saturation_jitter(var, images):
function lighting_jitter (line 410) | def lighting_jitter(images, alphastd, eigval, eigvec):
function color_normalization (line 458) | def color_normalization(images, mean, stddev):
function _get_param_spatial_crop (line 502) | def _get_param_spatial_crop(
function random_resized_crop (line 544) | def random_resized_crop(
function random_resized_crop_with_shift (line 579) | def random_resized_crop_with_shift(
function create_random_augment (line 624) | def create_random_augment(
function random_sized_crop_img (line 660) | def random_sized_crop_img(
class RandomResizedCropAndInterpolation (line 695) | class RandomResizedCropAndInterpolation:
method __init__ (line 708) | def __init__(
method get_params (line 730) | def get_params(img, scale, ratio):
method __call__ (line 770) | def __call__(self, img):
method __repr__ (line 784) | def __repr__(self):
function transforms_imagenet_train (line 802) | def transforms_imagenet_train(
function temporal_difference (line 903) | def temporal_difference(
function color_jitter_video_ssl (line 929) | def color_jitter_video_ssl(
function augment_raw_frames (line 980) | def augment_raw_frames(frames, time_diff_prob=0.0, gaussian_prob=0.0):
class GaussianBlur (line 1003) | class GaussianBlur(object):
method __init__ (line 1006) | def __init__(self, sigma=[0.1, 2.0]):
method __call__ (line 1009) | def __call__(self, x):
class GaussianBlurVideo (line 1018) | class GaussianBlurVideo(object):
method __init__ (line 1019) | def __init__(
method __call__ (line 1025) | def __call__(self, frames):
FILE: slowfast/datasets/utils.py
function retry_load_images (line 20) | def retry_load_images(image_paths, retry=10, backend="pytorch"):
function get_sequence (line 51) | def get_sequence(center_idx, half_len, sample_rate, num_frames):
function pack_pathway_output (line 74) | def pack_pathway_output(cfg, frames):
function spatial_sampling (line 110) | def spatial_sampling(
function as_binary_vector (line 184) | def as_binary_vector(labels, num_classes):
function aggregate_labels (line 200) | def aggregate_labels(label_list):
function convert_to_video_level_labels (line 215) | def convert_to_video_level_labels(labels):
function load_image_lists (line 231) | def load_image_lists(frame_list_file, prefix="", return_list=False):
function tensor_normalize (line 276) | def tensor_normalize(tensor, mean, std, func=None):
function get_random_sampling_rate (line 298) | def get_random_sampling_rate(long_cycle_sampling_rate, sampling_rate):
function revert_tensor_normalize (line 310) | def revert_tensor_normalize(tensor, mean, std):
function create_sampler (line 327) | def create_sampler(dataset, shuffle, cfg):
function loader_worker_init_fn (line 344) | def loader_worker_init_fn(dataset):
FILE: slowfast/datasets/video_container.py
function get_video_container (line 9) | def get_video_container(path_to_vid, multi_thread_decode=False, backend=...
FILE: slowfast/models/attention.py
function attention_pool (line 14) | def attention_pool(tensor, pool, thw_shape, has_cls_embed=True, norm=None):
function get_rel_pos (line 51) | def get_rel_pos(rel_pos, d):
function cal_rel_pos_spatial (line 67) | def cal_rel_pos_spatial(
function cal_rel_pos_temporal (line 120) | def cal_rel_pos_temporal(attn, q, has_cls_embed, q_shape, k_shape, rel_p...
class MultiScaleAttention (line 162) | class MultiScaleAttention(nn.Module):
method __init__ (line 163) | def __init__(
method forward (line 307) | def forward(self, x, thw_shape):
class MultiScaleBlock (line 445) | class MultiScaleBlock(nn.Module):
method __init__ (line 446) | def __init__(
method forward (line 533) | def forward(self, x, thw_shape):
FILE: slowfast/models/batchnorm_helper.py
function get_norm (line 16) | def get_norm(cfg):
class SubBatchNorm3d (line 40) | class SubBatchNorm3d(nn.Module):
method __init__ (line 51) | def __init__(self, num_splits, **args):
method _get_aggregated_mean_std (line 72) | def _get_aggregated_mean_std(self, means, stds, n):
method aggregate_stats (line 87) | def aggregate_stats(self):
method forward (line 101) | def forward(self, x):
FILE: slowfast/models/build.py
function build_model (line 18) | def build_model(cfg, gpu_id=None):
FILE: slowfast/models/common.py
class Mlp (line 7) | class Mlp(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 26) | def forward(self, x):
class Permute (line 37) | class Permute(nn.Module):
method __init__ (line 38) | def __init__(self, dims):
method forward (line 42) | def forward(self, x):
function drop_path (line 46) | def drop_path(x, drop_prob: float = 0.0, training: bool = False):
class DropPath (line 62) | class DropPath(nn.Module):
method __init__ (line 65) | def __init__(self, drop_prob=None):
method forward (line 69) | def forward(self, x):
FILE: slowfast/models/contrastive.py
class ContrastiveModel (line 34) | class ContrastiveModel(nn.Module):
method __init__ (line 39) | def __init__(self, cfg):
method knn_mem_update (line 134) | def knn_mem_update(self, q_knn, index):
method init_knn_labels (line 145) | def init_knn_labels(self, train_loader):
method _update_history (line 161) | def _update_history(self):
method _batch_shuffle (line 177) | def _batch_shuffle(self, x):
method _batch_unshuffle (line 219) | def _batch_unshuffle(self, x, idx_restore):
method eval_knn (line 235) | def eval_knn(self, q_knn, knn_k=200):
method sim_loss (line 245) | def sim_loss(self, q, k):
method momentum_anneal_cosine (line 254) | def momentum_anneal_cosine(self, epoch_exact):
method _dequeue_and_enqueue (line 266) | def _dequeue_and_enqueue(self, keys, extra_keys=None):
method batch_clips (line 297) | def batch_clips(self, clips):
method compute_key_feat (line 311) | def compute_key_feat(
method forward (line 375) | def forward(
method _simclr_precompute_pos_neg_mask_multi (line 808) | def _simclr_precompute_pos_neg_mask_multi(self):
method run_swav_encoder_q (line 850) | def run_swav_encoder_q(self, im):
method get_code (line 858) | def get_code(self, out):
method run_swav_orig_encoder_q (line 867) | def run_swav_orig_encoder_q(self, x):
method sinkhorn (line 875) | def sinkhorn(self, Q, iters):
method distributed_sinkhorn (line 891) | def distributed_sinkhorn(self, Q, nmb_iters):
method KLDivLoss (line 914) | def KLDivLoss(self, out, code):
function l2_loss (line 921) | def l2_loss(x, y):
class Normalize (line 925) | class Normalize(nn.Module):
method __init__ (line 926) | def __init__(self, power=2, dim=1):
method forward (line 931) | def forward(self, x):
class Memory (line 939) | class Memory(nn.Module):
method __init__ (line 940) | def __init__(self, length, duration, dim, cfg):
method resize (line 955) | def resize(self, length, duration, dim):
method get (line 968) | def get(self, ind, time, interp=False):
method update (line 993) | def update(self, mem, momentum, ind, time, interp=False):
method forward (line 1040) | def forward(self, inputs):
class Memory1D (line 1044) | class Memory1D(nn.Module):
method __init__ (line 1045) | def __init__(self, length, duration, dim, cfg):
method get (line 1059) | def get(self, ind, time, interp=False):
method update (line 1071) | def update(self, mem, momentum, ind, time, interp=False):
function cancel_swav_gradients (line 1085) | def cancel_swav_gradients(model, cfg, epoch_exact):
FILE: slowfast/models/head_helper.py
class ResNetRoIHead (line 16) | class ResNetRoIHead(nn.Module):
method __init__ (line 21) | def __init__(
method forward (line 114) | def forward(self, inputs, bboxes):
class MLPHead (line 146) | class MLPHead(nn.Module):
method __init__ (line 147) | def __init__(
method forward (line 188) | def forward(self, x):
class ResNetBasicHead (line 197) | class ResNetBasicHead(nn.Module):
method __init__ (line 206) | def __init__(
method forward (line 309) | def forward(self, inputs):
class X3DHead (line 357) | class X3DHead(nn.Module):
method __init__ (line 366) | def __init__(
method _construct_head (line 416) | def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):
method forward (line 467) | def forward(self, inputs):
class TransformerBasicHead (line 497) | class TransformerBasicHead(nn.Module):
method __init__ (line 502) | def __init__(
method forward (line 556) | def forward(self, x):
FILE: slowfast/models/losses.py
class ContrastiveLoss (line 16) | class ContrastiveLoss(nn.Module):
method __init__ (line 17) | def __init__(self, reduction="mean"):
method forward (line 21) | def forward(self, inputs, dummy_labels=None):
function get_loss_func (line 38) | def get_loss_func(loss_name):
FILE: slowfast/models/morphmlp.py
function conv_3xnxn (line 12) | def conv_3xnxn(inp, oup, kernel_size=3, stride=3):
function conv_1xnxn (line 16) | def conv_1xnxn(inp, oup, kernel_size=3, stride=3):
class Mlp (line 19) | class Mlp(nn.Module):
method __init__ (line 20) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 29) | def forward(self, x):
class MorphFC_S2 (line 38) | class MorphFC_S2(nn.Module):
method __init__ (line 39) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 51) | def forward(self, x):
class MorphFC_S (line 76) | class MorphFC_S(nn.Module):
method __init__ (line 77) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 91) | def forward(self, x):
class MorphFC_T (line 117) | class MorphFC_T(nn.Module):
method __init__ (line 118) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 129) | def forward(self, x):
class PermutatorBlock (line 146) | class PermutatorBlock(nn.Module):
method __init__ (line 147) | def __init__(self, dim, segment_dim, mlp_ratio=4., qkv_bias=False, qk_...
method forward (line 164) | def forward(self, x):
class PatchEmbed (line 171) | class PatchEmbed(nn.Module):
method __init__ (line 175) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 183) | def forward(self, x):
class Downsample (line 191) | class Downsample(nn.Module):
method __init__ (line 195) | def __init__(self, in_embed_dim, out_embed_dim, patch_size):
method forward (line 200) | def forward(self, x):
class MorphMLP (line 207) | class MorphMLP(nn.Module):
method __init__ (line 211) | def __init__(self, cfg):
method _init_weights (line 311) | def _init_weights(self, m):
method get_pretrained_model (line 320) | def get_pretrained_model(self, cfg):
method forward_features (line 330) | def forward_features(self, x):
method forward (line 350) | def forward(self, x):
FILE: slowfast/models/morphmlp_32.py
function conv_3xnxn (line 10) | def conv_3xnxn(inp, oup, kernel_size=3, stride=3):
function conv_1xnxn (line 14) | def conv_1xnxn(inp, oup, kernel_size=3, stride=3):
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 18) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 27) | def forward(self, x):
class MorphFC_S2 (line 36) | class MorphFC_S2(nn.Module):
method __init__ (line 37) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 49) | def forward(self, x):
class MorphFC_S (line 74) | class MorphFC_S(nn.Module):
method __init__ (line 75) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 89) | def forward(self, x):
class MorphFC_T (line 115) | class MorphFC_T(nn.Module):
method __init__ (line 116) | def __init__(self, dim, segment_dim=8, qkv_bias=False, qk_scale=None, ...
method forward (line 130) | def forward(self, x):
class PermutatorBlock (line 147) | class PermutatorBlock(nn.Module):
method __init__ (line 148) | def __init__(self, dim, segment_dim, mlp_ratio=4., qkv_bias=False, qk_...
method forward (line 165) | def forward(self, x):
class PatchEmbed (line 172) | class PatchEmbed(nn.Module):
method __init__ (line 176) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 184) | def forward(self, x):
class Downsample (line 192) | class Downsample(nn.Module):
method __init__ (line 196) | def __init__(self, in_embed_dim, out_embed_dim, patch_size):
method forward (line 201) | def forward(self, x):
class MorphMLP_32 (line 208) | class MorphMLP_32(nn.Module):
method __init__ (line 212) | def __init__(self, cfg):
method _init_weights (line 309) | def _init_weights(self, m):
method get_pretrained_model (line 318) | def get_pretrained_model(self, cfg):
method forward_features (line 328) | def forward_features(self, x):
method forward (line 348) | def forward(self, x):
FILE: slowfast/models/nonlocal_helper.py
class Nonlocal (line 10) | class Nonlocal(nn.Module):
method __init__ (line 20) | def __init__(
method _construct_nonlocal (line 67) | def _construct_nonlocal(
method forward (line 105) | def forward(self, x):
FILE: slowfast/models/operators.py
class SE (line 11) | class SE(nn.Module):
method _round_width (line 14) | def _round_width(self, width, multiplier, min_width=8, divisor=8):
method __init__ (line 35) | def __init__(self, dim_in, ratio, relu_act=True):
method forward (line 53) | def forward(self, x):
FILE: slowfast/models/optimizer.py
function construct_optimizer (line 11) | def construct_optimizer(model, cfg):
function get_epoch_lr (line 101) | def get_epoch_lr(cur_epoch, cfg):
function set_lr (line 112) | def set_lr(optimizer, new_lr):
FILE: slowfast/models/ptv_model_builder.py
function get_head_act (line 37) | def get_head_act(act_func):
class PTVResNet (line 59) | class PTVResNet(nn.Module):
method __init__ (line 64) | def __init__(self, cfg):
method _construct_network (line 90) | def _construct_network(self, cfg):
method forward (line 209) | def forward(self, x, bboxes=None):
class PTVSlowFast (line 225) | class PTVSlowFast(nn.Module):
method __init__ (line 226) | def __init__(self, cfg):
method _construct_network (line 247) | def _construct_network(self, cfg):
method forward (line 410) | def forward(self, x, bboxes=None):
class PTVX3D (line 425) | class PTVX3D(nn.Module):
method __init__ (line 426) | def __init__(self, cfg):
method _construct_network (line 449) | def _construct_network(self, cfg):
method forward (line 505) | def forward(self, x, bboxes=None):
class PTVCSN (line 518) | class PTVCSN(nn.Module):
method __init__ (line 523) | def __init__(self, cfg):
method _construct_network (line 540) | def _construct_network(self, cfg):
method forward (line 590) | def forward(self, x, bboxes=None):
class PTVR2plus1D (line 603) | class PTVR2plus1D(nn.Module):
method __init__ (line 608) | def __init__(self, cfg):
method _construct_network (line 625) | def _construct_network(self, cfg):
method forward (line 691) | def forward(self, x, bboxes=None):
class PTVMViT (line 704) | class PTVMViT(nn.Module):
method __init__ (line 709) | def __init__(self, cfg):
method _construct_network (line 726) | def _construct_network(self, cfg):
method forward (line 770) | def forward(self, x, bboxes=None):
FILE: slowfast/models/resnet_helper.py
function get_trans_func (line 14) | def get_trans_func(name):
class BasicTransform (line 29) | class BasicTransform(nn.Module):
method __init__ (line 34) | def __init__(
method _construct (line 77) | def _construct(self, dim_in, dim_out, stride, dilation, norm_module):
method forward (line 110) | def forward(self, x):
class X3DTransform (line 120) | class X3DTransform(nn.Module):
method __init__ (line 127) | def __init__(
method _construct (line 190) | def _construct(
method forward (line 255) | def forward(self, x):
class BottleneckTransform (line 261) | class BottleneckTransform(nn.Module):
method __init__ (line 267) | def __init__(
method _construct (line 321) | def _construct(
method forward (line 379) | def forward(self, x):
class ResBlock (line 397) | class ResBlock(nn.Module):
method __init__ (line 402) | def __init__(
method _construct (line 470) | def _construct(
method forward (line 514) | def forward(self, x):
class ResStage (line 526) | class ResStage(nn.Module):
method __init__ (line 537) | def __init__(
method _construct (line 652) | def _construct(
method forward (line 701) | def forward(self, inputs):
FILE: slowfast/models/stem_helper.py
function get_stem_func (line 9) | def get_stem_func(name):
class VideoModelStem (line 20) | class VideoModelStem(nn.Module):
method __init__ (line 26) | def __init__(
method _construct_stem (line 98) | def _construct_stem(self, dim_in, dim_out, norm_module, stem_func_name):
method forward (line 115) | def forward(self, x):
class ResNetBasicStem (line 127) | class ResNetBasicStem(nn.Module):
method __init__ (line 134) | def __init__(
method _construct_stem (line 181) | def _construct_stem(self, dim_in, dim_out, norm_module):
method forward (line 198) | def forward(self, x):
class X3DStem (line 206) | class X3DStem(nn.Module):
method __init__ (line 213) | def __init__(
method _construct_stem (line 260) | def _construct_stem(self, dim_in, dim_out, norm_module):
method forward (line 284) | def forward(self, x):
class PatchEmbed (line 292) | class PatchEmbed(nn.Module):
method __init__ (line 297) | def __init__(
method forward (line 319) | def forward(self, x):
FILE: slowfast/models/utils.py
function round_width (line 8) | def round_width(width, multiplier, min_width=1, divisor=1, verbose=False):
function validate_checkpoint_wrapper_import (line 24) | def validate_checkpoint_wrapper_import(checkpoint_wrapper):
FILE: slowfast/models/video_model_builder.py
class FuseFastToSlow (line 105) | class FuseFastToSlow(nn.Module):
method __init__ (line 112) | def __init__(
method forward (line 155) | def forward(self, x):
class SlowFast (line 166) | class SlowFast(nn.Module):
method __init__ (line 175) | def __init__(self, cfg):
method _construct_network (line 196) | def _construct_network(self, cfg):
method forward (line 418) | def forward(self, x, bboxes=None):
class ResNet (line 440) | class ResNet(nn.Module):
method __init__ (line 454) | def __init__(self, cfg):
method _construct_network (line 475) | def _construct_network(self, cfg):
method forward (line 638) | def forward(self, x, bboxes=None):
class X3D (line 657) | class X3D(nn.Module):
method __init__ (line 666) | def __init__(self, cfg):
method _round_repeats (line 704) | def _round_repeats(self, repeats, multiplier):
method _construct_network (line 711) | def _construct_network(self, cfg):
method forward (line 796) | def forward(self, x, bboxes=None):
class MViT (line 803) | class MViT(nn.Module):
method __init__ (line 815) | def __init__(self, cfg):
method _init_weights (line 1027) | def _init_weights(self, m):
method no_weight_decay (line 1037) | def no_weight_decay(self):
method _get_pos_embed (line 1060) | def _get_pos_embed(self, pos_embed, bcthw):
method forward (line 1084) | def forward(self, x):
FILE: slowfast/utils/ava_eval_helper.py
function make_image_key (line 49) | def make_image_key(video_id, timestamp):
function read_csv (line 54) | def read_csv(csv_file, class_whitelist=None, load_score=False):
function read_exclusions (line 91) | def read_exclusions(exclusions_file):
function read_labelmap (line 109) | def read_labelmap(labelmap_file):
function evaluate_ava_from_files (line 127) | def evaluate_ava_from_files(labelmap, groundtruth, detections, exclusions):
function evaluate_ava (line 137) | def evaluate_ava(
function run_evaluation (line 174) | def run_evaluation(
function get_ava_eval_data (line 251) | def get_ava_eval_data(
function write_results (line 290) | def write_results(detections, filename):
FILE: slowfast/utils/ava_evaluation/label_map_util.py
function _validate_label_map (line 29) | def _validate_label_map(label_map):
function create_category_index (line 43) | def create_category_index(categories):
function get_max_label_map_index (line 62) | def get_max_label_map_index(label_map):
function convert_label_map_to_categories (line 74) | def convert_label_map_to_categories(
function load_labelmap (line 129) | def load_labelmap(path):
function get_label_map_dict (line 148) | def get_label_map_dict(label_map_path, use_display_name=False):
function create_category_index_from_labelmap (line 168) | def create_category_index_from_labelmap(label_map_path):
function create_class_agnostic_category_index (line 185) | def create_class_agnostic_category_index():
FILE: slowfast/utils/ava_evaluation/metrics.py
function compute_precision_recall (line 21) | def compute_precision_recall(scores, labels, num_gt):
function compute_average_precision (line 74) | def compute_average_precision(precision, recall):
function compute_cor_loc (line 128) | def compute_cor_loc(
FILE: slowfast/utils/ava_evaluation/np_box_list.py
class BoxList (line 27) | class BoxList(object):
method __init__ (line 39) | def __init__(self, data):
method num_boxes (line 64) | def num_boxes(self):
method get_extra_fields (line 68) | def get_extra_fields(self):
method has_field (line 72) | def has_field(self, field):
method add_field (line 75) | def add_field(self, field, field_data):
method get (line 92) | def get(self):
method get_field (line 100) | def get_field(self, field):
method get_coordinates (line 116) | def get_coordinates(self):
method _is_valid_boxes (line 129) | def _is_valid_boxes(self, data):
FILE: slowfast/utils/ava_evaluation/np_box_list_ops.py
class SortOrder (line 33) | class SortOrder(object):
function area (line 45) | def area(boxlist):
function intersection (line 58) | def intersection(boxlist1, boxlist2):
function iou (line 71) | def iou(boxlist1, boxlist2):
function ioa (line 84) | def ioa(boxlist1, boxlist2):
function gather (line 101) | def gather(boxlist, indices, fields=None):
function sort_by_field (line 136) | def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
function non_max_suppression (line 167) | def non_max_suppression(
function multi_class_non_max_suppression (line 242) | def multi_class_non_max_suppression(
function scale (line 321) | def scale(boxlist, y_scale, x_scale):
function clip_to_window (line 349) | def clip_to_window(boxlist, window):
function prune_non_overlapping_boxes (line 385) | def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
function prune_outside_window (line 410) | def prune_outside_window(boxlist, window):
function concatenate (line 448) | def concatenate(boxlists, fields=None):
function filter_scores_greater_than (line 503) | def filter_scores_greater_than(boxlist, thresh):
function change_coordinate_frame (line 539) | def change_coordinate_frame(boxlist, window):
function _copy_extra_fields (line 572) | def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
function _update_valid_indices_by_removing_high_iou_boxes (line 589) | def _update_valid_indices_by_removing_high_iou_boxes(
FILE: slowfast/utils/ava_evaluation/np_box_mask_list.py
class BoxMaskList (line 29) | class BoxMaskList(np_box_list.BoxList):
method __init__ (line 37) | def __init__(self, box_data, mask_data):
method get_masks (line 67) | def get_masks(self):
FILE: slowfast/utils/ava_evaluation/np_box_mask_list_ops.py
function box_list_to_box_mask_list (line 33) | def box_list_to_box_mask_list(boxlist):
function area (line 57) | def area(box_mask_list):
function intersection (line 69) | def intersection(box_mask_list1, box_mask_list2):
function iou (line 84) | def iou(box_mask_list1, box_mask_list2):
function ioa (line 99) | def ioa(box_mask_list1, box_mask_list2):
function gather (line 118) | def gather(box_mask_list, indices, fields=None):
function sort_by_field (line 151) | def sort_by_field(
function non_max_suppression (line 174) | def non_max_suppression(
function multi_class_non_max_suppression (line 255) | def multi_class_non_max_suppression(
function prune_non_overlapping_masks (line 337) | def prune_non_overlapping_masks(box_mask_list1, box_mask_list2, minoverl...
function concatenate (line 365) | def concatenate(box_mask_lists, fields=None):
function filter_scores_greater_than (line 395) | def filter_scores_greater_than(box_mask_list, thresh):
FILE: slowfast/utils/ava_evaluation/np_box_ops.py
function area (line 31) | def area(boxes):
function intersection (line 43) | def intersection(boxes1, boxes2):
function iou (line 71) | def iou(boxes1, boxes2):
function ioa (line 92) | def ioa(boxes1, boxes2):
FILE: slowfast/utils/ava_evaluation/np_mask_ops.py
function area (line 33) | def area(masks):
function intersection (line 51) | def intersection(masks1, masks2):
function iou (line 79) | def iou(masks1, masks2):
function ioa (line 107) | def ioa(masks1, masks2):
FILE: slowfast/utils/ava_evaluation/object_detection_evaluation.py
class DetectionEvaluator (line 44) | class DetectionEvaluator(object):
method __init__ (line 64) | def __init__(self, categories):
method add_single_ground_truth_image_info (line 75) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
method add_single_detected_image_info (line 86) | def add_single_detected_image_info(self, image_id, detections_dict):
method evaluate (line 97) | def evaluate(self):
method clear (line 102) | def clear(self):
class ObjectDetectionEvaluator (line 107) | class ObjectDetectionEvaluator(DetectionEvaluator):
method __init__ (line 110) | def __init__(
method add_single_ground_truth_image_info (line 158) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
method add_single_detected_image_info (line 236) | def add_single_detected_image_info(self, image_id, detections_dict):
method evaluate (line 287) | def evaluate(self):
method clear (line 346) | def clear(self):
class PascalDetectionEvaluator (line 357) | class PascalDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 360) | def __init__(self, categories, matching_iou_threshold=0.5):
class WeightedPascalDetectionEvaluator (line 370) | class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 384) | def __init__(self, categories, matching_iou_threshold=0.5):
class PascalInstanceSegmentationEvaluator (line 394) | class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
method __init__ (line 397) | def __init__(self, categories, matching_iou_threshold=0.5):
class WeightedPascalInstanceSegmentationEvaluator (line 408) | class WeightedPascalInstanceSegmentationEvaluator(ObjectDetectionEvaluat...
method __init__ (line 422) | def __init__(self, categories, matching_iou_threshold=0.5):
class OpenImagesDetectionEvaluator (line 433) | class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
method __init__ (line 440) | def __init__(
method add_single_ground_truth_image_info (line 460) | def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
class ObjectDetectionEvaluation (line 534) | class ObjectDetectionEvaluation(object):
method __init__ (line 537) | def __init__(
method _initialize_detections (line 569) | def _initialize_detections(self):
method clear_detections (line 580) | def clear_detections(self):
method add_single_ground_truth_image_info (line 583) | def add_single_ground_truth_image_info(
method add_single_detected_image_info (line 640) | def add_single_detected_image_info(
method _update_ground_truth_statistics (line 727) | def _update_ground_truth_statistics(
method evaluate (line 760) | def evaluate(self):
FILE: slowfast/utils/ava_evaluation/per_image_evaluation.py
class PerImageEvaluation (line 39) | class PerImageEvaluation(object):
method __init__ (line 42) | def __init__(self, num_groundtruth_classes, matching_iou_threshold=0.5):
method compute_object_detection_metrics (line 53) | def compute_object_detection_metrics(
method _compute_tp_fp (line 128) | def _compute_tp_fp(
method _get_overlaps_and_scores_box_mode (line 223) | def _get_overlaps_and_scores_box_mode(
method _compute_tp_fp_for_single_class (line 261) | def _compute_tp_fp_for_single_class(
method _get_ith_class_arrays (line 354) | def _get_ith_class_arrays(
method _remove_invalid_boxes (line 411) | def _remove_invalid_boxes(
FILE: slowfast/utils/ava_evaluation/standard_fields.py
class InputDataFields (line 35) | class InputDataFields(object):
class DetectionResultFields (line 100) | class DetectionResultFields(object):
class BoxListFields (line 126) | class BoxListFields(object):
class TfExampleFields (line 152) | class TfExampleFields(object):
FILE: slowfast/utils/benchmark.py
function benchmark_data_loading (line 20) | def benchmark_data_loading(cfg):
FILE: slowfast/utils/bn_helper.py
function compute_and_update_bn_stats (line 11) | def compute_and_update_bn_stats(model, data_loader, num_batches=200):
FILE: slowfast/utils/c2_model_loading.py
function get_name_convert_func (line 9) | def get_name_convert_func():
FILE: slowfast/utils/checkpoint.py
function make_checkpoint_dir (line 21) | def make_checkpoint_dir(path_to_job):
function get_checkpoint_dir (line 37) | def get_checkpoint_dir(path_to_job):
function get_path_to_checkpoint (line 46) | def get_path_to_checkpoint(path_to_job, epoch):
function get_last_checkpoint (line 57) | def get_last_checkpoint(path_to_job):
function has_checkpoint (line 73) | def has_checkpoint(path_to_job):
function is_checkpoint_epoch (line 84) | def is_checkpoint_epoch(cfg, cur_epoch, multigrid_schedule=None):
function save_checkpoint (line 107) | def save_checkpoint(path_to_job, model, optimizer, loss_scaler, epoch, c...
function inflate_weight (line 141) | def inflate_weight(state_dict_2d, state_dict_3d):
function load_checkpoint (line 180) | def load_checkpoint(
function sub_to_normal_bn (line 353) | def sub_to_normal_bn(sd):
function c2_normal_to_sub_bn (line 394) | def c2_normal_to_sub_bn(key, model_keys):
function normal_to_sub_bn (line 414) | def normal_to_sub_bn(checkpoint_sd, model_sd):
function load_test_checkpoint (line 455) | def load_test_checkpoint(cfg, model):
function load_train_checkpoint (line 495) | def load_train_checkpoint(cfg, model, optimizer, loss_scaler):
FILE: slowfast/utils/checkpoint_amp.py
function make_checkpoint_dir (line 21) | def make_checkpoint_dir(path_to_job):
function get_checkpoint_dir (line 37) | def get_checkpoint_dir(path_to_job):
function get_path_to_checkpoint (line 46) | def get_path_to_checkpoint(path_to_job, epoch):
function get_last_checkpoint (line 57) | def get_last_checkpoint(path_to_job):
function has_checkpoint (line 73) | def has_checkpoint(path_to_job):
function is_checkpoint_epoch (line 84) | def is_checkpoint_epoch(cfg, cur_epoch, multigrid_schedule=None):
function save_checkpoint (line 107) | def save_checkpoint(path_to_job, model, optimizer, loss_scaler, epoch, c...
function inflate_weight (line 141) | def inflate_weight(state_dict_2d, state_dict_3d):
function load_checkpoint (line 180) | def load_checkpoint(
function sub_to_normal_bn (line 353) | def sub_to_normal_bn(sd):
function c2_normal_to_sub_bn (line 394) | def c2_normal_to_sub_bn(key, model_keys):
function normal_to_sub_bn (line 414) | def normal_to_sub_bn(checkpoint_sd, model_sd):
function load_test_checkpoint (line 455) | def load_test_checkpoint(cfg, model):
function load_train_checkpoint (line 495) | def load_train_checkpoint(cfg, model, optimizer, loss_scaler):
FILE: slowfast/utils/distributed.py
function all_gather (line 22) | def all_gather(tensors):
function all_reduce (line 44) | def all_reduce(tensors, average=True):
function init_process_group (line 63) | def init_process_group(
function is_master_proc (line 101) | def is_master_proc(num_gpus=8):
function is_root_proc (line 111) | def is_root_proc():
function get_rank (line 121) | def get_rank():
function synchronize (line 132) | def synchronize():
function _get_global_gloo_group (line 148) | def _get_global_gloo_group():
function _serialize_to_tensor (line 161) | def _serialize_to_tensor(data, group):
function _pad_to_largest_tensor (line 189) | def _pad_to_largest_tensor(tensor, group):
function all_gather_unaligned (line 225) | def all_gather_unaligned(data, group=None):
class GatherLayer (line 264) | class GatherLayer(torch.autograd.Function):
method forward (line 268) | def forward(ctx, input):
method backward (line 275) | def backward(ctx, *grads):
class AllGatherWithGradient (line 282) | class AllGatherWithGradient(torch.autograd.Function):
method forward (line 286) | def forward(ctx, input):
method backward (line 294) | def backward(ctx, grad_output):
FILE: slowfast/utils/env.py
function setup_environment (line 13) | def setup_environment():
FILE: slowfast/utils/logging.py
function _suppress_print (line 19) | def _suppress_print():
function _cached_log_stream (line 31) | def _cached_log_stream(filename):
function setup_logging (line 40) | def setup_logging(output_dir=None):
function get_logger (line 77) | def get_logger(name):
function log_json_stats (line 87) | def log_json_stats(stats, output_dir=None):
FILE: slowfast/utils/lr_policy.py
function get_lr_at_epoch (line 9) | def get_lr_at_epoch(cfg, cur_epoch):
function lr_func_cosine (line 30) | def lr_func_cosine(cfg, cur_epoch):
function lr_func_steps_with_relative_lrs (line 56) | def lr_func_steps_with_relative_lrs(cfg, cur_epoch):
function get_step_index (line 69) | def get_step_index(cfg, cur_epoch):
function get_lr_func (line 84) | def get_lr_func(lr_policy):
FILE: slowfast/utils/meters.py
function get_ava_mini_groundtruth (line 28) | def get_ava_mini_groundtruth(full_groundtruth):
class AVAMeter (line 46) | class AVAMeter(object):
method __init__ (line 51) | def __init__(self, overall_iters, cfg, mode):
method log_iter_stats (line 86) | def log_iter_stats(self, cur_epoch, cur_iter):
method iter_tic (line 138) | def iter_tic(self):
method iter_toc (line 145) | def iter_toc(self):
method data_toc (line 152) | def data_toc(self):
method reset (line 156) | def reset(self):
method update_stats (line 166) | def update_stats(self, preds, ori_boxes, metadata, loss=None, lr=None):
method finalize_metrics (line 185) | def finalize_metrics(self, log=True):
method log_epoch_stats (line 212) | def log_epoch_stats(self, cur_epoch):
class TestMeter (line 231) | class TestMeter(object):
method __init__ (line 239) | def __init__(
method reset (line 287) | def reset(self):
method update_stats (line 297) | def update_stats(self, preds, labels, clip_ids):
method log_iter_stats (line 332) | def log_iter_stats(self, cur_iter):
method iter_tic (line 348) | def iter_tic(self):
method iter_toc (line 355) | def iter_toc(self):
method data_toc (line 362) | def data_toc(self):
method finalize_metrics (line 366) | def finalize_metrics(self, ks=(1, 5)):
class ScalarMeter (line 407) | class ScalarMeter(object):
method __init__ (line 414) | def __init__(self, window_size):
method reset (line 423) | def reset(self):
method add_value (line 431) | def add_value(self, value):
method get_win_median (line 439) | def get_win_median(self):
method get_win_avg (line 445) | def get_win_avg(self):
method get_global_avg (line 451) | def get_global_avg(self):
class TrainMeter (line 458) | class TrainMeter(object):
method __init__ (line 463) | def __init__(self, epoch_iters, cfg):
method reset (line 487) | def reset(self):
method iter_tic (line 500) | def iter_tic(self):
method iter_toc (line 507) | def iter_toc(self):
method data_toc (line 514) | def data_toc(self):
method update_stats (line 518) | def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
method log_iter_stats (line 541) | def log_iter_stats(self, cur_epoch, cur_iter):
method log_epoch_stats (line 571) | def log_epoch_stats(self, cur_epoch):
class ValMeter (line 602) | class ValMeter(object):
method __init__ (line 607) | def __init__(self, max_iter, cfg):
method reset (line 632) | def reset(self):
method iter_tic (line 645) | def iter_tic(self):
method iter_toc (line 652) | def iter_toc(self):
method data_toc (line 659) | def data_toc(self):
method update_stats (line 663) | def update_stats(self, top1_err, top5_err, mb_size):
method update_predictions (line 677) | def update_predictions(self, preds, labels):
method log_iter_stats (line 688) | def log_iter_stats(self, cur_epoch, cur_iter):
method log_epoch_stats (line 712) | def log_epoch_stats(self, cur_epoch):
function get_map (line 744) | def get_map(preds, labels):
class EpochTimer (line 771) | class EpochTimer:
method __init__ (line 776) | def __init__(self) -> None:
method reset (line 781) | def reset(self) -> None:
method epoch_tic (line 788) | def epoch_tic(self):
method epoch_toc (line 794) | def epoch_toc(self):
method last_epoch_time (line 801) | def last_epoch_time(self):
method avg_epoch_time (line 809) | def avg_epoch_time(self):
method median_epoch_time (line 817) | def median_epoch_time(self):
FILE: slowfast/utils/metrics.py
function topks_correct (line 9) | def topks_correct(preds, labels, ks):
function topk_errors (line 43) | def topk_errors(preds, labels, ks):
function topk_accuracies (line 55) | def topk_accuracies(preds, labels, ks):
FILE: slowfast/utils/misc.py
function check_nan_losses (line 26) | def check_nan_losses(loss):
function params_count (line 36) | def params_count(model, ignore_bn=False):
function gpu_mem_usage (line 53) | def gpu_mem_usage():
function cpu_mem_usage (line 64) | def cpu_mem_usage():
function _get_model_analysis_input (line 78) | def _get_model_analysis_input(cfg, use_train_input):
function get_model_stats (line 137) | def get_model_stats(model, cfg, mode, use_train_input):
function log_model_info (line 172) | def log_model_info(model, cfg, use_train_input=True):
function is_eval_epoch (line 200) | def is_eval_epoch(cfg, cur_epoch, multigrid_schedule):
function plot_input (line 224) | def plot_input(tensor, bboxes=(), texts=(), path="./tmp_vis.png"):
function frozen_bn_stats (line 254) | def frozen_bn_stats(model):
function aggregate_sub_bn_stats (line 265) | def aggregate_sub_bn_stats(module):
function launch_job (line 283) | def launch_job(cfg, init_method, func, daemon=False):
function get_class_names (line 314) | def get_class_names(path, parent_path=None, subset_path=None):
FILE: slowfast/utils/multigrid.py
class MultigridSchedule (line 13) | class MultigridSchedule(object):
method init_multigrid (line 18) | def init_multigrid(self, cfg):
method update_long_cycle (line 63) | def update_long_cycle(self, cfg, cur_epoch):
method get_long_cycle_schedule (line 123) | def get_long_cycle_schedule(self, cfg):
function print_schedule (line 215) | def print_schedule(schedule):
function get_current_long_cycle_shape (line 224) | def get_current_long_cycle_shape(schedule, epoch):
FILE: slowfast/utils/multiprocessing.py
function run (line 9) | def run(
FILE: slowfast/utils/parser.py
function parse_args (line 13) | def parse_args():
function load_config (line 67) | def load_config(args):
FILE: slowfast/utils/weight_init_helper.py
function init_weights (line 10) | def init_weights(
FILE: slowfast/visualization/async_predictor.py
class AsycnActionPredictor (line 17) | class AsycnActionPredictor:
class _Predictor (line 18) | class _Predictor(mp.Process):
method __init__ (line 19) | def __init__(self, cfg, task_queue, result_queue, gpu_id=None):
method run (line 41) | def run(self):
method __init__ (line 54) | def __init__(self, cfg, result_queue=None):
method put (line 78) | def put(self, task):
method get (line 88) | def get(self):
method __call__ (line 106) | def __call__(self, task):
method shutdown (line 110) | def shutdown(self):
method result_available (line 115) | def result_available(self):
method default_buffer_size (line 122) | def default_buffer_size(self):
class AsyncVis (line 126) | class AsyncVis:
class _VisWorker (line 127) | class _VisWorker(mp.Process):
method __init__ (line 128) | def __init__(self, video_vis, task_queue, result_queue):
method run (line 141) | def run(self):
method __init__ (line 154) | def __init__(self, video_vis, n_workers=None):
method put (line 183) | def put(self, task):
method get (line 193) | def get(self):
method __call__ (line 213) | def __call__(self, task):
method shutdown (line 220) | def shutdown(self):
method result_available (line 225) | def result_available(self):
method default_buffer_size (line 229) | def default_buffer_size(self):
class _StopToken (line 233) | class _StopToken:
class AsyncDemo (line 237) | class AsyncDemo:
method __init__ (line 242) | def __init__(self, cfg, async_vis):
method put (line 254) | def put(self, task):
method get (line 264) | def get(self):
function draw_predictions (line 276) | def draw_predictions(task, video_vis):
FILE: slowfast/visualization/ava_demo_precomputed_boxes.py
class AVAVisualizerWithPrecomputedBox (line 24) | class AVAVisualizerWithPrecomputedBox:
method __init__ (line 30) | def __init__(self, cfg):
method get_output_file (line 77) | def get_output_file(self, path):
method get_input_clip (line 91) | def get_input_clip(self, keyframe_idx):
method get_predictions (line 124) | def get_predictions(self):
method draw_video (line 193) | def draw_video(self):
method __call__ (line 313) | def __call__(self):
method display (line 317) | def display(self, frame):
method _get_keyframe_clip (line 327) | def _get_keyframe_clip(self, keyframe_idx):
method _get_frame_range (line 339) | def _get_frame_range(self, start_idx, num_frames):
function merge_pred_gt_boxes (line 362) | def merge_pred_gt_boxes(pred_dict, gt_dict=None):
function load_boxes_labels (line 390) | def load_boxes_labels(cfg, video_name, fps, img_width, img_height):
FILE: slowfast/visualization/demo_loader.py
class VideoManager (line 17) | class VideoManager:
method __init__ (line 22) | def __init__(self, cfg):
method __iter__ (line 67) | def __iter__(self):
method __next__ (line 70) | def __next__(self):
method get_output_file (line 100) | def get_output_file(self, path, fps=30):
method display (line 115) | def display(self, task):
method clean (line 130) | def clean(self):
method start (line 140) | def start(self):
method join (line 143) | def join(self):
class ThreadVideoManager (line 147) | class ThreadVideoManager:
method __init__ (line 153) | def __init__(self, cfg):
method get_output_file (line 211) | def get_output_file(self, path, fps=30):
method __iter__ (line 226) | def __iter__(self):
method put_fn (line 229) | def put_fn(self):
method __next__ (line 268) | def __next__(self):
method get_fn (line 295) | def get_fn(self):
method display (line 325) | def display(self, task):
method start (line 335) | def start(self):
method join (line 350) | def join(self):
method clean (line 353) | def clean(self):
FILE: slowfast/visualization/gradcam_utils.py
class GradCAM (line 12) | class GradCAM:
method __init__ (line 19) | def __init__(
method _register_single_hook (line 45) | def _register_single_hook(self, layer_name):
method _register_hooks (line 63) | def _register_hooks(self):
method _calculate_localization_map (line 70) | def _calculate_localization_map(self, inputs, labels=None):
method __call__ (line 142) | def __call__(self, inputs, labels=None, alpha=0.5):
FILE: slowfast/visualization/prediction_vis.py
class WrongPredictionVis (line 16) | class WrongPredictionVis:
method __init__ (line 22) | def __init__(self, cfg):
method _pick_wrong_preds (line 46) | def _pick_wrong_preds(self, labels, preds):
method visualize_vid (line 72) | def visualize_vid(self, video_input, labels, preds, batch_idx):
method wrong_class_prediction (line 122) | def wrong_class_prediction(self):
method clean (line 131) | def clean(self):
FILE: slowfast/visualization/predictor.py
class Predictor (line 20) | class Predictor:
method __init__ (line 25) | def __init__(self, cfg, gpu_id=None):
method __call__ (line 49) | def __call__(self, task):
class ActionPredictor (line 119) | class ActionPredictor:
method __init__ (line 124) | def __init__(self, cfg, async_vis=None, gpu_id=None):
method put (line 135) | def put(self, task):
method get (line 146) | def get(self):
class Detectron2Predictor (line 158) | class Detectron2Predictor:
method __init__ (line 164) | def __init__(self, cfg, gpu_id=None):
method __call__ (line 189) | def __call__(self, task):
FILE: slowfast/visualization/tensorboard_vis.py
class TensorboardWriter (line 20) | class TensorboardWriter(object):
method __init__ (line 25) | def __init__(self, cfg):
method add_scalars (line 90) | def add_scalars(self, data_dict, global_step=None):
method plot_eval (line 101) | def plot_eval(self, preds, labels, global_step=None):
method add_video (line 171) | def add_video(self, vid_tensor, tag="Video Input", global_step=None, f...
method plot_weights_and_activations (line 183) | def plot_weights_and_activations(
method flush (line 227) | def flush(self):
method close (line 230) | def close(self):
function add_confusion_matrix (line 235) | def add_confusion_matrix(
function plot_hist (line 280) | def plot_hist(
function add_ndim_array (line 332) | def add_ndim_array(
function add_heatmap (line 408) | def add_heatmap(tensor):
FILE: slowfast/visualization/utils.py
function get_confusion_matrix (line 16) | def get_confusion_matrix(preds, labels, num_classes, normalize="true"):
function plot_confusion_matrix (line 48) | def plot_confusion_matrix(cmtx, num_classes, class_names=None, figsize=N...
function plot_topk_histogram (line 92) | def plot_topk_histogram(tag, array, k=10, class_names=None, figsize=None):
class GetWeightAndActivation (line 158) | class GetWeightAndActivation:
method __init__ (line 163) | def __init__(self, model, layers):
method _get_layer (line 178) | def _get_layer(self, layer_name):
method _register_single_hook (line 191) | def _register_single_hook(self, layer_name):
method _register_hooks (line 204) | def _register_hooks(self):
method get_activations (line 211) | def get_activations(self, input, bboxes=None):
method get_weights (line 236) | def get_weights(self):
function get_indexing (line 255) | def get_indexing(string):
function process_layer_index_data (line 278) | def process_layer_index_data(layer_ls, layer_name_prefix=""):
function process_cv2_inputs (line 304) | def process_cv2_inputs(frames, cfg):
function get_layer (line 325) | def get_layer(model, layer_name):
class TaskInfo (line 343) | class TaskInfo:
method __init__ (line 344) | def __init__(self):
method add_frames (line 355) | def add_frames(self, idx, frames):
method add_bboxes (line 365) | def add_bboxes(self, bboxes):
method add_action_preds (line 371) | def add_action_preds(self, preds):
FILE: slowfast/visualization/video_visualizer.py
function _create_text_labels (line 18) | def _create_text_labels(classes, scores, class_names, ground_truth=False):
class ImgVisualizer (line 45) | class ImgVisualizer(Visualizer):
method __init__ (line 46) | def __init__(self, img_rgb, meta, **kwargs):
method draw_text (line 61) | def draw_text(
method draw_multiple_text (line 109) | def draw_multiple_text(
method draw_multiple_text_upward (line 184) | def draw_multiple_text_upward(
method draw_multiple_text_downward (line 237) | def draw_multiple_text_downward(
method _align_x_coordinate (line 290) | def _align_x_coordinate(self, box_coordinate):
method _align_y_top (line 312) | def _align_y_top(self, box_coordinate, num_text, textbox_width):
method _align_y_bottom (line 330) | def _align_y_bottom(self, box_coordinate, num_text, textbox_width):
class VideoVisualizer (line 349) | class VideoVisualizer:
method __init__ (line 350) | def __init__(
method _get_color (line 396) | def _get_color(self, class_id):
method draw_one_frame (line 404) | def draw_one_frame(
method draw_clip_range (line 514) | def draw_clip_range(
method draw_clip (line 568) | def draw_clip(
method _adjust_frames_type (line 635) | def _adjust_frames_type(self, frames):
method _get_thres_array (line 657) | def _get_thres_array(self, common_class_names=None):
FILE: tools/benchmark.py
function main (line 15) | def main():
FILE: tools/demo_net.py
function run_demo (line 21) | def run_demo(cfg, frame_provider):
function demo (line 96) | def demo(cfg):
FILE: tools/run_net.py
function main (line 13) | def main():
FILE: tools/test_net.py
function perform_test (line 25) | def perform_test(test_loader, model, test_meter, cfg, writer=None):
function test (line 140) | def test(cfg):
FILE: tools/train_net.py
function train_epoch (line 28) | def train_epoch(
function eval_epoch (line 189) | def eval_epoch(val_loader, model, val_meter, loss_scaler, cur_epoch, cfg...
function calculate_and_update_precise_bn (line 310) | def calculate_and_update_precise_bn(loader, model, num_iters=200, use_gp...
function build_trainer (line 334) | def build_trainer(cfg):
function train (line 384) | def train(cfg):
FILE: tools/visualization.py
function run_visualization (line 29) | def run_visualization(vis_loader, model, cfg, writer=None):
function perform_wrong_prediction_vis (line 190) | def perform_wrong_prediction_vis(vis_loader, model, cfg):
function visualize (line 250) | def visualize(cfg):
Copy disabled (too large)
Download .json
Condensed preview — 302 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (12,943K chars).
[
{
"path": "DATASET.md",
"chars": 1898,
"preview": "# Dataset Preparation\n\nWe provide our labels in `data_list`.\n\n## Kinetics\n\nThe Kinetics Dataset could be downloaded via "
},
{
"path": "INSTALL.md",
"chars": 1716,
"preview": "# Installation\n\n## Requirements\n- Python >= 3.6\n- Numpy\n- PyTorch >= 1.5\n- [fvcore](https://github.com/facebookresearch/"
},
{
"path": "LICENSE",
"chars": 10239,
"preview": "Apache License\nVersion 2.0, January 2004\nhttp://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AN"
},
{
"path": "README.md",
"chars": 7812,
"preview": "# [ECCV2022] MorphMLP \\[[arxiv](https://arxiv.org/abs/2111.12527)\\]\n\nOur MorphMLP paper was accepted to ECCV 2022!!\n\nWe "
},
{
"path": "VISUALIZATION_TOOLS.md",
"chars": 6309,
"preview": "# Visualization Tools for PySlowFast\n\nThis document provides a brief intro for running various visualization tools provi"
},
{
"path": "build/lib/slowfast/__init__.py",
"chars": 166,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nfrom slowfast.utils.env "
},
{
"path": "build/lib/slowfast/config/__init__.py",
"chars": 95,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n"
},
{
"path": "build/lib/slowfast/config/custom_config.py",
"chars": 217,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Add custom configs an"
},
{
"path": "build/lib/slowfast/config/defaults.py",
"chars": 32568,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Configs.\"\"\"\nfrom fvco"
},
{
"path": "build/lib/slowfast/datasets/__init__.py",
"chars": 519,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nfrom .ava_dataset import"
},
{
"path": "build/lib/slowfast/datasets/ava_dataset.py",
"chars": 15320,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport logging\nimport nu"
},
{
"path": "build/lib/slowfast/datasets/ava_helper.py",
"chars": 8177,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport logging\nimport os"
},
{
"path": "build/lib/slowfast/datasets/build.py",
"chars": 1120,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nfrom fvcore.common.regis"
},
{
"path": "build/lib/slowfast/datasets/charades.py",
"chars": 9901,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport os\nimport random\n"
},
{
"path": "build/lib/slowfast/datasets/cv2_transform.py",
"chars": 26476,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport math\nimport numpy"
},
{
"path": "build/lib/slowfast/datasets/decoder.py",
"chars": 15925,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport math\nimport numpy"
},
{
"path": "build/lib/slowfast/datasets/imagenet.py",
"chars": 7428,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates.\n\nimport json\nimport numpy as np\nimport os\nimport random\nimport re\nimp"
},
{
"path": "build/lib/slowfast/datasets/kinetics.py",
"chars": 15752,
"preview": "#!/usr/bin/env python3\n# modified from https://github.com/facebookresearch/SlowFast\n\nimport os\nimport random\nimport torc"
},
{
"path": "build/lib/slowfast/datasets/loader.py",
"chars": 7282,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Data loader.\"\"\"\n\nimpo"
},
{
"path": "build/lib/slowfast/datasets/mixup.py",
"chars": 6659,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"\nThis implementation is based on\nhttps://git"
},
{
"path": "build/lib/slowfast/datasets/multigrid_helper.py",
"chars": 2753,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Helper functions for "
},
{
"path": "build/lib/slowfast/datasets/ptv_datasets.py",
"chars": 19418,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport functools\nimport os\nfrom typing import D"
},
{
"path": "build/lib/slowfast/datasets/rand_augment.py",
"chars": 16199,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"\nThis implementation is based on\nhttps://git"
},
{
"path": "build/lib/slowfast/datasets/random_erasing.py",
"chars": 6887,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"\nThis implementation is based on\nhttps://git"
},
{
"path": "build/lib/slowfast/datasets/ssv2.py",
"chars": 10392,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport json\nimport numpy"
},
{
"path": "build/lib/slowfast/datasets/sth.py",
"chars": 14510,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport json\nimport numpy"
},
{
"path": "build/lib/slowfast/datasets/transform.py",
"chars": 34515,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport logging\nimport ma"
},
{
"path": "build/lib/slowfast/datasets/utils.py",
"chars": 11805,
"preview": "#!/usr/bin/env python3\n\nimport logging\nimport numpy as np\nimport os\nimport random\nimport time\nfrom collections import de"
},
{
"path": "build/lib/slowfast/datasets/video_container.py",
"chars": 1261,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport av\nimport decord\n"
},
{
"path": "build/lib/slowfast/models/__init__.py",
"chars": 627,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nfrom .build import MODEL"
},
{
"path": "build/lib/slowfast/models/attention.py",
"chars": 17513,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\nimport numpy\nimport tor"
},
{
"path": "build/lib/slowfast/models/batchnorm_helper.py",
"chars": 3824,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"BatchNorm (BN) utilit"
},
{
"path": "build/lib/slowfast/models/build.py",
"chars": 1997,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Model construction fu"
},
{
"path": "build/lib/slowfast/models/common.py",
"chars": 1900,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates.\n\nimport torch\nimport torch.nn as nn\n\n\nclass Mlp(nn.Module):\n def _"
},
{
"path": "build/lib/slowfast/models/contrastive.py",
"chars": 41007,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"CSC model.\"\"\"\n\nimport"
},
{
"path": "build/lib/slowfast/models/custom_video_model_builder.py",
"chars": 133,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\n\"\"\"A More Flexible Vide"
},
{
"path": "build/lib/slowfast/models/head_helper.py",
"chars": 20679,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"ResNe(X)t Head helper"
},
{
"path": "build/lib/slowfast/models/losses.py",
"chars": 1245,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Loss functions.\"\"\"\n\nf"
},
{
"path": "build/lib/slowfast/models/morphmlp.py",
"chars": 15683,
"preview": "import torch\nimport torch.nn as nn\n\nfrom timm.models.layers import DropPath, trunc_normal_\nfrom .build import MODEL_REGI"
},
{
"path": "build/lib/slowfast/models/morphmlp_32.py",
"chars": 15598,
"preview": "import torch\nimport torch.nn as nn\n\nfrom timm.models.layers import DropPath, trunc_normal_\nfrom .build import MODEL_REGI"
},
{
"path": "build/lib/slowfast/models/nonlocal_helper.py",
"chars": 5416,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Non-local helper\"\"\"\n\n"
},
{
"path": "build/lib/slowfast/models/operators.py",
"chars": 1941,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Custom operators.\"\"\"\n"
},
{
"path": "build/lib/slowfast/models/optimizer.py",
"chars": 3974,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Optimizer.\"\"\"\n\nimport"
},
{
"path": "build/lib/slowfast/models/ptv_model_builder.py",
"chars": 26953,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\n\"\"\"Video models using P"
},
{
"path": "build/lib/slowfast/models/resnet_helper.py",
"chars": 24964,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Video models.\"\"\"\n\nimp"
},
{
"path": "build/lib/slowfast/models/stem_helper.py",
"chars": 10784,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"ResNe(X)t 3D stem hel"
},
{
"path": "build/lib/slowfast/models/utils.py",
"chars": 910,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport slowfast.utils.logging as logging\n\nlogge"
},
{
"path": "build/lib/slowfast/models/video_model_builder.py",
"chars": 42144,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\n\"\"\"Video models.\"\"\"\n\nimport math\nfrom functool"
},
{
"path": "build/lib/slowfast/site.py",
"chars": 2307,
"preview": "def __boot():\n import sys\n import os\n PYTHONPATH = os.environ.get('PYTHONPATH')\n if PYTHONPATH is None or (s"
},
{
"path": "build/lib/slowfast/utils/__init__.py",
"chars": 95,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n"
},
{
"path": "build/lib/slowfast/utils/ava_eval_helper.py",
"chars": 9926,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/label_map_util.py",
"chars": 6350,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/metrics.py",
"chars": 5710,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_box_list.py",
"chars": 5028,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_box_list_ops.py",
"chars": 21645,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_box_mask_list.py",
"chars": 2705,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_box_mask_list_ops.py",
"chars": 16307,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_box_ops.py",
"chars": 3565,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/np_mask_ops.py",
"chars": 4525,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/object_detection_evaluation.py",
"chars": 34271,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/per_image_evaluation.py",
"chars": 19882,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/ava_evaluation/standard_fields.py",
"chars": 9780,
"preview": "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "build/lib/slowfast/utils/benchmark.py",
"chars": 3290,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n\"\"\"\nFunctions for benchmarks.\n\"\"\"\n\nimport numpy a"
},
{
"path": "build/lib/slowfast/utils/bn_helper.py",
"chars": 2854,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"bn helper.\"\"\"\n\nimport"
},
{
"path": "build/lib/slowfast/utils/c2_model_loading.py",
"chars": 5005,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Caffe2 to PyTorch che"
},
{
"path": "build/lib/slowfast/utils/checkpoint.py",
"chars": 19726,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Functions that handle"
},
{
"path": "build/lib/slowfast/utils/checkpoint_amp.py",
"chars": 19715,
"preview": "#!/usr/bin/env python3\n# modified from https://github.com/facebookresearch/SlowFast\n\n\"\"\"Functions that handle saving and"
},
{
"path": "build/lib/slowfast/utils/distributed.py",
"chars": 9032,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Distributed helpers.\""
},
{
"path": "build/lib/slowfast/utils/env.py",
"chars": 445,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Set up Environment.\"\""
},
{
"path": "build/lib/slowfast/utils/logging.py",
"chars": 3210,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Logging.\"\"\"\n\nimport a"
},
{
"path": "build/lib/slowfast/utils/lr_policy.py",
"chars": 3112,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Learning rate policy."
},
{
"path": "build/lib/slowfast/utils/meters.py",
"chars": 26479,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Meters.\"\"\"\n\nimport da"
},
{
"path": "build/lib/slowfast/utils/metrics.py",
"chars": 2381,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Functions for computi"
},
{
"path": "build/lib/slowfast/utils/misc.py",
"chars": 12581,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport json\nimport loggi"
},
{
"path": "build/lib/slowfast/utils/multigrid.py",
"chars": 8752,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Helper functions for "
},
{
"path": "build/lib/slowfast/utils/multiprocessing.py",
"chars": 2105,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Multiprocessing helpe"
},
{
"path": "build/lib/slowfast/utils/parser.py",
"chars": 3021,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Argument parser funct"
},
{
"path": "build/lib/slowfast/utils/weight_init_helper.py",
"chars": 1969,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\n\"\"\"Utility function for "
},
{
"path": "build/lib/slowfast/visualization/__init__.py",
"chars": 95,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n"
},
{
"path": "build/lib/slowfast/visualization/async_predictor.py",
"chars": 9808,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport atexit\nimport num"
},
{
"path": "build/lib/slowfast/visualization/ava_demo_precomputed_boxes.py",
"chars": 17969,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport numpy as np\nimpor"
},
{
"path": "build/lib/slowfast/visualization/demo_loader.py",
"chars": 12402,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport atexit\nimport cop"
},
{
"path": "build/lib/slowfast/visualization/gradcam_utils.py",
"chars": 6958,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport matplotlib.pyplot"
},
{
"path": "build/lib/slowfast/visualization/prediction_vis.py",
"chars": 5296,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport numpy as np\nimpor"
},
{
"path": "build/lib/slowfast/visualization/predictor.py",
"chars": 6853,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport queue\nimport cv2\n"
},
{
"path": "build/lib/slowfast/visualization/tensorboard_vis.py",
"chars": 16010,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport logging as log\nim"
},
{
"path": "build/lib/slowfast/visualization/utils.py",
"chars": 12593,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport itertools\nimport "
},
{
"path": "build/lib/slowfast/visualization/video_visualizer.py",
"chars": 27402,
"preview": "#!/usr/bin/env python3\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nimport itertools\nimport "
},
{
"path": "configs/AVA/SLOWFAST_32x2_R50_SHORT.yaml",
"chars": 1885,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ava\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # CH"
},
{
"path": "configs/AVA/SLOW_8x8_R50_SHORT.yaml",
"chars": 1615,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ava\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # CH"
},
{
"path": "configs/AVA/c2/SLOWFAST_32x2_R101_50_50.yaml",
"chars": 1570,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 16\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # C"
},
{
"path": "configs/AVA/c2/SLOWFAST_32x2_R101_50_50_v2.1.yaml",
"chars": 1772,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 16\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # C"
},
{
"path": "configs/AVA/c2/SLOWFAST_32x2_R50.yaml",
"chars": 1559,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 16\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # C"
},
{
"path": "configs/AVA/c2/SLOWFAST_64x2_R101_50_50.yaml",
"chars": 1569,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 16\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # C"
},
{
"path": "configs/AVA/c2/SLOW_8x8_R50.yaml",
"chars": 1289,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 64\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n # C"
},
{
"path": "configs/Charades/SLOWFAST_16x8_R50.yaml",
"chars": 1700,
"preview": "TRAIN:\n ENABLE: True\n DATASET: charades\n BATCH_SIZE: 16\n EVAL_PERIOD: 6\n CHECKPOINT_PERIOD: 6\n AUTO_RESUME: True\n "
},
{
"path": "configs/Charades/SLOWFAST_16x8_R50_multigrid.yaml",
"chars": 1750,
"preview": "MULTIGRID:\n SHORT_CYCLE: True\n LONG_CYCLE: True\nTRAIN:\n ENABLE: True\n DATASET: charades\n BATCH_SIZE: 16\n EVAL_PERI"
},
{
"path": "configs/Charades/pytorchvideo/SLOWFAST_8x8_R50.yaml",
"chars": 1721,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvcharades\n BATCH_SIZE: 64\n EVAL_PERIOD: 6\n CHECKPOINT_PERIOD: 6\n AUTO_RESUME: Tru"
},
{
"path": "configs/Charades/pytorchvideo/SLOW_8x8_R50.yaml",
"chars": 1368,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvcharades\n BATCH_SIZE: 64\n EVAL_PERIOD: 6\n CHECKPOINT_PERIOD: 6\n AUTO_RESUME: Tru"
},
{
"path": "configs/ImageNet/MVIT_B_16_CONV.yaml",
"chars": 1659,
"preview": "TRAIN:\n ENABLE: True\n DATASET: imagenet\n BATCH_SIZE: 256\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/ImageNet/RES_R50.yaml",
"chars": 1267,
"preview": "TRAIN:\n ENABLE: True\n DATASET: imagenet\n BATCH_SIZE: 256\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/K400/K400_MLP_B16x4.yaml",
"chars": 1732,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nD"
},
{
"path": "configs/K400/K400_MLP_B32x4.yaml",
"chars": 1741,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nD"
},
{
"path": "configs/K400/K400_MLP_S16x4.yaml",
"chars": 1755,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nD"
},
{
"path": "configs/K400/K400_MLP_S32x4.yaml",
"chars": 1764,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nD"
},
{
"path": "configs/Kinetics/C2D_8x8_R50.yaml",
"chars": 1043,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/C2D_8x8_R50_IN1K.yaml",
"chars": 1216,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/C2D_NLN_8x8_R50.yaml",
"chars": 1054,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/C2D_NLN_8x8_R50_IN1K.yaml",
"chars": 1227,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_8x8_R101.yaml",
"chars": 1045,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_8x8_R50.yaml",
"chars": 1043,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_8x8_R50_IN1K.yaml",
"chars": 1216,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_NLN_8x8_R101.yaml",
"chars": 1056,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_NLN_8x8_R50.yaml",
"chars": 1054,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/I3D_NLN_8x8_R50_IN1K.yaml",
"chars": 1227,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/MVIT_B_16x4_CONV.yaml",
"chars": 1928,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 16\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/MVIT_B_32x3_CONV.yaml",
"chars": 1828,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 16\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/MVITv2_B_32x3.yaml",
"chars": 2325,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 16\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/MVITv2_L_40x3_test.yaml",
"chars": 2403,
"preview": "TRAIN:\n ENABLE: False\nDATA:\n USE_OFFSET_SAMPLING: True\n DECODING_BACKEND: torchvision\n NUM_FRAMES: 40\n SAMPLING_RAT"
},
{
"path": "configs/Kinetics/MVITv2_S_16x4.yaml",
"chars": 2205,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 16\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/SLOWFAST_4x16_R50.yaml",
"chars": 1292,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOWFAST_8x8_R50.yaml",
"chars": 1292,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOWFAST_8x8_R50_stepwise.yaml",
"chars": 1382,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOWFAST_8x8_R50_stepwise_multigrid.yaml",
"chars": 1432,
"preview": "MULTIGRID:\n SHORT_CYCLE: True\n LONG_CYCLE: True\nTRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERI"
},
{
"path": "configs/Kinetics/SLOWFAST_NLN_4x16_R50.yaml",
"chars": 1303,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOWFAST_NLN_8x8_R50.yaml",
"chars": 1303,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOW_4x16_R50.yaml",
"chars": 1049,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOW_8x8_R50.yaml",
"chars": 1048,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOW_NLN_4x16_R50.yaml",
"chars": 1060,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/SLOW_NLN_8x8_R50.yaml",
"chars": 1059,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n"
},
{
"path": "configs/Kinetics/X3D_L.yaml",
"chars": 1277,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n "
},
{
"path": "configs/Kinetics/X3D_M.yaml",
"chars": 1277,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: kinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n"
},
{
"path": "configs/Kinetics/X3D_S.yaml",
"chars": 1278,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: kinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n"
},
{
"path": "configs/Kinetics/X3D_XS.yaml",
"chars": 1278,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: kinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n"
},
{
"path": "configs/Kinetics/c2/C2D_NOPOOL_8x8_R50.yaml",
"chars": 1129,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/I3D_8x8_R50.yaml",
"chars": 1122,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/I3D_NLN_8x8_R50.yaml",
"chars": 1133,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_16x8_R101_50_50.yaml",
"chars": 1372,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_4x16_R50.yaml",
"chars": 1371,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_8x8_R101_101_101.yaml",
"chars": 1374,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_8x8_R101_50_101.yaml",
"chars": 1373,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_8x8_R101_50_50.yaml",
"chars": 1372,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_8x8_R50.yaml",
"chars": 1371,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOWFAST_NLN_16x8_R101_50_50.yaml",
"chars": 1381,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOW_4x16_R50.yaml",
"chars": 1128,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/c2/SLOW_8x8_R50.yaml",
"chars": 1127,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/pytorchvideo/C2D_8x8_R50.yaml",
"chars": 1149,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/CSN_32x2_R101.yaml",
"chars": 1064,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/I3D_8x8_R50.yaml",
"chars": 1151,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/MVIT_B_16x4_CONV.yaml",
"chars": 1920,
"preview": "TRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 16\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True"
},
{
"path": "configs/Kinetics/pytorchvideo/R2PLUS1D_16x4_R50.yaml",
"chars": 1078,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOWFAST_16x8_R101_50_50.yaml",
"chars": 1395,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOWFAST_4x16_R50.yaml",
"chars": 1396,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOWFAST_8x8_R101.yaml",
"chars": 1399,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOWFAST_8x8_R50.yaml",
"chars": 1396,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOW_4x16_R50.yaml",
"chars": 1153,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/SLOW_8x8_R50.yaml",
"chars": 1152,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: Tr"
},
{
"path": "configs/Kinetics/pytorchvideo/X3D_L.yaml",
"chars": 1107,
"preview": "TRAIN:\n DATASET: ptvkinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True\nX3D:\n WIDTH"
},
{
"path": "configs/Kinetics/pytorchvideo/X3D_M.yaml",
"chars": 1256,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: ptvkinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: "
},
{
"path": "configs/Kinetics/pytorchvideo/X3D_S.yaml",
"chars": 1291,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: ptvkinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: "
},
{
"path": "configs/Kinetics/pytorchvideo/X3D_XS.yaml",
"chars": 1256,
"preview": "TRAIN:\n # ENABLE: False # default True\n DATASET: ptvkinetics\n BATCH_SIZE: 128\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: "
},
{
"path": "configs/SSV1/SSV1_MLP_B16.yaml",
"chars": 1807,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV1/SSV1_MLP_B32.yaml",
"chars": 1810,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV1/SSV1_MLP_S16.yaml",
"chars": 1806,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV1/SSV1_MLP_S32.yaml",
"chars": 1806,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV2/SSV2_MLP_B16.yaml",
"chars": 1892,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV2/SSV2_MLP_B32.yaml",
"chars": 1895,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV2/SSV2_MLP_S16.yaml",
"chars": 1891,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSV2/SSV2_MLP_S32.yaml",
"chars": 1894,
"preview": "TRAIN:\n ENABLE: True\n DATASET: sth\n BATCH_SIZE: 64\n EVAL_PERIOD: 5\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\nDATA:\n"
},
{
"path": "configs/SSv2/SLOWFAST_16x8_R50.yaml",
"chars": 1652,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ssv2\n BATCH_SIZE: 16\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\n CHE"
},
{
"path": "configs/SSv2/SLOWFAST_16x8_R50_multigrid.yaml",
"chars": 1702,
"preview": "MULTIGRID:\n SHORT_CYCLE: True\n LONG_CYCLE: True\nTRAIN:\n ENABLE: True\n DATASET: ssv2\n BATCH_SIZE: 16\n EVAL_PERIOD: "
},
{
"path": "configs/SSv2/pytorchvideo/SLOWFAST_8x8_R50.yaml",
"chars": 1648,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvssv2\n BATCH_SIZE: 64\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\n "
},
{
"path": "configs/SSv2/pytorchvideo/SLOW_8x8_R50.yaml",
"chars": 1299,
"preview": "TRAIN:\n ENABLE: True\n DATASET: ptvssv2\n BATCH_SIZE: 64\n EVAL_PERIOD: 2\n CHECKPOINT_PERIOD: 2\n AUTO_RESUME: True\n "
},
{
"path": "configs/ssl/BYOL_SlowR50_8x8.yaml",
"chars": 1804,
"preview": "TASK: ssl\nTRAIN:\n DATASET: kinetics\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 5\n AUTO_RESUME: True\nMODEL:\n NUM_CLASSES: "
},
{
"path": "configs/ssl/MoCo_SlowR50_8x8.yaml",
"chars": 1498,
"preview": "TASK: ssl\nTRAIN:\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 5\n AUTO_RESUME: True\n MIX"
},
{
"path": "configs/ssl/SimCLR_SlowR50_8x8.yaml",
"chars": 1598,
"preview": "TASK: ssl\nTRAIN:\n DATASET: kinetics\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True\nMODEL:\n NUM_CLASSES:"
},
{
"path": "configs/ssl/SwAV_Slow_R50_8x8.yaml",
"chars": 1487,
"preview": "TASK: ssl\nTRAIN:\n DATASET: kinetics\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 10\n AUTO_RESUME: True\nMODEL:\n NUM_CLASSES:"
},
{
"path": "configs/ssl/finetune_SSv2_Slow_R50_syn0.yaml",
"chars": 1181,
"preview": "TASK: ssl_eval_ssv2\nTRAIN:\n ENABLE: True\n DATASET: ssv2\n BATCH_SIZE: 64\n EVAL_PERIOD: 4\n CHECKPOINT_PERIOD: 4\n AUT"
},
{
"path": "configs/ssl/finetune_SSv2_Slow_R50_syn8.yaml",
"chars": 1262,
"preview": "TASK: ssl_eval_ssv2\nTRAIN:\n ENABLE: True\n DATASET: ssv2\n BATCH_SIZE: 64\n EVAL_PERIOD: 4\n CHECKPOINT_PERIOD: 4\n AUT"
},
{
"path": "configs/ssl/finetune_ava_Slow_R50_syn0.yaml",
"chars": 1601,
"preview": "TASK: ssl_eval_ava\nTRAIN:\n DATASET: ava\n BATCH_SIZE: 64\n EVAL_PERIOD: 4\n CHECKPOINT_PERIOD: 4\n AUTO_RESUME: True\n "
},
{
"path": "configs/ssl/finetune_ava_Slow_R50_syn8.yaml",
"chars": 1690,
"preview": "TASK: ssl_eval_ava\nTRAIN:\n DATASET: ava\n BATCH_SIZE: 64\n EVAL_PERIOD: 4\n CHECKPOINT_PERIOD: 4\n AUTO_RESUME: True\n "
},
{
"path": "configs/ssl/finetune_ucf_Slow_R50_syn0.yaml",
"chars": 1124,
"preview": "TASK: ssl_eval_ucf\nTRAIN:\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 20\n CHECKPOINT_PERIOD: 20\n AUTO_RESUME: "
},
{
"path": "configs/ssl/finetune_ucf_Slow_R50_syn8.yaml",
"chars": 1189,
"preview": "TASK: ssl_eval_ucf\nTRAIN:\n ENABLE: True\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 20\n CHECKPOINT_PERIOD: 10\n"
},
{
"path": "configs/ssl/linear_k400_Slow_8x8_R50_syn0.yaml",
"chars": 1002,
"preview": "TASK: ssl_eval_k400\nTRAIN:\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 20\n CHECKPOINT_PERIOD: 20\n AUTO_RESUME:"
},
{
"path": "configs/ssl/linear_k400_Slow_8x8_R50_syn8.yaml",
"chars": 1099,
"preview": "TASK: ssl_eval_k400\nTRAIN:\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 20\n CHECKPOINT_PERIOD: 20\n AUTO_RESUME:"
},
{
"path": "data_list/k400/kinetics_400_categroies.txt",
"chars": 7446,
"preview": "riding a bike\t0\nmarching\t1\ndodgeball\t2\nplaying cymbals\t3\nchecking tires\t4\nroller skating\t5\ntasting beer\t6\nclapping\t7\ndra"
},
{
"path": "data_list/k400/test.csv",
"chars": 410083,
"preview": "0wR5jVB-WPk.mp4,341\r\n3caPS4FHFF8.mp4,341\r\n3yaoNwz99xM.mp4,341\r\n6IbvOJxXnOo.mp4,341\r\n6_4kjPiQr7w.mp4,341\r\n9EnSwbXxu5g.mp4"
},
{
"path": "data_list/k400/train.csv",
"chars": 4985167,
"preview": "-3B32lodo2M.mp4,341\r\n-7kbO0v4hag.mp4,341\r\n-bwYZwnwb8E.mp4,341\r\n-Cv3NwxG_8g.mp4,341\r\n-hLv_HL6UhY.mp4,341\r\n-hwUQqFadvE.mp4"
},
{
"path": "data_list/k400/val.csv",
"chars": 410083,
"preview": "0wR5jVB-WPk.mp4,341\r\n3caPS4FHFF8.mp4,341\r\n3yaoNwz99xM.mp4,341\r\n6IbvOJxXnOo.mp4,341\r\n6_4kjPiQr7w.mp4,341\r\n9EnSwbXxu5g.mp4"
},
{
"path": "data_list/sthv1/category.txt",
"chars": 7600,
"preview": "Approaching something with your camera\nAttaching something to something\nBending something so that it deforms\nBending som"
},
{
"path": "data_list/sthv1/somesomev1_rgb_train_split.txt",
"chars": 1064154,
"preview": "100218 42 134\n48032 57 72\n10433 63 23\n9426 53 29\n62548 56 126\n64558 51 100\n52373 58 126\n27171 58 0\n72594 48 39\n60276 55 "
},
{
"path": "data_list/sthv1/somesomev1_rgb_validation_split.txt",
"chars": 136254,
"preview": "85 46 30\n1753 47 56\n1091 66 16\n33739 38 5\n3850 68 12\n26408 58 91\n2031 73 149\n87723 64 17\n99373 27 141\n72650 48 166\n5468 "
},
{
"path": "data_list/sthv2/category.txt",
"chars": 8122,
"preview": "Approaching [something] with your camera\nAttaching [something] to [something]\nBending [something] so that it deforms\nBen"
},
{
"path": "data_list/sthv2/somesomev2_rgb_test_split.txt",
"chars": 311959,
"preview": "1420 44 0\n166429 40 0\n53930 40 0\n73548 38 0\n142328 46 0\n118069 28 0\n730 63 0\n204138 33 0\n11361 64 0\n202932 39 0\n132945 3"
},
{
"path": "data_list/sthv2/somesomev2_rgb_train_split.txt",
"chars": 2280857,
"preview": "78687 134 19\n42326 126 141\n100904 78 109\n80715 129 31\n34899 121 16\n184568 76 94\n112783 86 138\n108895 41 100\n169270 119 3"
},
{
"path": "data_list/sthv2/somesomev2_rgb_validation_split.txt",
"chars": 338263,
"preview": "74225 156 140\n116154 129 127\n198186 144 173\n137878 74 99\n151151 78 166\n195025 93 126\n172305 86 151\n92355 114 134\n35671 1"
},
{
"path": "demo/AVA/SLOWFAST_32x2_R101_50_50.yaml",
"chars": 1899,
"preview": "TRAIN:\n ENABLE: False\n DATASET: ava\n BATCH_SIZE: 16\n EVAL_PERIOD: 1\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True\n CHE"
},
{
"path": "demo/Kinetics/SLOWFAST_8x8_R50.yaml",
"chars": 1468,
"preview": "TRAIN:\n ENABLE: False\n DATASET: kinetics\n BATCH_SIZE: 64\n EVAL_PERIOD: 10\n CHECKPOINT_PERIOD: 1\n AUTO_RESUME: True"
},
{
"path": "linter.sh",
"chars": 344,
"preview": "#!/bin/bash -e\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n# Run this script at project root"
}
]
// ... and 102 more files (download for full content)
About this extraction
This page contains the full source code of the MTLab/MorphMLP GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 302 files (11.4 MB), approximately 3.0M tokens, and a symbol index with 1723 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.