Repository: Nicholasli1995/EvoSkeleton Branch: master Commit: b2b355f4c1fa Files: 81 Total size: 451.1 KB Directory structure: gitextract_6ign6rd0/ ├── .gitignore ├── LICENSE ├── README.md ├── data/ │ └── .gitignore ├── docs/ │ ├── 2DHPE.md │ ├── ANNOTATOR.md │ ├── DATASET.md │ ├── HHR.md │ ├── TRAINING.md │ └── Zoo.md ├── examples/ │ ├── h36m2Dpose/ │ │ └── cfgs.yaml │ ├── h36m2Dpose.py │ └── inference.py ├── libs/ │ ├── __init__.py │ ├── annotator/ │ │ ├── __init__.py │ │ ├── angle.py │ │ ├── fit_3d.py │ │ ├── smpl-spec-list.txt │ │ ├── smpl_webuser/ │ │ │ ├── LICENSE.txt │ │ │ ├── README.txt │ │ │ ├── __init__.py │ │ │ ├── hello_world/ │ │ │ │ ├── hello_smpl.py │ │ │ │ └── render_smpl.py │ │ │ ├── lbs.py │ │ │ ├── posemapper.py │ │ │ ├── serialization.py │ │ │ └── verts.py │ │ └── smplify/ │ │ ├── __init__.py │ │ ├── fit_3d.py │ │ ├── lib/ │ │ │ ├── __init__.py │ │ │ ├── capsule_body.py │ │ │ ├── capsule_ch.py │ │ │ ├── max_mixture_prior.py │ │ │ ├── robustifiers.py │ │ │ └── sphere_collisions.py │ │ └── render_model.py │ ├── dataset/ │ │ ├── __init__.py │ │ └── h36m/ │ │ ├── __init__.py │ │ ├── cameras.py │ │ ├── data_utils.py │ │ ├── h36m_pose.py │ │ ├── pose_dataset.py │ │ └── pth_dataset.py │ ├── evolution/ │ │ ├── __init__.py │ │ ├── genetic.py │ │ └── parameter.py │ ├── hhr/ │ │ ├── __init__.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ └── models.py │ │ ├── core/ │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── function.py │ │ │ ├── inference.py │ │ │ └── loss.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── transforms.py │ │ ├── utils.py │ │ └── vis.py │ ├── model/ │ │ ├── __init__.py │ │ ├── model.py │ │ ├── pose_hrnet.py │ │ └── pose_resnet.py │ ├── optimizer/ │ │ └── __init__.py │ ├── parser/ │ │ ├── __init__.py │ │ └── parse.py │ ├── skeleton/ │ │ ├── __init__.py │ │ └── anglelimits.py │ ├── trainer/ │ │ ├── __init__.py │ │ └── trainer.py │ ├── utils/ │ │ ├── __init__.py │ │ └── utils.py │ └── visualization/ │ ├── __init__.py 
│ └── viz.py ├── resources/ │ └── .gitignore ├── spec-list.txt └── tools/ ├── 2Dto3Dnet.py ├── annotate_2D.py ├── annotate_3D.py ├── evolve.py └── imgto2Dnet.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ #/* **/__pycache__ .spyproject/ /libs/annotator/fitted /libs/annotator/smplify/models /model /examples/imgs /examples/example_annot.npy /examples/example_model.th /examples/stats.npy /examples/h36m2Dpose/final_state.pth *.yml *.log *.ini *.bak *.pth *.csv *.pyc ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Shichao Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cascaded-deep-monocular-3d-human-pose-1/weakly-supervised-3d-human-pose-estimation-on)](https://paperswithcode.com/sota/weakly-supervised-3d-human-pose-estimation-on?p=cascaded-deep-monocular-3d-human-pose-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cascaded-deep-monocular-3d-human-pose-1/monocular-3d-human-pose-estimation-on-human3)](https://paperswithcode.com/sota/monocular-3d-human-pose-estimation-on-human3?p=cascaded-deep-monocular-3d-human-pose-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cascaded-deep-monocular-3d-human-pose-1/3d-human-pose-estimation-on-human36m)](https://paperswithcode.com/sota/3d-human-pose-estimation-on-human36m?p=cascaded-deep-monocular-3d-human-pose-1) # EvoSkeleton This is the project website containing relevant files for the CVPR 2020 paper "Cascaded deep monocular 3D human pose estimation with evolutionary training data". The usage and instructions are organized into several parts serving distinct purposes. Please visit the corresponding sub-page for details. For Q&A, go to [discussions](https://github.com/Nicholasli1995/EvoSkeleton/discussions). If you believe there is a technical problem, submit to [issues](https://github.com/Nicholasli1995/EvoSkeleton/issues). News: (2021-04-08): Release v-1.0. The support for pre-trained models is strengthened. More details have been added to the supplementary material. ## Cascaded 2D-to-3D Lifting [This sub-page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md) details how to train a cascaded model to lift 2D key-points to 3D skeletons on H36M. 
If you do not want to prepare synthetic data and train the model by yourself, you can access an exemplar pre-trained model [here](https://drive.google.com/file/d/158oCTK-9Y8Bl9qxidoHcXfqfeeA7qT93/view?usp=sharing) and follow the instructions in the [document](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md). This model can be used for in-the-wild inference as well as reproducing the results on MPI-INF-3DHP. The evaluation metric for MPI-INF-3DHP can be accessed [here](https://github.com/chenxuluo/OriNet-demo/tree/master/src/test_util).

Performance on H36M ([Link to pre-trained models](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/Zoo.md)) | Protocol \#1| Avg.|Dir. | Disc| Eat| Greet| Phone| Photo | Pose | Purch.| Sit| SitD.| Smoke| Wait| WalkD.| Walk | WalkT.| |-------------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|---------------|------|---------------|------------------|------------------|---------------|---------------|---------------|---------------|---------------|---------------| | [Martinez](https://github.com/una-dinosauria/3d-pose-baseline) et al. (ICCV'17) |62.9| 51.8 | 56.2| 58.1| 59.0 | 69.5 | 78.4| 55.2 | 58.1 | 74.0 | 94.6| 62.3 | 59.1 | 65.1 | 49.5 | 52.4 | | Ours (S15678) |**49.7**|**45.6**|**44.6**|**49.3**|**49.3**|**52.5**|**58.5**|**46.4**|**44.3**|**53.8**|**67.5**|**49.4**|**46.1**|**52.5**|**41.4**|**44.4**| | Protocol \#2| Avg.|Dir. | Disc| Eat| Greet| Phone| Photo | Pose | Purch.| Sit| SitD.| Smoke| Wait| WalkD.| Walk | WalkT.| |-------------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|---------------|------|---------------|------------------|------------------|---------------|---------------|---------------|---------------|---------------|---------------| | [Martinez](https://github.com/una-dinosauria/3d-pose-baseline) et al. (ICCV'17) |47.7| 39.5 | 43.2 | 46.4 | 47.0 | 51.0| 56.0 | 41.4 | 40.6 | 56.5 | 69.4 | 49.2 | 45.0 | 49.5 | 38.0 | 43.1 | | Ours (S15678) |**37.7** |**34.2**|**34.6**|**37.3**|**39.3**|**38.5**|**45.6**|**34.5**|**32.7**|**40.5**|**51.3**|**37.7**|**35.4**|**39.9**|**29.9**|**34.5**| ## Hierarchical Human Representation and Data Synthesis [This sub-page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/HHR.md) gives instructions on how to use the 3D skeleton model and how the evolution algorithm can be used to discover novel data.

## 2D Human Pose Estimation on H3.6M [This page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/2DHPE.md) shows how to perform 2D human pose estimation on Human 3.6M dataset with the pre-trained high-resolution heatmap regression model. The highly accurate 2D joint predictions may benefit your 3D human pose estimation project.

| Method | Parameters| FLOPs|Average Joint Localization Error (pixels) | | ------------------------- | ---------------| --------------| --------------| | CPN (CVPR' 18) | -|-| 5.4 | | Ours (HRN + U + S) |63.6M| 32.9G | **4.4** | ## Dataset: Unconstrained 3D Pose in the Wild [This sub-page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/DATASET.md) describes the newly collected dataset Unconstrained 3D Human Pose in the Wild (U3DPW) and gives instructions on how to download it.

## Interactive Annotation Tool [This sub-page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/ANNOTATOR.md) provides usage of an annotation tool that can be used to label 2D and 3D skeletons for an input image. U3DPW was created with this tool, and this tool may help increase the scale of 3D annotation for in-the-wild images.

## Environment - Python 3.6 - Numpy 1.16 - PyTorch 1.0.1 - CUDA 9 For a complete list of other python packages, please refer to [spec-list.txt](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/spec-list.txt). The recommended environment manager is Anaconda, which can create an environment using the provided spec-list. Certain tool in this project may need other specified environment, which is detailed in its corresponding page. ## License A MIT license is used for this repository. However, certain third-party dataset (Human 3.6M) and tool (SMPLify) are subject to their respective licenses and may not grant commercial use. ## Citation Please star this repository and cite the following paper in your publications if it helps your research: @InProceedings{Li_2020_CVPR, author = {Li, Shichao and Ke, Lei and Pratama, Kevin and Tai, Yu-Wing and Tang, Chi-Keung and Cheng, Kwang-Ting}, title = {Cascaded Deep Monocular 3D Human Pose Estimation With Evolutionary Training Data}, booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2020} } Link to the paper: [Cascaded Deep Monocular 3D Human Pose Estimation With Evolutionary Training Data](https://arxiv.org/abs/2006.07778) Link to the oral presentation video: [Youtube](https://www.youtube.com/watch?v=erYymlWw2bo) ================================================ FILE: data/.gitignore ================================================ # Ignore everything in this directory * # Except this file !.gitignore ================================================ FILE: docs/2DHPE.md ================================================ The pre-trained model can be downloaded from [here](https://drive.google.com/file/d/1NjQFCz0GdS7oIdYrK5ouxEI07wYYh4r8/view?usp=sharing) and placed under "${EvoSkeleton}/examples/h36m2Dpose" folder. 
Your directory should look like this: ``` ${EvoSkeleton} ├── examples ├── h36m2Dpose ├── cropped (prepared testing images from Human 3.6M) ├── cfgs.yaml (configuration file) ├── final_state.pth (pre-trained high-resolution heatmap regression model) ``` Then run h36m2Dpose.py at ${EvoSkeleton}/examples ```bash python h36m2Dpose.py ``` You should expect to see results like [this](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/imgs/h36m2dpose2.png). I only uploaded a few example images since I cannot upload the whole video due to the license requirement. For your own images, you should crop the humans and prepare your data accordingly.

================================================ FILE: docs/ANNOTATOR.md ================================================ The annotator is composed of three parts: 1. 2D annotation: interactively annotate 2D key-points for RGB images 2. 3D parameter fitting: obtain coarse 3D skeleton fitting results based on SMPLify. 3. 3D annotation: interactively modify 3D parameters. ## 2D Keypoints Annotation Users can annotate 2D Keypoints of images by running the script `annotate_2d.py` under ${EvoSkeleton}/tools. ```bash python annotate_2d.py -d DATASET_PATH ``` DATASET_PATH is the path to the folder containing images.

Users can annotate 2D Keypoints in the following order by clicking on the image: Right Ankle, Right Knee, Right Hip, Left Hip, Left Knee, Left Ankle, Right Wrist, Right Elbow, Right Shoulder, Left Shoulder, Left Elbow, Left Wrist, Neck, Head top, Spine, Thorax, Nose Other keyborad short-cuts are: Press Q to exit the tool. Press N to go to the next image. Press Z to save the annotation. Press C to erase all of the assigned keypoints from the image and start over. ## Coarse 3D Keypoints Estimation Manually annotating 3D skeleton from scratch is time-consuming, thus we use a tool to obtain an initial 3D pose estimation given 2D annotation. Any method that outputs 3D pose inference given 2D key-points can be employed. Here we use SMPLify to estimate coarse 3D skeleton. You need to set up a Python 2.7 environment where a [spec-list](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/libs/annotator/smpl-spec-list.txt) can be used as your reference if you use Anaconda. Then you need to install chumpy and opendr using pip: ```bash pip install chumpy pip install opendr ``` After setting up the environment, you need to download the SMPL model files [here](https://drive.google.com/drive/folders/12qJQP-h4E43FkgE74tybQUjeP_pnqAor?usp=sharing) and organize your project files as follows: ``` ${EvoSkeleton} ├── libs ├── annotator ├── smplify ├── models ├── basicModel_neutral_lbs_10_207_0_v1.0.0.pkl ├── gmm_08.pkl ├── fit_3d.py ``` Then one can run fit_3d under ${EvoSkeleton}/libs/annotator/fit_3d.py to fit the SMPL model ```bash python fit_3d.py -dataset_dir DATASET_PATH -model_dir MODEL_PATH ``` DATASET_PATH is the path to the folder containing the annotated 2D key-point file "annotation.npy". MODEL_PATH is the path to the used SMPL model (for example, basicModel_neutral_lbs_10_207_0_v1.0.0.pkl). There are other available models depending on the gender of the subject. 
The fitting process can be displayed while running, and the file annotation.npy will be updated with the fitted 3D parameters.

## 3D Skeleton Interactive Annotation After you have obtained fitted parameters for your dataset, you can modify the 3D parameters interactively with this tool. If you just want to try this tool without finishing the above steps, you can play with the pre-fitted parameters [here](https://drive.google.com/file/d/1OJokg844KDpRG3YQsZNpXwFXlVF8iOH5/view?usp=sharing) for U3DPW images. To start the tool, go to ${EvoSkeleton}/tools and run ```bash python annotate_3D.py -dataset_dir DATASET_PATH ``` DATASET_PATH is the path to the folder containing the fitted parameters "fitted.npy". The tool will select one unannotated image, display it and start the interactive process. You can modify the global orientation as well as the limb orientation of the 3D skeleton. A 2D image with projected key-points will be plotted on-line so that you can check if your annotation is reasonable or not. Some important keyborad short-cuts are: Press number keys (2-9) to select which bone vector to rotate. Press 0 so that the 3D skeleton will rotate as a whole. Press arrow keys (up and down) to rotate the bone vector. Press "m" to save an updated annotation file. Other keyboard inputs are detailed in annotate_3D.py

================================================ FILE: docs/DATASET.md ================================================ ## Download You can access the dataset [here](https://drive.google.com/file/d/1JRJuL69J0drZOAUT8VDK5ywmxt-Gm7s-/view?usp=sharing). ## Folder Structure - imgs - Contains the collected images - annotation.npy - Contains the pose annotation ``` ${U3DPW} ├── imgs ├── annotation.npy ``` ## Annotation The annotation file is a Python dictionary that has the following format: p2d is a numpy array of shape (num_keypoints, 2) that stores the image coordinates of the 2D key-points. Each row in the array stores (x, y) coordinate of the corresponding key-point. These key-points are re-annotated with a style similar to that of Human 3.6M, and can be accessed through key 'h36m'. lsp is a boolean flag that indicates whether the image is collected from [Leeds Sport Pose dataset](https://sam.johnson.io/research/lsp.html) or not. ``` { 'image_name1':{'p2d':array1, 'lsp':True/False, 'h36m':array2}, 'image_name2':{'p2d':array3, 'lsp':True/False, 'h36m':array4}, ... } ``` ## Key-point Semantics The name of the Human 3.6M style key-points are: | Index | Keypoint | |---|-------------| | 0 | Hip Center | | 1 | Right Hip | | 2 | Right Knee | | 3 | Right Ankle | | 4 | Left Hip | | 5 | Left Knee | | 6 | Left Ankle | | 7 | Spine | | 8 | Thorax | | 9 | Neck | | 10 | Head Top | | 11 | Left SHoulder | | 12 | Left Elbow | | 13 | Left Wrist | | 14 | Right Shoulder | | 15 | Right Elbow| | 16 | Right Wrist| ================================================ FILE: docs/HHR.md ================================================ ## Data Preparation Please prepare data as instructed in the model training [sub-page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md). 
The training data need to downloaded from [here](https://drive.google.com/drive/folders/1zyW8ryGXLq4bumWnVGUROpDNdubWUExg?usp=sharing) and placed under "${EvoSkeleton}/data" folder: ``` ${EvoSkeleton} ├── data ├── human3.6M ├── your downloaded files ``` ## Model Preparation During data space exploration, a function that evaluates the validity of 3D skeletons is used. This function is parametrized with a model propsosed by Ijaz Akhter in CVPR 2015. You need to download the "constraints" folder from [here](https://drive.google.com/drive/folders/1MUcR9oBNUpTAJ7YUWdyVLKCQW874FszI?usp=sharing) which contains the model parameters and place them under "${EvoSkeleton}/resources" folder: ``` ${EvoSkeleton} ├── recources ├── constraints ``` ## Dataset Evolution To evolve from a population of 3D skeleton (default to Human 3.6M data), go to "${EvoSkeleton}/tools" folder and run ```bash python evolve.py -generate True ``` ### Controling the Initial Population To reproduce the experiments in different settings, you need to specify the choice of initial population. For weakly-supervised experiments, you should only start with subject 1 (S1) data (a subset of H36M training data) as follows ```bash python evolve.py -generate True -WS True -SS "S1" ``` You can even start with extremly scarce data (e.g., 1 percent of S1 data) as follows ```bash python evolve.py -generate True -WS True -SS "0.01S1" ``` After finished dataset evolution, you can use the saved file for [training](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md) to see how dataset evolution might help improve model generalization especially when the initial population is scarce. 
## Reference @inproceedings{akhter2015pose, title={Pose-conditioned joint angle limits for 3D human pose reconstruction}, author={Akhter, Ijaz and Black, Michael J}, booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, pages={1446--1455}, year={2015} } ================================================ FILE: docs/TRAINING.md ================================================ ## Data Preparation Similar to other repositories ([SimpleBaseline](https://github.com/una-dinosauria/3d-pose-baseline), [TemporalConvolution](https://github.com/facebookresearch/VideoPose3D)) on training 2D-to-3D networks, we provide pre-processed 2D detections, camera parameters and 3D poses for training. The 2D detections are produced by our modified high-resolution model, while the camera parameters and the 3D poses are taken from [SimpleBaseline](https://github.com/una-dinosauria/3d-pose-baseline). The training data need to downloaded from [here](https://drive.google.com/drive/folders/1zyW8ryGXLq4bumWnVGUROpDNdubWUExg?usp=sharing) and placed under "${EvoSkeleton}/data" folder. Your directory should look like this: ``` ${EvoSkeleton} ├── data ├── human3.6M ├── cameras.npy (Camera parameters provided by Human 3.6M) ├── threeDPose_train.npy (3D skeletons from Human 3.6M training split) ├── threeDPose_test.npy (3D skeletons from Human 3.6M test split) ├── twoDPose_HRN_train.npy (2D key-point detections obtained from the heatmap regression model for the training split) ├── twoDPose_HRN_test.npy (2D key-point detections obtained from the heatmap regression model for the test split) ``` ## Weakly-Supervised Experiments on Human 3.6M Dataset To compare with other weakly-supervised methods, only a subset of training data (e.g., subject 1 data) is used to simulate an environment where data is scarce. 
To perform training, go to ./tools and run ```bash python 2Dto3Dnet.py -train True -num_stages 2 -ws True -ws_name "S1" ``` This command performs training on synthetic 2D key-points to remove the influence of the heatmap regression model, whose results correspond to P1* in the performance table. S1 stands for subject 1 data. "num_stages" specify the number of deep learners used in the cascade. To train on real detections obtained by the high-resolution heatmap regression model, run ```bash python 2Dto3Dnet.py -train True -num_stages 2 -ws True -ws_name "S1" -twoD_source "HRN" ``` To train on evolved dataset, you need to specify the path to the evolved data as ```bash python 2Dto3Dnet.py -train True -num_stages 2 -ws True -ws_name "S1" -twoD_source "HRN/synthetic" -evolved_path "YourDataPath" ``` See [this page](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/HHR.md) on how to evolve a dataset. After data augmentation using the evolved data, we noticed the model generalization improves significantly despite the initial population size is small. Other methods utilize multi-view or temporal consistency instead of data augmentation to supervise deep models when data is scarce. Compared to them, we achieve state-of-the-art performance by synthesizing new data to supervise the deep model. P1 and P2 refers to the two protocols used for calculating average MPJPE over all 15 actions in H36M. | Method | Avg. MPJPE (P1) | Avg. MPJPE (P2) | | ------------------------- | --------------- | --------------- | | Rhodin et al. (CVPR' 18) | - | 64.6 | | Kocabas et al. (CVPR' 19) | 65.3 | 57.2 | | Pavllo et al. (CVPR' 19) | 64.7 | - | | Li et al. 
(ICCV' 19) | 88.8 | 66.5 | | Ours | **60.8** | **46.2** | ## Fully-Supervised Experiments on Human 3.6M Dataset To train on real detections obtained by the high-resolution heatmap regression model, run ```bash python 2Dto3Dnet.py -train True -num_stages 2 -twoD_source "HRN" ``` To train on evolved dataset, you need to specify the path to the evolved data as ```bash python 2Dto3Dnet.py -train True -num_stages 3 -num_blocks 3 -twoD_source "HRN/synthetic" -evolved_path "YourDataPath" ``` Here we increase model capacity with "-num_stages 3 -num_blocks 3" since the training data size is much larger (if you evolve enough generations). While the improvement using data evolution is less obvious in fully-supervised setting compared with weakly-supervised setting, our cascaded model still achieved competitive performance compared with other 2D-to-3D lifting models. | Method | Avg. MPJPE (P1) | Avg. MPJPE (P2) | | -------------------------- | --------------- | --------------- | | [Martinez et al.](https://github.com/una-dinosauria/3d-pose-baseline) (ICCV' 17) | 62.9 | 47.7 | | Yang et al. (CVPR' 18) | 58.6 | **37.7** | | Zhao et al. (CVPR' 19) | 57.6 | - | | Sharma et al. (CVPR' 19) | 58.0 | 40.9 | | Moon et al. (ICCV' 19) | 54.4 | - | | Ours | **50.9** | 38.0 | ## Inference Example If you only want to use a pre-trained model to conduct inference on in-the-wild images (skipping data synthesis and model training), you can download the sample images and a pre-trained checkpoint [here](https://drive.google.com/file/d/158oCTK-9Y8Bl9qxidoHcXfqfeeA7qT93/view?usp=sharing). 
Un-zip the downloaded file to "${EvoSkeleton}/examples" folder and your directory should look like this: ``` ${EvoSkeleton} ├── examples ├── imgs (sample images) ├── example_annot.npy (2D key-points for the samples) ├── example_model.th (pre-trained model) ├── stats.npy (model statistics) ├── inference.py ``` Then you can run the following command at "${EvoSkeleton}/examples" to perform inference ```bash python inference.py ```

================================================ FILE: docs/Zoo.md ================================================ This page presents model performance on H36M under various settings. Pre-trained models and instructions for reproduction can also be found. ## Fully-supervised Setting (S15678) [Download our pre-trained model](https://drive.google.com/drive/folders/1IRKUWrnheD03Dj30LLGlh_LLT1CK6dr5?usp=sharing) [Download our pre-evolved data](https://drive.google.com/drive/folders/1FKFkmTJQcEdrCvZOSc8cF5OTTjFvyLav?usp=sharing) Inference command: ```bash python 2Dto3Dnet.py -evaluate True -twoD_source "HRN" -ckpt_dir "YourMODELPath" ``` Training command ([Docs](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md)): ```bash python 2Dto3Dnet.py -train True -num_stages 3 -num_blocks 3 -twoD_source "HRN" -evolved_path "YourDataPath" ``` Data synthesis command ([Docs](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/HHR.md)): ```bash python evolve.py -SS "S15678" -T 1.5 -SD "YourDataPath" -generate True ``` MPJPE (P1) for each action under fully-supervised setting is shown in the table below. | Protocol \#1 | Dir. | Disc | Eat | Greet | Phone | Photo | Pose | Purch. | Sit | SitD. | Smoke | Wait | WalkD. | Walk | WalkT. | Avg. | |-------------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|---------------|------|---------------|------------------|------------------|---------------|---------------|---------------|---------------|---------------|---------------| | [Martinez](https://github.com/una-dinosauria/3d-pose-baseline) et al. (ICCV'17) | 51.8 | 56.2 | 58.1 | 59.0 | 69.5 | 78.4 | 55.2 | 58.1 | 74.0 | 94.6 | 62.3 | 59.1 | 65.1 | 49.5 | 52.4 | 62.9 | | [Fang](https://arxiv.org/abs/1710.06513) et al. 
(AAAI'18) | 50.1 | 54.3 | 57.0 | 57.1 | 66.6 | 73.3 | 53.4 | 55.7 | 72.8 | 88.6 | 60.3 | 57.7 | 62.7 | 47.5 | 50.6 | 60.4 | | [Yang](https://arxiv.org/abs/1803.09722) et al. (CVPR'18) | 51.5 | 58.9 | 50.4 | 57.0 | 62.1 | 65.4 | 49.8 | 52.7 | 69.2 | 85.2 | 57.4 | 58.4 | 43.6 | 60.1 | 47.7 | 58.6 | | [Pavlakos](https://github.com/geopavlakos/ordinal-pose3d) et al. (CVPR'18) | 48.5 | 54.4 | 54.4 | 52.0 | 59.4 | 65.3 | 49.9 | 52.9 | 65.8 | 71.1 | 56.6 | 52.9 | 60.9 | 44.7 | 47.8 | 56.2 | | [Lee](https://openaccess.thecvf.com/content_ECCV_2018/papers/Kyoungoh_Lee_Propagating_LSTM_3D_ECCV_2018_paper.pdf) et al. (ECCV'18) | 40.2 | 49.2 | 47.8 | 52.6 | 50.1 | 75.0 | 50.2 | 43.0 | 55.8 | 73.9 | 54.1 | 55.6 | 58.2 | 43.3 | 43.3 | 52.8 | | [Zhao](https://arxiv.org/abs/1904.03345) et al. (CVPR'19) | 47.3 | 60.7 | 51.4 | 60.5 | 61.1 | 49.9 | 47.3 | 68.1 | 86.2 | 55.0 | 67.8 | 61.0 | 42.1 | 60.6 | 45.3 | 57.6 | | [Sharma](https://arxiv.org/abs/1904.01324) et al. (ICCV'19) | 48.6 | 54.5 | 54.2 | 55.7 | 62.6 | 72.0 | 50.5 | 54.3 | 70.0 | 78.3 | 58.1 | 55.4 | 61.4 | 45.2 | 49.7 | 58.0 | | [Moon](https://github.com/mks0601/3DMPPE_POSENET_RELEASE) et al. (ICCV'19) | 51.5 | 56.8 | 51.2 | 52.2 | 55.2 | 47.7 | 50.9 | 63.3 | 69.9 | 54.2 | 57.4 | 50.4 | 42.5 | 57.5 | 47.7 | 54.4 | | [Liu](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123550324.pdf) et al. (ECCV'20) | 46.3 | 52.2 | 47.3 | 50.7 | 55.5 | 67.1 | 49.2 | 46.0 | 60.4 | 71.1 | 51.5 | 50.1 | 54.5 | 40.3 | 43.7 | 52.4 | | Ours (S15678) |45.6|44.6|49.3|49.3|52.5|58.5|46.4|44.3|53.8|67.5|49.4|46.1|52.5|41.4|44.4| 49.7 | MPJPE (P2) for each action under fully-supervised setting is shown in the table below. | Protocol \#2 | Dir. | Disc | Eat | Greet | Phone | Photo | Pose | Purch. | Sit | SitD. | Smoke | Wait | WalkD. | Walk | WalkT. | Avg. 
| |-------------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|---------------|------|---------------|------------------|------------------|---------------|---------------|---------------|---------------|---------------|---------------| | [Martinez](https://github.com/una-dinosauria/3d-pose-baseline) et al. (ICCV'17) | 39.5 | 43.2 | 46.4 | 47.0 | 51.0 | 56.0 | 41.4 | 40.6 | 56.5 | 69.4 | 49.2 | 45.0 | 49.5 | 38.0 | 43.1 | 47.7 | | [Fang](https://arxiv.org/abs/1710.06513) et al. (AAAI'18) | 38.2 | 41.7 | 43.7 | 44.9 | 48.5 | 55.3 | 40.2 | 38.2 | 54.5 | 64.4 | 47.2 | 44.3 | 47.3 | 36.7 | 41.7 | 45.7 | | [Pavlakos](https://github.com/geopavlakos/ordinal-pose3d) et al. (CVPR'18) | 34.7 | 39.8 | 41.8 | 38.6 | 42.5 | 47.5 | 38.0 | 36.6 | 50.7 | 56.8 | 42.6 | 39.6 | 43.9 | 32.1 | 36.5 | 41.8 | | [Yang](https://arxiv.org/abs/1803.09722) et al. (CVPR'18) | 26.9 | 30.9 | 36.3 | 39.9 | 43.9 | 47.4 | 28.8 | 29.4 | 36.9 | 58.4 | 41.5 | 30.5 | 29.5 | 42.5 | 32.2 | 37.7 | | [Sharma](https://arxiv.org/abs/1904.01324) et al. (ICCV'19) | 35.3 | 35.9 | 45.8 | 42.0 | 40.9 | 52.6 | 36.9 | 35.8 | 43.5 | 51.9 | 44.3 | 38.8 | 45.5 | 29.4 | 34.3 | 40.9 | | [Cai](https://openaccess.thecvf.com/content_ICCV_2019/papers/Cai_Exploiting_Spatial-Temporal_Relationships_for_3D_Pose_Estimation_via_Graph_Convolutional_ICCV_2019_paper.pdf) et al. (ICCV'19) | 35.7 | 37.8 | 36.9 | 40.7 | 39.6 | 45.2 | 37.4 | 34.5 | 46.9 | 50.1 | 40.5 | 36.1 | 41.0 | 29.6 | 33.2 | 39.0 | | [Liu](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123550324.pdf) et al. 
(ECCV'20) | 35.9 | 40.0 | 38.0 | 41.5 | 42.5 | 51.4 | 37.8 | 36.0 | 48.6 | 56.6 | 41.8 | 38.3 | 42.7 | 31.7 | 36.2 | 41.2 | | Ours (S15678) |34.2|34.6|37.3|39.3|38.5|45.6|34.5|32.7|40.5|51.3|37.7|35.4|39.9|29.9|34.5| 37.7 | ## Weakly-supervised Setting (S1) [Download our pre-trained model](https://drive.google.com/drive/folders/1PZoiizPKeoFTsvnFKIxaRDNbyb0Csx50?usp=sharing) [Download our pre-evolved data](https://drive.google.com/drive/folders/1nTW2CCCT_sbJ1CejhuiQLTgDDU5sJjZj?usp=sharing) Inference command: ```bash python 2Dto3Dnet.py -evaluate True -twoD_source "HRN" -ckpt_dir "YourMODELPath" ``` Training command ([Docs](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/TRAINING.md)): ```bash python 2Dto3Dnet.py -train True -num_stages 2 -ws True -ws_name "S1" -twoD_source "HRN" -evolved_path "YourDataPath" ``` Data synthesis command ([Docs](https://github.com/Nicholasli1995/EvoSkeleton/blob/master/docs/HHR.md)): ```bash python evolve.py -generate True -WS True -SS "S1" ``` MPJPE (P1) for each action under weakly-supervised setting is shown in the table below. | Protocol \#1 | Dir. | Disc | Eat | Greet | Phone | Photo | Pose | Purch. | Sit | SitD. | Smoke | Wait | WalkD. | Walk | WalkT. | Avg. | |--------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|------------------|------|---------------|------------------|-------|---------------|---------------|---------------|---------------|---------------|---------------| | [Kocabas](https://arxiv.org/abs/1903.02330) et al. (CVPR'19) | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | 65.3 | | [Pavllo](https://arxiv.org/abs/1811.11742) et al. (CVPR'19) | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | 64.7 | | [Li](https://openaccess.thecvf.com/content_ICCV_2019/papers/Li_On_Boosting_Single-Frame_3D_Human_Pose_Estimation_via_Monocular_Videos_ICCV_2019_paper.pdf) et al. 
(ICCV'19) | 70.4 | 83.6 | 76.6 | 78.0 | 85.4 | 106.1 | 72.2 | 103.0 | 115.8 | 165.0 | 82.4 | 74.3 | 94.6 | 60.1 | 70.6 | 88.8 | | Ours (S1) | 52.8 | 56.6 | 54.0 | 57.5 | 62.8 | 72.0 | 55.0 | 61.3 | 65.8 | 80.7 | 58.9 | 56.7 | 69.7 | 51.6 | 57.2 | 60.8 | MPJPE (P2) for each action under fully-supervised setting is shown in the table below. | Protocol \#2 | Dir. | Disc | Eat | Greet | Phone | Photo | Pose | Purch. | Sit | SitD. | Smoke | Wait | WalkD. | Walk | WalkT. | Avg. | |--------------------------------------------------------|------------------|------------------|---------------|------------------|---------------|------------------|------|---------------|------------------|-------|---------------|---------------|---------------|---------------|---------------|---------------| | [Rhodin](https://arxiv.org/abs/1803.04775) et al. (CVPR'18) | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | 64.6 | | [Kocabas](https://arxiv.org/abs/1903.02330) et al. (CVPR'19) | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | 57.2 | | [Li](https://openaccess.thecvf.com/content_ICCV_2019/papers/Li_On_Boosting_Single-Frame_3D_Human_Pose_Estimation_via_Monocular_Videos_ICCV_2019_paper.pdf) et al. 
(ICCV'19) | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | 66.5 | | Ours (S1) | 40.2 | 43.4 | 41.9| 46.1 | 48.2 | 55.1 | 42.8 | 42.6 | 49.6 | 61.1 | 44.5 | 43.2 | 51.5 | 38.1 | 44.4 | 46.2 | ================================================ FILE: examples/h36m2Dpose/cfgs.yaml ================================================ CUDNN: BENCHMARK: true DETERMINISTIC: false ENABLED: true GPUS: (0,) MODEL: NAME: pose_hrnet NUM_JOINTS: 17 TARGET_TYPE: 'coordinate' IMAGE_SIZE: - 288 - 384 HEATMAP_SIZE: - 288 - 384 EXTRA: PRETRAINED_LAYERS: - 'conv1' - 'bn1' - 'conv2' - 'bn2' - 'layer1' - 'transition1' - 'stage2' - 'transition2' - 'stage3' - 'transition3' - 'stage4' FINAL_CONV_KERNEL: 1 STAGE2: NUM_MODULES: 1 NUM_BRANCHES: 2 BLOCK: BASIC NUM_BLOCKS: - 4 - 4 NUM_CHANNELS: - 48 - 96 FUSE_METHOD: SUM STAGE3: NUM_MODULES: 4 NUM_BRANCHES: 3 BLOCK: BASIC NUM_BLOCKS: - 4 - 4 - 4 NUM_CHANNELS: - 48 - 96 - 192 FUSE_METHOD: SUM STAGE4: NUM_MODULES: 3 NUM_BRANCHES: 4 BLOCK: BASIC NUM_BLOCKS: - 4 - 4 - 4 - 4 NUM_CHANNELS: - 48 - 96 - 192 - 384 FUSE_METHOD: SUM ================================================ FILE: examples/h36m2Dpose.py ================================================ """ Examplar code showing how to use pre-trained heatmap regression model H() to perform 2D pose estimation on Human 3.6M images. 
""" import sys sys.path.append("../") from libs.hhr.config import cfg from libs.hhr.config import update_config from libs.hhr.utils.utils import get_model_summary from libs.model.pose_hrnet import get_pose_net from libs.hhr.utils.transforms import get_affine_transform from libs.hhr.core.loss import get_max_preds_soft_pt import torch import torch.nn.parallel import torch.backends.cudnn as cudnn import torchvision.transforms as transforms import argparse import os import logging import cv2 import numpy as np import matplotlib.pyplot as plt pose_connection = np.array([[0,1], [1,2], [2,3], [0,4], [4,5], [5,6], [0,7], [7,8], [8,9], [9,10], [8,11], [11,12], [12,13], [8,14], [14,15], [15,16]], dtype=np.int ) I = pose_connection[:, 0] J = pose_connection[:, 1] pose_color = np.array([[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255] ])/255. 
re_order = [3, 12, 14, 16, 11, 13, 15, 1, 2, 0, 4, 5, 7, 9, 6, 8, 10] def show2Dpose(vals, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=False): for i in np.arange( len(I) ): x, y = [np.array([vals[I[i], j], vals[J[i], j]]) for j in range(2)] ax.plot(x, y, c=pose_color[i]) def parse_args(): parser = argparse.ArgumentParser(description='2D pose estimation example') parser.add_argument('--cfg', help='configuration file', default='./h36m2Dpose/cfgs.yaml', type=str) parser.add_argument('--data_path', help='path to pre-processed testing images', default='./h36m2Dpose/cropped', type=str) args = parser.parse_args() return args def xywh2cs(x, y, w, h, aspect_ratio=0.75): center = np.zeros((2), dtype=np.float32) center[0] = x + w * 0.5 center[1] = y + h * 0.5 if w > aspect_ratio * h: h = w * 1.0 / aspect_ratio elif w < aspect_ratio * h: w = h * aspect_ratio scale = np.array([w * 1.0 / 200, h * 1.0 / 200], dtype=np.float32) return center, scale def gather_inputs(args, logger, image_size = (288, 384)): root = args.data_path img_names = os.listdir(root) normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) transform = transforms.Compose([ transforms.ToTensor(), normalize, ]) inputs = [] # these testing images were cropped from videos of subject 9 and 11 for name in img_names: pass image_file = os.path.join(root, name) data_numpy = cv2.imread(image_file, 1 | 128) data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) if data_numpy is None: logger.error('=> fail to read {}'.format(image_file)) raise ValueError('Fail to read {}'.format(image_file)) c, s = xywh2cs(0, 0, data_numpy.shape[1], data_numpy.shape[0]) r = 0 trans = get_affine_transform(c, s, r, image_size) input = cv2.warpAffine( data_numpy, trans, (image_size[0], image_size[1]), flags=cv2.INTER_LINEAR) inputs.append(transform(input).unsqueeze(0)) return torch.cat(inputs) def unnormalize(tensor): img = tensor.data.cpu().numpy() mean = np.array([0.485, 0.456, 0.406]).reshape(3, 
1, 1) std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) img = np.transpose((((img * std) + mean) * 255).astype(np.uint8), (1, 2, 0)) return img def visualize(inputs, model): output = model(inputs) pred, max_vals = get_max_preds_soft_pt(output) pred = pred.data.cpu().numpy() pred = pred[:, re_order, :] plt.figure() for i in range(len(pred)): ax = plt.subplot(1, len(pred), i+1) ax.imshow(unnormalize(inputs[i])) ax.plot(pred[i][:, 0], pred[i][:, 1], 'ro') show2Dpose(pred[i], ax) return def main(): args = parse_args() update_config(cfg, args) logging.basicConfig(format='%(asctime)-15s %(message)s') logger = logging.getLogger() logger.setLevel(logging.INFO) # cudnn related setting cudnn.benchmark = cfg.CUDNN.BENCHMARK torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED model = get_pose_net(cfg, is_train=False) # load the pre-trained weights if not os.path.exists('./h36m2Dpose/final_state.pth'): logger.info('Please download the pre-trained model first.') return checkpoint = torch.load('./h36m2Dpose/final_state.pth') model.load_state_dict(checkpoint) dump_input = torch.rand( (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) ) logger.info(get_model_summary(model, dump_input)) # modify the configuration file for multiple GPUs model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() inputs = gather_inputs(args, logger) visualize(inputs, model) if __name__ == '__main__': main() ================================================ FILE: examples/inference.py ================================================ """ Am examplar script showing inference on the newly collected images in U3DPW. 
""" import sys sys.path.append("../") import libs.model.model as libm from libs.dataset.h36m.data_utils import unNormalizeData import torch import numpy as np import imageio import matplotlib.pyplot as plt num_joints = 16 gt_3d = False pose_connection = [[0,1], [1,2], [2,3], [0,4], [4,5], [5,6], [0,7], [7,8], [8,9], [9,10], [8,11], [11,12], [12,13], [8, 14], [14, 15], [15,16]] # 16 out of 17 key-points are used as inputs in this examplar model re_order_indices= [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16] # paths data_dic_path = './example_annot.npy' model_path = './example_model.th' stats = np.load('./stats.npy', allow_pickle=True).item() dim_used_2d = stats['dim_use_2d'] mean_2d = stats['mean_2d'] std_2d = stats['std_2d'] # load the checkpoint and statistics ckpt = torch.load(model_path) data_dic = np.load(data_dic_path, allow_pickle=True).item() # initialize the model cascade = libm.get_cascade() input_size = 32 output_size = 48 for stage_id in range(2): # initialize a single deep learner stage_model = libm.get_model(stage_id + 1, refine_3d=False, norm_twoD=False, num_blocks=2, input_size=input_size, output_size=output_size, linear_size=1024, dropout=0.5, leaky=False) cascade.append(stage_model) cascade.load_state_dict(ckpt) cascade.eval() # process and show total_to_show examples count = 0 total_to_show = 10 def draw_skeleton(ax, skeleton, gt=False, add_index=True): for segment_idx in range(len(pose_connection)): point1_idx = pose_connection[segment_idx][0] point2_idx = pose_connection[segment_idx][1] point1 = skeleton[point1_idx] point2 = skeleton[point2_idx] color = 'k' if gt else 'r' plt.plot([int(point1[0]),int(point2[0])], [int(point1[1]),int(point2[1])], c=color, linewidth=2) if add_index: for (idx, re_order_idx) in enumerate(re_order_indices): plt.text(skeleton[re_order_idx][0], skeleton[re_order_idx][1], str(idx+1), color='b' ) return def normalize(skeleton, re_order=None): norm_skel = skeleton.copy() if re_order is not None: norm_skel = 
norm_skel[re_order].reshape(32) norm_skel = norm_skel.reshape(16, 2) mean_x = np.mean(norm_skel[:,0]) std_x = np.std(norm_skel[:,0]) mean_y = np.mean(norm_skel[:,1]) std_y = np.std(norm_skel[:,1]) denominator = (0.5*(std_x + std_y)) norm_skel[:,0] = (norm_skel[:,0] - mean_x)/denominator norm_skel[:,1] = (norm_skel[:,1] - mean_y)/denominator norm_skel = norm_skel.reshape(32) return norm_skel def get_pred(cascade, data): """ Get prediction from a cascaded model """ # forward pass to get prediction for the first stage num_stages = len(cascade) # for legacy code that does not have the num_blocks attribute for i in range(len(cascade)): cascade[i].num_blocks = len(cascade[i].res_blocks) prediction = cascade[0](data) # prediction for later stages for stage_idx in range(1, num_stages): prediction += cascade[stage_idx](data) return prediction def show3Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=True, gt=False, pred=False ): vals = np.reshape( channels, (32, -1) ) I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 # start points J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 # end points LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool) # Make connection matrix for i in np.arange( len(I) ): x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)] if gt or pred: color = 'k' if gt else 'r' ax.plot(x,y, z, lw=2, c=color) else: ax.plot(x,y, z, lw=2, c=lcolor if LR[i] else rcolor) RADIUS = 750 # space around the subject xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2] ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot]) ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) if add_labels: ax.set_xlabel("x") ax.set_ylabel("z") ax.set_zlabel("y") ax.set_aspect('equal') # Get rid of the panes (actually, make them white) white = (1.0, 1.0, 1.0, 0.0) ax.w_xaxis.set_pane_color(white) ax.w_yaxis.set_pane_color(white) # Get rid of the lines in 3d 
ax.w_xaxis.line.set_color(white) ax.w_yaxis.line.set_color(white) ax.w_zaxis.line.set_color(white) ax.invert_zaxis() return def re_order(skeleton): skeleton = skeleton.copy().reshape(-1,3) # permute the order of x,y,z axis skeleton[:,[0,1,2]] = skeleton[:, [0,2,1]] return skeleton.reshape(96) def plot_3d_ax(ax, elev, azim, pred, title=None ): ax.view_init(elev=elev, azim=azim) show3Dpose(re_order(pred), ax) plt.title(title) return def adjust_figure(left = 0, right = 1, bottom = 0.01, top = 0.95, wspace = 0, hspace = 0.4 ): plt.subplots_adjust(left, bottom, right, top, wspace, hspace) return for image_name in data_dic.keys(): image_path = './imgs/' + image_name img = imageio.imread(image_path) f = plt.figure(figsize=(9, 3)) ax1 = plt.subplot(131) ax1.imshow(img) plt.title('Input image') ax2 = plt.subplot(132) plt.title('2D key-point inputs: {:d}*2'.format(num_joints)) ax2.set_aspect('equal') ax2.invert_yaxis() skeleton_pred = None skeleton_2d = data_dic[image_name]['p2d'] # The order for the 2D keypoints is: # 'Hip', 'RHip', 'RKnee', 'RFoot', 'LHip', 'LKnee', 'LFoot', 'Spine', # 'Thorax', 'Neck/Nose', 'Head', 'LShoulder', 'LElbow', 'LWrist', 'RShoulder' # 'RElbow', 'RWrist' draw_skeleton(ax2, skeleton_2d, gt=True) plt.plot(skeleton_2d[:,0], skeleton_2d[:,1], 'ro', 2) # Nose was not used for this examplar model norm_ske_gt = normalize(skeleton_2d, re_order_indices).reshape(1,-1) pred = get_pred(cascade, torch.from_numpy(norm_ske_gt.astype(np.float32))) pred = unNormalizeData(pred.data.numpy(), stats['mean_3d'], stats['std_3d'], stats['dim_ignore_3d'] ) ax3 = plt.subplot(133, projection='3d') plot_3d_ax(ax=ax3, pred=pred, elev=10., azim=-90, title='3D prediction' ) adjust_figure(left = 0.05, right = 0.95, bottom = 0.08, top = 0.92, wspace = 0.3, hspace = 0.3 ) count += 1 if count >= total_to_show: break ================================================ FILE: libs/__init__.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- 
""" Empty file. """ ================================================ FILE: libs/annotator/__init__.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Empty file. """ #import libs.dataset.h36m ================================================ FILE: libs/annotator/angle.py ================================================ import numpy as np import scipy.io as sio import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D bone_name = { 1: 'thorax to head top', 2: 'left shoulder to left elbow', 3: 'left elbow to left wrist', 4: 'right shoulder to right elbow', 5: 'right elbow to right wrist', 6: 'left hip to left knee', 7: 'left knee to left ankle', 8: 'right hip to right knee', 9: 'right knee to right ankle' } static_pose_path = "/media/nicholas/Database/Github/EvoSkeleton/resources/constraints/staticPose.npy" static_pose = np.load(static_pose_path, allow_pickle=True).item() di = static_pose['di'] a = static_pose['a'].reshape(3) di_indices = {2:5, 4:2, 6:13, 8:9} nt_parent_indices = [13, 17, 18, 25, 26, 6, 7, 1, 2] nt_child_indices = [15, 18, 19, 26, 27, 7, 8, 2, 3] def gram_schmidt_columns(X): B = np.zeros(X.shape) B[:, 0] = (1/np.linalg.norm(X[:, 0]))*X[:, 0] for i in range(1, 3): v = X[:, i] U = B[:, 0:i] # subspace basis which has already been orthonormalized pc = U.T @ v # orthogonal projection coefficients of v onto U p = U@pc v = v - p if np.linalg.norm(v) < 2e-16: # vectors are not linearly independent! 
raise ValueError else: v = normalize(v) B[:, i] = v return B def normalize(vector): return vector/np.linalg.norm(vector) def get_basis1(skeleton): """ get local coordinate system """ # compute the vector from the left shoulder to the right shoulder left_shoulder = skeleton[17] right_shoulder = skeleton[25] v1 = normalize(right_shoulder - left_shoulder) # compute the backbone vector from the thorax to the spine thorax = skeleton[13] spine = skeleton[12] v2 = normalize(spine - thorax) # v3 is the cross product of v1 and v2 (front-facing vector for upper-body) v3 = normalize(np.cross(v1, v2)) return v1, v2, v3 def get_normal(x1, a, x): nth = 1e-4 # x and a are parallel if np.linalg.norm(x - a) < nth or np.linalg.norm(x + a) < nth: n = np.cross(x, x1) flag = True else: n = np.cross(a, x) flag = False return normalize(n), flag def to_spherical(xyz): """ convert from Cartisian coordinate to spherical coordinate theta: [-pi, pi] phi: [-pi/2, pi/2] note that xyz should be float number """ # return in r, phi, and theta (elevation angle from z axis down) return_value = np.zeros(xyz.shape, dtype=xyz.dtype) xy = xyz[:,0]**2 + xyz[:,1]**2 return_value[:,0] = np.sqrt(xy + xyz[:,2]**2) # r return_value[:,1] = np.arctan2(xyz[:,1], xyz[:,0]) # theta return_value[:,2] = np.arctan2(xyz[:,2], np.sqrt(xy)) # phi return return_value def to_xyz(rthetaphi): """ convert from spherical coordinate to Cartisian coordinate theta: [0, 2*pi] or [-pi, pi] phi: [-pi/2, pi/2] """ return_value = np.zeros(rthetaphi.shape, dtype=rthetaphi.dtype) sintheta = np.sin(rthetaphi[:,1]) costheta = np.cos(rthetaphi[:,1]) sinphi = np.sin(rthetaphi[:,2]) cosphi = np.cos(rthetaphi[:,2]) return_value[:,0] = rthetaphi[:,0]*costheta*cosphi # x return_value[:,1] = rthetaphi[:,0]*sintheta*cosphi # y return_value[:,2] = rthetaphi[:,0]*sinphi #z return return_value ================================================ FILE: libs/annotator/fit_3d.py ================================================ """ Obtain 3D skeleton with 
2D key-points as inputs using SMPLify Please run this script in Python 2 environment for now. TODO: transfer this tool to Python 3. """ from smpl_webuser.serialization import load_model from smplify.fit_3d import run_single_fit # you can use multi-processing to fit the images in parallel #from multiprocessing import Pool import matplotlib.pyplot as plt import numpy as np import argparse import cv2 import os # whether to use the regularization terms sph_regs = None # number of blend shapes to use n_betas = 1 # focal length of the camera flength = 5000 pix_thsh = 25 scale_factor = 1 # viewpoints for rendering do_degrees = [0.] def main(opt): model = load_model(opt.model_dir) annotation_path = os.path.join(opt.dataset_dir, 'annotation.npy') assert os.path.exists(annotation_path), "Please prepare the 2D annotation first." annotation = np.load(annotation_path, allow_pickle=True).item() if opt.save_image and not os.path.exists(os.path.join(opt.dataset_dir, 'fitted')): os.makedirs(os.path.join(opt.dataset_dir, 'fitted')) for (image_name, annots) in annotation.items(): assert 'p2d' in annots, "The image must be annotated with 2D key-points" joints_2d = annots['p2d'] # use 14 key-points for model fitting # one may adjust this number by considering different 2D-3D corespondence if joints_2d.shape[0] > 14: joints_2d = joints_2d[:14, :] # Prepare fitting parameters filling_list = range(12) + [13] conf = np.zeros(joints_2d.shape[0]) conf[filling_list] = 1.0 img = cv2.imread(os.path.join(opt.dataset_dir, image_name)) # Run single fit params, vis = run_single_fit( img, joints_2d, conf, model, regs=sph_regs, n_betas=n_betas, flength=flength, pix_thsh=pix_thsh, scale_factor=scale_factor, viz=opt.viz, do_degrees=do_degrees ) # Show result then close after 1 second f = plt.figure(figsize=(6, 3)) plt.subplot(121) plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) for di, deg in enumerate(do_degrees): plt.subplot(122) plt.cla() plt.imshow(vis[di]) plt.draw() plt.pause(1) # record the 
params annotation[image_name]['fitting_params'] = params # save fitted image if opt.save_image: image_name = image_name.replace(".jpg", ".png") img_save_path = os.path.join(opt.dataset_dir, 'fitted', image_name) f.savefig(img_save_path) plt.close(f) print("fitted image saved at ", img_save_path) # save the annotation dictionary with fitted parameters np.save(os.path.join(opt.dataset_dir, "fitted.npy"), annotation) print('3D prameters saved at ' + os.path.join(opt.dataset_dir, "fitted.npy")) if __name__ == '__main__': parser = argparse.ArgumentParser(description='2D Annotation') parser.add_argument('-dataset_dir', type=str) parser.add_argument('-model_dir', type=str) parser.add_argument('-save_image',default=True, type=bool) # visualize intermeadiate results parser.add_argument('-viz',default=False, type=bool) opt = parser.parse_args() main(opt) ================================================ FILE: libs/annotator/smpl-spec-list.txt ================================================ # This file may be used to create an environment using: # $ conda create --name --file # platform: linux-64 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2020.6.24-0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2020.1-217.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-9.1.0-hdf63c60_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pandoc-2.9.2.1-0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-9.1.0-hdf63c60_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl-2020.1-217.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/expat-2.2.9-he6710b0_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/icu-58.2-he6710b0_3.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9b-h024ee3a_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libsodium-1.0.18-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libspatialindex-1.9.3-he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.0.3-h1bed415_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.14-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.2-he6710b0_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1g-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.44-he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pixman-0.40.0-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.5-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/yaml-0.1.7-had09818_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/glib-2.65.0-h3eb4bd4_0.tar.bz2 https://repo.anaconda.com/pkgs/free/linux-64/hdf5-1.8.17-2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20191231-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.37-hbc83047_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.9.10-he19cac6_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/readline-8.0-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.10-hbc83047_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zeromq-4.3.2-he6710b0_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.3.7-h0b5b093_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.16-hb2f20db_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.10.2-h5ab3b9f_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.0-hb31296c_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.0.10-h2733197_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.32.3-h62c20be_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.13.0-h9420a91_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.0-hbbd80ab_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/python-2.7.18-h15b4118_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/alabaster-0.7.12-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/argh-0.26.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/asn1crypto-1.3.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/atomicwrites-1.4.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/attrs-19.3.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/backports-1.0-py_2.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/backports_abc-0.5-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cairo-1.14.12-h8948797_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/certifi-2019.11.28-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/chardet-3.0.4-py27_1003.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/cloudpickle-1.2.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/contextlib2-0.6.0.post1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/decorator-4.4.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/defusedxml-0.6.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/diff-match-patch-20181111-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/docutils-0.15.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/enum34-1.1.6-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/functools32-3.2.3.2-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/future-0.18.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/futures-3.3.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/idna-2.10-py_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/noarch/imagesize-1.2.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/ipaddress-1.0.23-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipython_genutils-0.2.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/lazy-object-proxy-1.4.3-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/markupsafe-1.1.1-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mccabe-0.6.1-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mistune-0.8.4-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pandocfilters-1.4.2-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/parso-0.5.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pathtools-0.1.2-py_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/psutil-5.6.7-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ptyprocess-0.6.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycodestyle-2.5.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.20-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyflakes-2.1.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pyparsing-2.4.7-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pysocks-1.7.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pytz-2020.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pyxdg-0.26-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyyaml-5.2-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyzmq-18.1.0-py27he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qdarkstyle-2.8.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/qt-5.9.7-h5867ecd_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qtpy-1.9.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/rope-0.17.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/rtree-0.8.3-py27_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/scandir-1.10.0-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/selectors2-2.0.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/simplegeneric-0.8.1-py27_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sip-4.19.8-py27hf484d3e_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/six-1.15.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/snowballstemmer-2.0.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/sortedcontainers-2.2.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sphinxcontrib-1.0-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/testpath-0.4.4-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/typing-3.7.4.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ujson-1.35-py27h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/wcwidth-0.2.5-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/webencodings-0.5.1-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wrapt-1.11.2-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/yapf-0.30.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/autopep8-1.4.4-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/babel-2.8.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/backports.shutil_get_terminal_size-1.0.0-py27_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cffi-1.14.0-py27he30daa8_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/configparser-4.0.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/free/linux-64/harfbuzz-0.9.39-1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/intervaltree-3.0.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jedi-0.14.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl-service-2.3.0-py27he904b0f_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/more-itertools-5.0.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/packaging-20.4-py_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/pathlib2-2.3.5-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pexpect-4.7.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.9.2-py27h05f1152_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyrsistent-0.15.6-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/python-jsonrpc-server-0.3.4-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qtawesome-0.7.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/setuptools-44.0.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/singledispatch-3.4.0.3-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/sphinxcontrib-websupport-1.2.3-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/traitlets-4.3.3-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/watchdog-0.9.0-py27_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wurlitzer-2.0.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/backports.functools_lru_cache-1.6.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/bleach-3.1.5-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cryptography-2.8-py27h1ba5d50_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/entrypoints-0.3-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/jinja2-2.11.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jupyter_core-4.6.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.6-py27hde5b4d6_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pickleshare-0.7.5-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pydocstyle-3.0.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pygments-2.5.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/tornado-5.1.1-py27h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.6-py27_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/noarch/zipp-0.6.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/astroid-1.6.5-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/flake8-3.7.9-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/importlib_metadata-1.3.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/isort-4.3.21-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jupyter_client-5.3.4-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pip-19.3.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/prompt_toolkit-1.0.15-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyopenssl-19.1.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipython-5.8.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jsonschema-3.2.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pluggy-0.13.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pylint-1.9.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/urllib3-1.25.7-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipykernel-4.10.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nbformat-4.4.0-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/python-language-server-0.31.2-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/requests-2.24.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nbconvert-5.6.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qtconsole-4.7.5-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sphinx-1.8.5-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/spyder-kernels-1.8.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/numpydoc-1.0.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/spyder-4.0.1-py27_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.0.15-py27ha843d7b_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.1.0-py27hd6b4f25_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.6-py27hbc911f0_0.tar.bz2 https://conda.anaconda.org/menpo/linux-64/opencv3-3.1.0-py27_0.tar.bz2 ================================================ FILE: libs/annotator/smpl_webuser/LICENSE.txt ================================================ Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the SMPL body model and software, (the "Model"), including 3D meshes, blend weights blend shapes, textures, software, scripts, and animations. By downloading and/or using the Model, you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use the Model. Ownership The Model has been developed at the Max Planck Institute for Intelligent Systems (hereinafter "MPI") and is owned by and proprietary material of the Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”). License Grant Max-Planck grants you a non-exclusive, non-transferable, free of charge right: To download the Model and use it on computers owned, leased or otherwise controlled by you and/or your organisation; To use the Model for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects. Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, as training data for a commercial product, for commercial ergonomic analysis (e.g. product design, architectural design, etc.), or production of other artifacts for commercial purposes including, for example, web services, movies, television programs, mobile applications, or video games. 
The Model may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Model to train methods/algorithms/neural networks/etc. for commercial use of any kind. The Model may not be reproduced, modified and/or made available in any form to any third party without Max-Planck’s prior written permission. By downloading the Model, you agree not to reverse engineer it. Disclaimer of Representations and Warranties You expressly acknowledge and agree that the Model results from basic research, is provided “AS IS”, may contain errors, and that any use of the Model is at your sole risk. MAX-PLANCK MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE MODEL, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, Max-Planck makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Model, (ii) that the use of the Model will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Model will not cause any damage of any kind to you or a third party. Limitation of Liability Under no circumstances shall Max-Planck be liable for any incidental, special, indirect or consequential damages arising out of or relating to this license, including but not limited to, any lost profits, business interruption, loss of programs or other data, or all other commercial damages or losses, even if advised of the possibility thereof. No Maintenance Services You understand and agree that Max-Planck is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Model. Max-Planck nevertheless reserves the right to update, modify, or discontinue the Model at any time. 
Publication with SMPL You agree to cite the most recent paper describing the model as specified on the download website. This website lists the most up to date bibliographic information on the about page. Media projects with SMPL When using SMPL in a media project please give credit to Max Planck Institute for Intelligent Systems. For example: SMPL was used for character animation courtesy of the Max Planck Institute for Intelligent Systems. Commercial licensing opportunities For commercial use in the fields of medicine, psychology, and biomechanics, please contact ps-license@tue.mpg.de. For commercial use in all other fields please contact Body Labs Inc at smpl@bodylabs.com ================================================ FILE: libs/annotator/smpl_webuser/README.txt ================================================ License: ======== To learn about SMPL, please visit our website: http://smpl.is.tue.mpg You can find the SMPL paper at: http://files.is.tue.mpg.de/black/papers/SMPL2015.pdf Visit our downloads page to download some sample animation files (FBX), and python code: http://smpl.is.tue.mpg/downloads For comments or questions, please email us at: smpl@tuebingen.mpg.de System Requirements: ==================== Operating system: OSX, Linux Python Dependencies: - Numpy & Scipy [http://www.scipy.org/scipylib/download.html] - Chumpy [https://github.com/mattloper/chumpy] - OpenCV [http://opencv.org/downloads.html] Getting Started: ================ 1. Extract the Code: -------------------- Extract the 'smpl.zip' file to your home directory (or any other location you wish) 2. Set the PYTHONPATH: ---------------------- We need to update the PYTHONPATH environment variable so that the system knows how to find the SMPL code. 
Add the following lines to your ~/.bash_profile file (create it if it doesn't exist; Linux users might have ~/.bashrc file instead), replacing ~/smpl with the location where you extracted the smpl.zip file: SMPL_LOCATION=~/smpl export PYTHONPATH=$PYTHONPATH:$SMPL_LOCATION Open a new terminal window to check if the python path has been updated by typing the following: > echo $PYTHONPATH 3. Run the Hello World scripts: ------------------------------- In the new Terminal window, navigate to the smpl/smpl_webuser/hello_world directory. You can run the hello world scripts now by typing the following: > python hello_smpl.py OR > python render_smpl.py Note: Both of these scripts will require the dependencies listed above. The scripts are provided as a sample to help you get started. ================================================ FILE: libs/annotator/smpl_webuser/__init__.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft. All rights reserved. This software is provided for research purposes only. By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license More information about SMPL is available here http://smpl.is.tue.mpg. For comments or questions, please email us at: smpl@tuebingen.mpg.de About this file: ================ This is an initialization file to help python look for submodules in this directory. ''' ================================================ FILE: libs/annotator/smpl_webuser/hello_world/hello_smpl.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft. All rights reserved. This software is provided for research purposes only. By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license More information about SMPL is available here http://smpl.is.tue.mpg. 
For comments or questions, please email us at: smpl@tuebingen.mpg.de Please Note: ============ This is a demo version of the script for driving the SMPL model with python. We would be happy to receive comments, help and suggestions on improving this code and in making it available on more platforms. System Requirements: ==================== Operating system: OSX, Linux Python Dependencies: - Numpy & Scipy [http://www.scipy.org/scipylib/download.html] - Chumpy [https://github.com/mattloper/chumpy] About the Script: ================= This script demonstrates a few basic functions to help users get started with using the SMPL model. The code shows how to: - Load the SMPL model - Edit pose & shape parameters of the model to create a new body in a new pose - Save the resulting body as a mesh in .OBJ format Running the Hello World code: ============================= Inside Terminal, navigate to the smpl/webuser/hello_world directory. You can run the hello world script now by typing the following: > python hello_smpl.py ''' from smpl_webuser.serialization import load_model import numpy as np ## Load SMPL model (here we load the female model) ## Make sure path is correct m = load_model( '../../models/basicModel_f_lbs_10_207_0_v1.0.0.pkl' ) ## Assign random pose and shape parameters m.pose[:] = np.random.rand(m.pose.size) * .2 m.betas[:] = np.random.rand(m.betas.size) * .03 ## Write to an .obj file outmesh_path = './hello_smpl.obj' with open( outmesh_path, 'w') as fp: for v in m.r: fp.write( 'v %f %f %f\n' % ( v[0], v[1], v[2]) ) for f in m.f+1: # Faces are 1-based, not 0-based in obj files fp.write( 'f %d %d %d\n' % (f[0], f[1], f[2]) ) ## Print message print '..Output mesh saved to: ', outmesh_path ================================================ FILE: libs/annotator/smpl_webuser/hello_world/render_smpl.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft. All rights reserved. 
This software is provided for research purposes only.
By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license

More information about SMPL is available here http://smpl.is.tue.mpg.
For comments or questions, please email us at: smpl@tuebingen.mpg.de


Please Note:
============
This is a demo version of the script for driving the SMPL model with python.
We would be happy to receive comments, help and suggestions on improving this
code and in making it available on more platforms.


System Requirements:
====================
Operating system: OSX, Linux

Python Dependencies:
- Numpy & Scipy [http://www.scipy.org/scipylib/download.html]
- Chumpy [https://github.com/mattloper/chumpy]
- OpenCV [http://opencv.org/downloads.html]
  --> (alternatively: matplotlib [http://matplotlib.org/downloads.html])


About the Script:
=================
This script demonstrates loading the smpl model and rendering it using OpenDR
to render and OpenCV to display (or alternatively matplotlib can also be used
for display, as shown in commented code below).

This code shows how to:
  - Load the SMPL model
  - Edit pose & shape parameters of the model to create a new body in a new pose
  - Create an OpenDR scene (with a basic renderer, camera & light)
  - Render the scene using OpenCV / matplotlib


Running the Hello World code:
=============================
Inside Terminal, navigate to the smpl/smpl_webuser/hello_world directory.
You can run the hello world script now by typing the following:
> python render_smpl.py

'''

import numpy as np
from opendr.renderer import ColoredRenderer
from opendr.lighting import LambertianPointLight
from opendr.camera import ProjectPoints
from smpl_webuser.serialization import load_model

## Load SMPL model (here we load the female model)
m = load_model('../../models/basicModel_f_lbs_10_207_0_v1.0.0.pkl')

## Assign random pose and shape parameters
m.pose[:] = np.random.rand(m.pose.size) * .2
m.betas[:] = np.random.rand(m.betas.size) * .03
# NOTE(review): presumably flips the root orientation so the body faces the
# camera upright (pose[0] is the first component of the root axis-angle) —
# confirm against the SMPL joint parameterization.
m.pose[0] = np.pi

## Create OpenDR renderer
rn = ColoredRenderer()

## Assign attributes to renderer
w, h = (640, 480)

# Camera: no rotation, pushed 2 units back along z; focal length w/2 and
# principal point at the image center.
rn.camera = ProjectPoints(v=m, rt=np.zeros(3), t=np.array([0, 0, 2.]), f=np.array([w,w])/2., c=np.array([w,h])/2., k=np.zeros(5))
rn.frustum = {'near': 1., 'far': 10., 'width': w, 'height': h}
rn.set(v=m, f=m.f, bgcolor=np.zeros(3))

## Construct point light source
# Uniform light-gray vertex colors lit by a single point light behind/above.
rn.vc = LambertianPointLight(
    f=m.f,
    v=rn.v,
    num_verts=len(m),
    light_pos=np.array([-1000,-1000,-2000]),
    vc=np.ones_like(m)*.9,
    light_color=np.array([1., 1., 1.]))

## Show it using OpenCV
import cv2
cv2.imshow('render_SMPL', rn.r)
print ('..Print any key while on the display window')
cv2.waitKey(0)
cv2.destroyAllWindows()

## Could also use matplotlib to display
# import matplotlib.pyplot as plt
# plt.ion()
# plt.imshow(rn.r)
# plt.show()
# import pdb; pdb.set_trace()


# ================================================
# FILE: libs/annotator/smpl_webuser/lbs.py
# ================================================
'''
Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft.  All rights reserved.
This software is provided for research purposes only.
By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license

More information about SMPL is available here http://smpl.is.tue.mpg.
For comments or questions, please email us at: smpl@tuebingen.mpg.de About this file: ================ This file defines linear blend skinning for the SMPL loader which defines the effect of bones and blendshapes on the vertices of the template mesh. Modules included: - global_rigid_transformation: computes global rotation & translation of the model - verts_core: [overloaded function inherited from verts.verts_core] computes the blending of joint-influences for each vertex based on type of skinning ''' from smpl_webuser.posemapper import posemap import chumpy import numpy as np def global_rigid_transformation(pose, J, kintree_table, xp): results = {} pose = pose.reshape((-1,3)) id_to_col = {kintree_table[1,i] : i for i in range(kintree_table.shape[1])} parent = {i : id_to_col[kintree_table[0,i]] for i in range(1, kintree_table.shape[1])} if xp == chumpy: from smpl_webuser.posemapper import Rodrigues rodrigues = lambda x : Rodrigues(x) else: import cv2 rodrigues = lambda x : cv2.Rodrigues(x)[0] with_zeros = lambda x : xp.vstack((x, xp.array([[0.0, 0.0, 0.0, 1.0]]))) results[0] = with_zeros(xp.hstack((rodrigues(pose[0,:]), J[0,:].reshape((3,1))))) for i in range(1, kintree_table.shape[1]): results[i] = results[parent[i]].dot(with_zeros(xp.hstack(( rodrigues(pose[i,:]), ((J[i,:] - J[parent[i],:]).reshape((3,1))) )))) pack = lambda x : xp.hstack([np.zeros((4, 3)), x.reshape((4,1))]) results = [results[i] for i in sorted(results.keys())] results_global = results if True: results2 = [results[i] - (pack( results[i].dot(xp.concatenate( ( (J[i,:]), 0 ) ))) ) for i in range(len(results))] results = results2 result = xp.dstack(results) return result, results_global def verts_core(pose, v, J, weights, kintree_table, want_Jtr=False, xp=chumpy): A, A_global = global_rigid_transformation(pose, J, kintree_table, xp) T = A.dot(weights.T) rest_shape_h = xp.vstack((v.T, np.ones((1, v.shape[0])))) v =(T[:,0,:] * rest_shape_h[0, :].reshape((1, -1)) + T[:,1,:] * rest_shape_h[1, 
:].reshape((1, -1)) + T[:,2,:] * rest_shape_h[2, :].reshape((1, -1)) + T[:,3,:] * rest_shape_h[3, :].reshape((1, -1))).T v = v[:,:3] if not want_Jtr: return v Jtr = xp.vstack([g[:3,3] for g in A_global]) return (v, Jtr) ================================================ FILE: libs/annotator/smpl_webuser/posemapper.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft. All rights reserved. This software is provided for research purposes only. By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license More information about SMPL is available here http://smpl.is.tue.mpg. For comments or questions, please email us at: smpl@tuebingen.mpg.de About this file: ================ This module defines the mapping of joint-angles to pose-blendshapes. Modules included: - posemap: computes the joint-to-pose blend shape mapping given a mapping type as input ''' import chumpy as ch import numpy as np import cv2 class Rodrigues(ch.Ch): dterms = 'rt' def compute_r(self): return cv2.Rodrigues(self.rt.r)[0] def compute_dr_wrt(self, wrt): if wrt is self.rt: return cv2.Rodrigues(self.rt.r)[1].T def lrotmin(p): if isinstance(p, np.ndarray): p = p.ravel()[3:] return np.concatenate([(cv2.Rodrigues(np.array(pp))[0]-np.eye(3)).ravel() for pp in p.reshape((-1,3))]).ravel() if p.ndim != 2 or p.shape[1] != 3: p = p.reshape((-1,3)) p = p[1:] return ch.concatenate([(Rodrigues(pp)-ch.eye(3)).ravel() for pp in p]).ravel() def posemap(s): if s == 'lrotmin': return lrotmin else: raise Exception('Unknown posemapping: %s' % (str(s),)) ================================================ FILE: libs/annotator/smpl_webuser/serialization.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck Gesellschaft. All rights reserved. This software is provided for research purposes only. 
By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license More information about SMPL is available here http://smpl.is.tue.mpg. For comments or questions, please email us at: smpl@tuebingen.mpg.de About this file: ================ This file defines the serialization functions of the SMPL model. Modules included: - save_model: saves the SMPL model to a given file location as a .pkl file - load_model: loads the SMPL model from a given file location (i.e. a .pkl file location), or a dictionary object. ''' __all__ = ['load_model', 'save_model'] import numpy as np import pickle import chumpy as ch from chumpy.ch import MatVecMult from smpl_webuser.posemapper import posemap from smpl_webuser.verts import verts_core def save_model(model, fname): m0 = model trainer_dict = {'v_template': np.asarray(m0.v_template),'J': np.asarray(m0.J),'weights': np.asarray(m0.weights),'kintree_table': m0.kintree_table,'f': m0.f, 'bs_type': m0.bs_type, 'posedirs': np.asarray(m0.posedirs)} if hasattr(model, 'J_regressor'): trainer_dict['J_regressor'] = m0.J_regressor if hasattr(model, 'J_regressor_prior'): trainer_dict['J_regressor_prior'] = m0.J_regressor_prior if hasattr(model, 'weights_prior'): trainer_dict['weights_prior'] = m0.weights_prior if hasattr(model, 'shapedirs'): trainer_dict['shapedirs'] = m0.shapedirs if hasattr(model, 'vert_sym_idxs'): trainer_dict['vert_sym_idxs'] = m0.vert_sym_idxs if hasattr(model, 'bs_style'): trainer_dict['bs_style'] = model.bs_style else: trainer_dict['bs_style'] = 'lbs' pickle.dump(trainer_dict, open(fname, 'w'), -1) def backwards_compatibility_replacements(dd): # replacements if 'default_v' in dd: dd['v_template'] = dd['default_v'] del dd['default_v'] if 'template_v' in dd: dd['v_template'] = dd['template_v'] del dd['template_v'] if 'joint_regressor' in dd: dd['J_regressor'] = dd['joint_regressor'] del dd['joint_regressor'] if 'blendshapes' in dd: dd['posedirs'] = dd['blendshapes'] del 
dd['blendshapes'] if 'J' not in dd: dd['J'] = dd['joints'] del dd['joints'] # defaults if 'bs_style' not in dd: dd['bs_style'] = 'lbs' def ready_arguments(fname_or_dict): if not isinstance(fname_or_dict, dict): dd = pickle.load(open(fname_or_dict, 'rb')) # dd = pickle.load(open(fname_or_dict, 'rb'), encoding='latin1') else: dd = fname_or_dict backwards_compatibility_replacements(dd) want_shapemodel = 'shapedirs' in dd nposeparms = dd['kintree_table'].shape[1]*3 if 'trans' not in dd: dd['trans'] = np.zeros(3) if 'pose' not in dd: dd['pose'] = np.zeros(nposeparms) if 'shapedirs' in dd and 'betas' not in dd: dd['betas'] = np.zeros(dd['shapedirs'].shape[-1]) for s in ['v_template', 'weights', 'posedirs', 'pose', 'trans', 'shapedirs', 'betas', 'J']: if (s in dd) and not hasattr(dd[s], 'dterms'): dd[s] = ch.array(dd[s]) if want_shapemodel: dd['v_shaped'] = dd['shapedirs'].dot(dd['betas'])+dd['v_template'] v_shaped = dd['v_shaped'] J_tmpx = MatVecMult(dd['J_regressor'], v_shaped[:,0]) J_tmpy = MatVecMult(dd['J_regressor'], v_shaped[:,1]) J_tmpz = MatVecMult(dd['J_regressor'], v_shaped[:,2]) dd['J'] = ch.vstack((J_tmpx, J_tmpy, J_tmpz)).T dd['v_posed'] = v_shaped + dd['posedirs'].dot(posemap(dd['bs_type'])(dd['pose'])) else: dd['v_posed'] = dd['v_template'] + dd['posedirs'].dot(posemap(dd['bs_type'])(dd['pose'])) return dd def load_model(fname_or_dict): dd = ready_arguments(fname_or_dict) args = { 'pose': dd['pose'], 'v': dd['v_posed'], 'J': dd['J'], 'weights': dd['weights'], 'kintree_table': dd['kintree_table'], 'xp': ch, 'want_Jtr': True, 'bs_style': dd['bs_style'] } result, Jtr = verts_core(**args) result = result + dd['trans'].reshape((1,3)) result.J_transformed = Jtr + dd['trans'].reshape((1,3)) for k, v in dd.items(): setattr(result, k, v) return result ================================================ FILE: libs/annotator/smpl_webuser/verts.py ================================================ ''' Copyright 2015 Matthew Loper, Naureen Mahmood and the Max Planck 
Gesellschaft.  All rights reserved.
This software is provided for research purposes only.
By using this software you agree to the terms of the SMPL Model license here http://smpl.is.tue.mpg.de/license

More information about SMPL is available here http://smpl.is.tue.mpg.
For comments or questions, please email us at: smpl@tuebingen.mpg.de


About this file:
================
This file defines the basic skinning modules for the SMPL loader which
defines the effect of bones and blendshapes on the vertices of the template mesh.

Modules included:
- verts_decorated:
  creates an instance of the SMPL model which inherits model attributes from another
  SMPL model.
- verts_core: [overloaded function inherited by lbs.verts_core]
  computes the blending of joint-influences for each vertex based on type of skinning

'''

import chumpy
import smpl_webuser.lbs as lbs
from smpl_webuser.posemapper import posemap
import scipy.sparse as sp
from chumpy.ch import MatVecMult


def ischumpy(x):
    # chumpy expressions expose a `dterms` attribute; plain arrays do not.
    return hasattr(x, 'dterms')


def verts_decorated(trans, pose, v_template, J, weights, kintree_table,
                    bs_style, f, bs_type=None, posedirs=None, betas=None,
                    shapedirs=None, want_Jtr=False):
    # Build a skinned SMPL vertex expression and attach all the model
    # parameters as attributes on the returned chumpy object.
    # Every differentiable input must already be a chumpy expression.
    for which in [trans, pose, v_template, weights, posedirs, betas, shapedirs]:
        if which is not None:
            assert ischumpy(which)

    v = v_template

    # Apply the shape blendshapes (if any) to the template.
    if shapedirs is not None:
        if betas is None:
            betas = chumpy.zeros(shapedirs.shape[-1])
        v_shaped = v + shapedirs.dot(betas)
    else:
        v_shaped = v

    # Apply the pose-dependent corrective blendshapes (if any).
    if posedirs is not None:
        v_posed = v_shaped + posedirs.dot(posemap(bs_type)(pose))
    else:
        v_posed = v_shaped

    v = v_posed

    # When J is a sparse regressor matrix, regress the joint locations from
    # the *shaped* (not posed) vertices, per coordinate.
    if sp.issparse(J):
        regressor = J
        J_tmpx = MatVecMult(regressor, v_shaped[:, 0])
        J_tmpy = MatVecMult(regressor, v_shaped[:, 1])
        J_tmpz = MatVecMult(regressor, v_shaped[:, 2])
        J = chumpy.vstack((J_tmpx, J_tmpy, J_tmpz)).T
    else:
        assert(ischumpy(J))

    # Only linear blend skinning is supported.
    assert(bs_style=='lbs')
    result, Jtr = lbs.verts_core(pose, v, J, weights, kintree_table,
                                 want_Jtr=True, xp=chumpy)

    # Apply the global translation to vertices and joints alike.
    tr = trans.reshape((1,3))
    result = result + tr
    Jtr = Jtr + tr

    # Decorate the returned expression with all model parameters so callers
    # can optimize over them (e.g. sv.pose, sv.betas in fit_3d.py).
    result.trans = trans
    result.f = f
    result.pose = pose
    result.v_template = v_template
    result.J = J
    result.weights = weights
    result.kintree_table = kintree_table
    result.bs_style = bs_style
    result.bs_type =bs_type
    if posedirs is not None:
        result.posedirs = posedirs
        result.v_posed = v_posed
    if shapedirs is not None:
        result.shapedirs = shapedirs
        result.betas = betas
        result.v_shaped = v_shaped
    if want_Jtr:
        result.J_transformed = Jtr
    return result


def verts_core(pose, v, J, weights, kintree_table, bs_style, want_Jtr=False,
               xp=chumpy):
    # Thin wrapper over lbs.verts_core that validates the inputs first.
    if xp == chumpy:
        # All differentiable inputs must be chumpy expressions.
        assert(hasattr(pose, 'dterms'))
        assert(hasattr(v, 'dterms'))
        assert(hasattr(J, 'dterms'))
        assert(hasattr(weights, 'dterms'))

    assert(bs_style=='lbs')
    result = lbs.verts_core(pose, v, J, weights, kintree_table, want_Jtr, xp)

    return result


# ================================================
# FILE: libs/annotator/smplify/__init__.py
# ================================================


# ================================================
# FILE: libs/annotator/smplify/fit_3d.py
# ================================================
"""
Copyright 2016 Max Planck Society, Federica Bogo, Angjoo Kanazawa. All rights reserved.
This software is provided for research purposes only.
By using this software you agree to the terms of the SMPLify license here:
     http://smplify.is.tue.mpg.de/license

About this Script:
============
This is a demo version of the algorithm implemented in the paper,
which fits the SMPL body model to the image given the joint detections.
The code is organized to be run on the LSP dataset.
See README to see how to download images and the detected joints.
""" from os.path import join, exists, abspath, dirname from os import makedirs import logging import pickle from time import time from glob import glob import argparse import cv2 import numpy as np import chumpy as ch from opendr.camera import ProjectPoints from .lib.robustifiers import GMOf from .lib.sphere_collisions import SphereCollisions from .lib.max_mixture_prior import MaxMixtureCompletePrior from .render_model import render_model from smpl_webuser.serialization import load_model from smpl_webuser.lbs import global_rigid_transformation from smpl_webuser.verts import verts_decorated _LOGGER = logging.getLogger(__name__) # Mapping from LSP joints to SMPL joints. # 0 Right ankle 8 # 1 Right knee 5 # 2 Right hip 2 # 3 Left hip 1 # 4 Left knee 4 # 5 Left ankle 7 # 6 Right wrist 21 # 7 Right elbow 19 # 8 Right shoulder 17 # 9 Left shoulder 16 # 10 Left elbow 18 # 11 Left wrist 20 # 12 Neck - # 13 Head top added # --------------------Camera estimation -------------------- def guess_init(model, focal_length, j2d, init_pose): """Initialize the camera translation via triangle similarity, by using the torso joints . 
:param model: SMPL model :param focal_length: camera focal length (kept fixed) :param j2d: 14x2 array of CNN joints :param init_pose: 72D vector of pose parameters used for initialization (kept fixed) :returns: 3D vector corresponding to the estimated camera translation """ cids = np.arange(0, 12) # map from LSP to SMPL joints j2d_here = j2d[cids] smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation( opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) Jtr = Jtr[smpl_ids].r # 9 is L shoulder, 3 is L hip # 8 is R shoulder, 2 is R hip diff3d = np.array([Jtr[9] - Jtr[3], Jtr[8] - Jtr[2]]) mean_height3d = np.mean(np.sqrt(np.sum(diff3d**2, axis=1))) diff2d = np.array([j2d_here[9] - j2d_here[3], j2d_here[8] - j2d_here[2]]) mean_height2d = np.mean(np.sqrt(np.sum(diff2d**2, axis=1))) est_d = focal_length * (mean_height3d / mean_height2d) # just set the z value init_t = np.array([0., 0., est_d]) return init_t def initialize_camera(model, j2d, img, init_pose, flength=5000., pix_thsh=25., viz=False): """Initialize camera translation and body orientation :param model: SMPL model :param j2d: 14x2 array of CNN joints :param img: h x w x 3 image :param init_pose: 72D vector of pose parameters used for initialization :param flength: camera focal length (kept fixed) :param pix_thsh: threshold (in pixel), if the distance between shoulder joints in 2D is lower than pix_thsh, the body orientation as ambiguous (so a fit is run on both the estimated one and its flip) :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the estimated camera, a boolean deciding if both the optimized body orientation and its flip should be considered, 3D vector for the body orientation """ # optimize camera translation and body orientation based on torso joints # LSP torso ids: # 2=right hip, 3=left hip, 8=right shoulder, 9=left shoulder 
torso_cids = [2, 3, 8, 9] # corresponding SMPL torso ids torso_smpl_ids = [2, 1, 17, 16] center = np.array([img.shape[1] / 2, img.shape[0] / 2]) # initialize camera rotation rt = ch.zeros(3) # initialize camera translation _LOGGER.info('initializing translation via similar triangles') init_t = guess_init(model, flength, j2d, init_pose) t = ch.array(init_t) # check how close the shoulder joints are try_both_orient = np.linalg.norm(j2d[8] - j2d[9]) < pix_thsh opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation( opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) # initialize the camera cam = ProjectPoints( f=np.array([flength, flength]), rt=rt, t=t, k=np.zeros(5), c=center) # we are going to project the SMPL joints cam.v = Jtr if viz: viz_img = img.copy() # draw the target (CNN) joints for coord in np.around(j2d).astype(int): if (coord[0] < img.shape[1] and coord[0] >= 0 and coord[1] < img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 255, 0]) import matplotlib.pyplot as plt plt.ion() # draw optimized joints at each iteration def on_step(_): """Draw a visualization.""" plt.figure(1, figsize=(5, 5)) plt.subplot(1, 1, 1) viz_img = img.copy() for coord in np.around(cam.r[torso_smpl_ids]).astype(int): if (coord[0] < viz_img.shape[1] and coord[0] >= 0 and coord[1] < viz_img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 0, 255]) plt.imshow(viz_img[:, :, ::-1]) plt.draw() plt.show() plt.pause(1e-3) else: on_step = None # optimize for camera translation and body orientation free_variables = [cam.t, opt_pose[:3]] ch.minimize( # data term defined over torso joints... 
# --------------------Core optimization --------------------
def optimize_on_joints(j2d,
                       model,
                       cam,
                       img,
                       prior,
                       try_both_orient,
                       body_orient,
                       n_betas=10,
                       regs=None,
                       conf=None,
                       viz=False):
    """Fit the model to the given set of joints, given the estimated camera
    :param j2d: 14x2 array of CNN joints
    :param model: SMPL model
    :param cam: estimated camera
    :param img: h x w x 3 image
    :param prior: mixture of gaussians pose prior
    :param try_both_orient: boolean, if True both body_orient and its flip
                            are considered for the fit
    :param body_orient: 3D vector, initialization for the body orientation
    :param n_betas: number of shape coefficients considered during optimization
    :param regs: regressors for capsules' axis and radius, if not None enables
                 the interpenetration error term
    :param conf: 14D vector storing the confidence values from the CNN
    :param viz: boolean, if True enables visualization during optimization
    :returns: a tuple containing the optimized model, its joints projected on
              image space, the camera translation
    """
    t0 = time()
    # define the mapping LSP joints -> SMPL joints
    # cids are joints ids for LSP:
    # NOTE: was `range(12) + [13]`, which is a TypeError on Python 3;
    # materialize the range so the concatenation works on both versions.
    cids = list(range(12)) + [13]
    # joint ids for SMPL
    # SMPL does not have a joint for head, instead we use a vertex for the head
    # and append it later.
    smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20]
    # the vertex id for the joint corresponding to the head
    head_id = 411
    # weights assigned to each joint during optimization;
    # the definition of hips in SMPL and LSP is significantly different so set
    # their weights to zero
    base_weights = np.array(
        [1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=np.float64)

    if try_both_orient:
        # the flipped orientation is the estimated one rotated by pi around y
        flipped_orient = cv2.Rodrigues(body_orient)[0].dot(
            cv2.Rodrigues(np.array([0., np.pi, 0]))[0])
        flipped_orient = cv2.Rodrigues(flipped_orient)[0].ravel()
        orientations = [body_orient, flipped_orient]
    else:
        orientations = [body_orient]

    if try_both_orient:
        # store here the final error for both orientations,
        # and pick the orientation resulting in the lowest error
        errors = []

    svs = []
    cams = []
    for o_id, orient in enumerate(orientations):
        # initialize the shape to the mean shape in the SMPL training set
        betas = ch.zeros(n_betas)

        # initialize the pose by using the optimized body orientation and the
        # pose prior
        init_pose = np.hstack((orient, prior.weights.dot(prior.means)))

        # instantiate the model:
        # verts_decorated allows us to define how many
        # shape coefficients (directions) we want to consider (here, n_betas)
        sv = verts_decorated(
            trans=ch.zeros(3),
            pose=ch.array(init_pose),
            v_template=model.v_template,
            J=model.J_regressor,
            betas=betas,
            shapedirs=model.shapedirs[:, :, :n_betas],
            weights=model.weights,
            kintree_table=model.kintree_table,
            bs_style=model.bs_style,
            f=model.f,
            bs_type=model.bs_type,
            posedirs=model.posedirs)

        # make the SMPL joints depend on betas
        Jdirs = np.dstack([model.J_regressor.dot(model.shapedirs[:, :, i])
                           for i in range(len(betas))])
        J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot(
            model.v_template.r)

        # get joint positions as a function of model pose, betas and trans
        (_, A_global) = global_rigid_transformation(
            sv.pose, J_onbetas, model.kintree_table, xp=ch)
        Jtr = ch.vstack([g[:3, 3] for g in A_global]) + sv.trans

        # add the head joint, corresponding to a vertex...
        Jtr = ch.vstack((Jtr, sv[head_id]))

        # ... and add the joint id to the list (only once; the list is shared
        # across the two candidate orientations)
        if o_id == 0:
            smpl_ids.append(len(Jtr) - 1)

        # update the weights using confidence values
        weights = base_weights * conf[
            cids] if conf is not None else base_weights

        # project SMPL joints on the image plane using the estimated camera
        cam.v = Jtr

        # data term: distance between observed and estimated joints in 2D
        obj_j2d = lambda w, sigma: (
            w * weights.reshape((-1, 1)) * GMOf((j2d[cids] - cam[smpl_ids]),
                                                sigma))

        # mixture of gaussians pose prior
        pprior = lambda w: w * prior(sv.pose)
        # joint angles pose prior, defined over a subset of pose parameters:
        # 55: left elbow,  90deg bend at -np.pi/2
        # 58: right elbow, 90deg bend at np.pi/2
        # 12: left knee,   90deg bend at np.pi/2
        # 15: right knee,  90deg bend at np.pi/2
        alpha = 10
        my_exp = lambda x: alpha * ch.exp(x)
        obj_angle = lambda w: w * ch.concatenate(
            [my_exp(sv.pose[55]), my_exp(-sv.pose[58]),
             my_exp(-sv.pose[12]), my_exp(-sv.pose[15])])

        if viz:
            import matplotlib.pyplot as plt
            plt.ion()

            def on_step(_):
                """Create visualization."""
                plt.figure(1, figsize=(10, 10))
                plt.subplot(1, 2, 1)
                # show optimized joints in 2D
                tmp_img = img.copy()
                for coord, target_coord in zip(
                        np.around(cam.r[smpl_ids]).astype(int),
                        np.around(j2d[cids]).astype(int)):
                    if (coord[0] < tmp_img.shape[1] and coord[0] >= 0 and
                            coord[1] < tmp_img.shape[0] and coord[1] >= 0):
                        cv2.circle(tmp_img, tuple(coord), 3, [0, 0, 255])
                    if (target_coord[0] < tmp_img.shape[1] and
                            target_coord[0] >= 0 and
                            target_coord[1] < tmp_img.shape[0] and
                            target_coord[1] >= 0):
                        cv2.circle(tmp_img, tuple(target_coord), 3,
                                   [0, 255, 0])
                plt.imshow(tmp_img[:, :, ::-1])
                plt.draw()
                plt.show()
                plt.pause(1e-2)

            # `_` is bound above by the global_rigid_transformation unpacking;
            # on_step ignores its argument anyway.
            on_step(_)
        else:
            on_step = None

        if regs is not None:
            # interpenetration term
            sp = SphereCollisions(
                pose=sv.pose, betas=sv.betas, model=model, regs=regs)
            sp.no_hands = True
        # weight configuration used in the paper, with joints + confidence
        # values from the CNN (all the weights used in the code were obtained
        # via grid search, see the paper for more details)
        # the first list contains the weights for the pose priors,
        # the second list contains the weights for the shape prior
        opt_weights = zip([4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78],
                          [1e2, 5 * 1e1, 1e1, .5 * 1e1])

        # run the optimization in 4 stages, progressively decreasing the
        # weights for the priors
        for stage, (w, wbetas) in enumerate(opt_weights):
            _LOGGER.info('stage %01d', stage)
            objs = {}

            objs['j2d'] = obj_j2d(1., 100)
            objs['pose'] = pprior(w)
            objs['pose_exp'] = obj_angle(0.317 * w)
            objs['betas'] = wbetas * betas
            if regs is not None:
                objs['sph_coll'] = 1e3 * sp

            ch.minimize(
                objs,
                x0=[sv.betas, sv.pose],
                method='dogleg',
                callback=on_step,
                options={'maxiter': 100,
                         'e_3': .0001,
                         # disp set to 1 enables verbose output
                         'disp': 0})

        t1 = time()
        _LOGGER.info('elapsed %.05f', (t1 - t0))
        if try_both_orient:
            errors.append((objs['j2d'].r**2).sum())
        svs.append(sv)
        cams.append(cam)

    # pick the orientation whose final 2D data term is lower
    if try_both_orient and errors[0] > errors[1]:
        choose_id = 1
    else:
        choose_id = 0
    if viz:
        plt.ioff()
    return (svs[choose_id], cams[choose_id].r, cams[choose_id].t.r, Jtr)
def run_single_fit(img,
                   j2d,
                   conf,
                   model,
                   regs=None,
                   n_betas=10,
                   flength=5000.,
                   pix_thsh=25.,
                   scale_factor=1,
                   viz=False,
                   do_degrees=None):
    """Run the fit for one specific image.
    :param img: h x w x 3 image
    :param j2d: 14x2 array of CNN joints
    :param conf: 14D vector storing the confidence values from the CNN
    :param model: SMPL model
    :param regs: regressors for capsules' axis and radius, if not None enables
                 the interpenetration error term
    :param n_betas: number of shape coefficients considered during optimization
    :param flength: camera focal length (kept fixed during optimization)
    :param pix_thsh: threshold (in pixel), if the distance between shoulder
                     joints in 2D is lower than pix_thsh, the body orientation
                     as ambiguous (so a fit is run on both the estimated one
                     and its flip)
    :param scale_factor: int, rescale the image (for LSP, slightly greater
                         images -- 2x -- help obtain better fits)
    :param viz: boolean, if True enables visualization during optimization
    :param do_degrees: list of degrees in azimuth to render the final fit when
                       saving results
    :returns: a tuple containing camera/model parameters and images with
              rendered fits

    NOTE: when scale_factor != 1 the caller's j2d array is scaled IN PLACE.
    """
    if do_degrees is None:
        do_degrees = []

    # create the pose prior (GMM over CMU)
    prior = MaxMixtureCompletePrior(n_gaussians=8).get_gmm_prior()
    # get the mean pose as our initial pose
    init_pose = np.hstack((np.zeros(3), prior.weights.dot(prior.means)))

    if scale_factor != 1:
        img = cv2.resize(img, (img.shape[1] * scale_factor,
                               img.shape[0] * scale_factor))
        # scale the 2D joints to match the resized image (in-place mutation)
        j2d[:, 0] *= scale_factor
        j2d[:, 1] *= scale_factor

    # estimate the camera parameters
    (cam, try_both_orient, body_orient) = initialize_camera(
        model,
        j2d,
        img,
        init_pose,
        flength=flength,
        pix_thsh=pix_thsh,
        viz=viz)

    # fit
    (sv, opt_j2d, t, v) = optimize_on_joints(
        j2d,
        model,
        cam,
        img,
        prior,
        try_both_orient,
        body_orient,
        n_betas=n_betas,
        conf=conf,
        viz=viz,
        regs=regs, )

    h = img.shape[0]
    w = img.shape[1]

    # distance camera <-> mean model vertex depth, used to set the far plane
    dist = np.abs(cam.t.r[2] - np.mean(sv.r, axis=0)[2])

    images = []
    orig_v = sv.r

    # render the fitted mesh from each requested azimuth angle
    for deg in do_degrees:
        if deg != 0:
            aroundy = cv2.Rodrigues(np.array([0, np.radians(deg), 0]))[0]
            center = orig_v.mean(axis=0)
            # rotate the vertices around the mesh centroid
            new_v = np.dot((orig_v - center), aroundy)
            verts = new_v + center
        else:
            verts = orig_v
        # now render
        im = (render_model(
            verts, model.f, w, h, cam, far=20 + dist) * 255.).astype('uint8')
        images.append(im)

    # return fit parameters
    # .r converts a chumpy array into numpy array
    params = {'cam_t': cam.t.r,
              'f': cam.f.r,
              'v': v.r,
              'pose': sv.pose.r,
              'betas': sv.betas.r}

    return params, images
def main(base_dir,
         out_dir,
         use_interpenetration=True,
         n_betas=10,
         flength=5000.,
         pix_thsh=25.,
         use_neutral=False,
         viz=True):
    """Set up paths to image and joint data, saves results.
    :param base_dir: folder containing LSP images and data
    :param out_dir: output folder
    :param use_interpenetration: boolean, if True enables the interpenetration
                                 term
    :param n_betas: number of shape coefficients considered during optimization
    :param flength: camera focal length (an estimate)
    :param pix_thsh: threshold (in pixel), if the distance between shoulder
                     joints in 2D is lower than pix_thsh, the body orientation
                     as ambiguous (so a fit is run on both the estimated one
                     and its flip)
    :param use_neutral: boolean, if True enables uses the neutral gender SMPL
                        model
    :param viz: boolean, if True enables visualization during optimization
    """
    img_dir = join(abspath(base_dir), 'images/lsp')
    data_dir = join(abspath(base_dir), 'results/lsp')

    if not exists(out_dir):
        makedirs(out_dir)

    # Render degrees: List of degrees in azimuth to render the final fit.
    # Note that rendering many views can take a while.
    do_degrees = [0.]

    # Python 2/3 compatible "wait for keypress" (raw_input was removed in
    # Python 3; plain input() on Python 2 would eval the typed text).
    try:
        _wait_key = raw_input
    except NameError:
        _wait_key = input

    sph_regs = None
    if not use_neutral:
        _LOGGER.info("Reading genders...")
        # File storing information about gender in LSP
        with open(join(data_dir, 'lsp_gender.csv')) as f:
            genders = f.readlines()
        model_female = load_model(MODEL_FEMALE_PATH)
        model_male = load_model(MODEL_MALE_PATH)
        if use_interpenetration:
            sph_regs_male = np.load(SPH_REGS_MALE_PATH)
            sph_regs_female = np.load(SPH_REGS_FEMALE_PATH)
    else:
        gender = 'neutral'
        model = load_model(MODEL_NEUTRAL_PATH)
        if use_interpenetration:
            sph_regs = np.load(SPH_REGS_NEUTRAL_PATH)

    # Load joints
    est = np.load(join(data_dir, 'est_joints.npz'))['est_joints']

    # Load images
    img_paths = sorted(glob(join(img_dir, '*[0-9].jpg')))
    for ind, img_path in enumerate(img_paths):
        out_path = '%s/%04d.pkl' % (out_dir, ind)
        # skip images that already have a saved result
        if not exists(out_path):
            _LOGGER.info('Fitting 3D body on `%s` (saving to `%s`).',
                         img_path, out_path)
            img = cv2.imread(img_path)
            if img.ndim == 2:
                # logging.warn is a deprecated alias of warning
                _LOGGER.warning("The image is grayscale!")
                img = np.dstack((img, img, img))

            # [x-y, keypoints, idx]
            joints = est[:2, :, ind].T
            conf = est[2, :, ind]

            if not use_neutral:
                # 0 encodes male in lsp_gender.csv
                gender = 'male' if int(genders[ind]) == 0 else 'female'
                if gender == 'female':
                    model = model_female
                    if use_interpenetration:
                        sph_regs = sph_regs_female
                elif gender == 'male':
                    model = model_male
                    if use_interpenetration:
                        sph_regs = sph_regs_male

            params, vis = run_single_fit(
                img,
                joints,
                conf,
                model,
                regs=sph_regs,
                n_betas=n_betas,
                flength=flength,
                pix_thsh=pix_thsh,
                scale_factor=2,
                viz=viz,
                do_degrees=do_degrees)
            if viz:
                import matplotlib.pyplot as plt
                plt.ion()
                plt.show()
                plt.subplot(121)
                plt.imshow(img[:, :, ::-1])
                if do_degrees is not None:
                    print(do_degrees)
                    for di, deg in enumerate(do_degrees):
                        plt.subplot(122)
                        plt.cla()
                        plt.imshow(vis[di])
                        plt.draw()
                        plt.title('%d deg' % deg)
                        plt.pause(1)
                _wait_key('Press any key to continue...')

            # pickle requires a binary file handle on Python 3
            # (and 'wb' is harmless on Python 2)
            with open(out_path, 'wb') as outf:
                pickle.dump(params, outf)

            print(do_degrees)
            # This only saves the first rendering.
            if do_degrees is not None:
                cv2.imwrite(out_path.replace('.pkl', '.png'), vis[0])
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='run SMPLify on LSP dataset')
    parser.add_argument(
        'base_dir',
        default='/scratch1/projects/smplify_public/',
        nargs='?',
        # fixed typo: 'results/lps' -> 'results/lsp'
        help="Directory that contains images/lsp and results/lsp , i.e."
        "the directory you untared smplify_code.tar.gz")
    parser.add_argument(
        '--out_dir',
        # default restored to match the documented behavior below
        # (was a leftover machine-specific path)
        default='/tmp/smplify_lsp',
        type=str,
        help='Where results will be saved, default is /tmp/smplify_lsp')
    parser.add_argument(
        '--no_interpenetration',
        default=False,
        action='store_true',
        help="Using this flag removes the interpenetration term, which speeds"
        "up optimization at the expense of possible interpenetration.")
    parser.add_argument(
        '--gender_neutral',
        default=False,
        action='store_true',
        help="Using this flag always uses the neutral SMPL model, otherwise "
        "gender specified SMPL models are used.")
    parser.add_argument(
        '--n_betas',
        default=10,
        type=int,
        help="Specify the number of shape coefficients to use.")
    parser.add_argument(
        '--flength',
        default=5000,
        type=float,
        help="Specify value of focal length.")
    parser.add_argument(
        '--side_view_thsh',
        default=25,
        type=float,
        help="This is thresholding value that determines whether the human is "
        "captured in a side view. If the pixel distance between the shoulders "
        "is less than this value, two initializations of SMPL fits are tried.")
    parser.add_argument(
        '--viz',
        default=False,
        action='store_true',
        help="Turns on visualization of intermediate optimization steps "
        "and final results.")
    args = parser.parse_args()

    use_interpenetration = not args.no_interpenetration
    if not use_interpenetration:
        _LOGGER.info('Not using interpenetration term.')
    if args.gender_neutral:
        _LOGGER.info('Using gender neutral model.')

    # Set up paths & load models.
    # Assumes 'models' in the 'code/' directory where this file is in.
    MODEL_DIR = join(abspath(dirname(__file__)), 'models')
    # Model paths:
    MODEL_NEUTRAL_PATH = join(
        MODEL_DIR, 'basicModel_neutral_lbs_10_207_0_v1.0.0.pkl')
    MODEL_FEMALE_PATH = join(
        MODEL_DIR, 'basicModel_f_lbs_10_207_0_v1.0.0.pkl')
    MODEL_MALE_PATH = join(MODEL_DIR,
                           'basicmodel_m_lbs_10_207_0_v1.0.0.pkl')

    if use_interpenetration:
        # paths to the npz files storing the regressors for capsules
        SPH_REGS_NEUTRAL_PATH = join(MODEL_DIR,
                                     'regressors_locked_normalized_hybrid.npz')
        SPH_REGS_FEMALE_PATH = join(MODEL_DIR,
                                    'regressors_locked_normalized_female.npz')
        SPH_REGS_MALE_PATH = join(MODEL_DIR,
                                  'regressors_locked_normalized_male.npz')

    main(args.base_dir, args.out_dir, use_interpenetration, args.n_betas,
         args.flength, args.side_view_thsh, args.gender_neutral, args.viz)
""" import numpy as np import chumpy as ch import scipy.sparse as sp from .capsule_ch import Capsule joint2name = ['pelvis', 'leftThigh', 'rightThigh', 'spine', 'leftCalf', 'rightCalf', 'spine1', 'leftFoot', 'rightFoot', 'spine2', 'neck', 'leftShoulder', 'rightShoulder', 'head', 'leftUpperArm', 'rightUpperArm', 'leftForeArm', 'rightForeArm', 'leftHand', 'rightHand'] # the orientation of each capsule rots0 = ch.asarray( [[0, 0, np.pi / 2], [0, 0, np.pi], [0, 0, np.pi], [0, 0, np.pi / 2], [0, 0, np.pi], [0, 0, np.pi], [0, 0, np.pi / 2], [np.pi / 2, 0, 0], [np.pi / 2, 0, 0], [0, 0, np.pi / 2], [0, 0, 0], [0, 0, -np.pi / 2], [0, 0, np.pi / 2], [0, 0, 0], [0, 0, -np.pi / 2], [0, 0, np.pi / 2], [0, 0, -np.pi / 2], [0, 0, np.pi / 2], [0, 0, -np.pi / 2], [0, 0, np.pi / 2]]) # groups hands and fingers, feet and toes # each comment line provides the body part corresonding to the capsule # and the corresponding id mujoco2segm = [[0], # hip 0 [1], # leftThigh 1 [2], # rightThigh 2 [3], # spine 3 [4], # leftCalf 4 [5], # rightCalf 5 [6], # spine1 6 [7, 10], # leftFoot + leftToes 7 [8, 11], # rightFoot + rightToes 8 [9], # spine2 9 [12], # neck 10 [13], # leftShoulder 11 [14], # rightShoulder 12 [15], # head 13 [16], # leftUpperArm 14 [17], # rightUpperArm 15 [18], # leftForeArm 16 [19], # rightForeArm 17 [20, 22], # leftHand + leftFingers 18 [21, 23]] # rightHand + rightFingers 19 # sets pairs of ids, corresponding to capsules that should not # penetrate each other collisions = [ [0, 16], # hip and leftForeArm [0, 17], # hip and rightForeArm [0, 18], # hip and leftHand [0, 19], # hip and rightHand [3, 16], # spine and leftForeArm [3, 17], # spine and rightForeArm [3, 18], # spine and leftHand [3, 19], # spine and rightHand [4, 5], # leftCalf and rightCalf [6, 16], # spine1 and leftForeArm [6, 17], # spine1 and rightForeArm [6, 18], # spine1 and leftHand [6, 19], # spine1 and rightHand [7, 5], # leftFoot and rightCalf [8, 7], # rightFoot and leftFoot [8, 4], # rightFoot and 
def get_capsules(model, wrt_betas=None, length_regs=None, rad_regs=None):
    """Build the 20 body-approximating capsules for an SMPL model.

    :param model: SMPL model
    :param wrt_betas: chumpy shape coefficients; if given, capsule lengths and
                      radii are made differentiable wrt the betas via the
                      provided regressors
    :param length_regs: regressor mapping (padded) betas to capsule lengths
    :param rad_regs: regressor mapping (padded) betas to capsule radii
    :returns: list of Capsule objects, one per entry of mujoco2segm, each
              tagged with its capsule id in `caps.id`
    """
    from opendr.geometry import Rodrigues
    if length_regs is not None:
        n_shape_dofs = length_regs.shape[0] - 1
    else:
        n_shape_dofs = model.betas.r.size
    # per-vertex body-part assignment (hard argmax over the prior weights)
    segm = np.argmax(model.weights_prior, axis=1)
    J_off = ch.zeros((len(joint2name), 3))
    rots = rots0.copy()
    # capsules whose origin sits in the middle of the part rather than at
    # the joint: hip and the three spine segments
    mujoco_t_mid = [0, 3, 6, 9]
    if wrt_betas is not None:
        # if we want to differentiate wrt betas (shape), we must have the
        # regressors...
        assert (length_regs is not None and rad_regs is not None)
        # ... and betas must be a chumpy object
        assert (hasattr(wrt_betas, 'dterms'))
        # pad betas to the regressor's dof count, plus a constant 1 term
        pad = ch.concatenate(
            (wrt_betas, ch.zeros(n_shape_dofs - len(wrt_betas)), ch.ones(1)))
        lengths = pad.dot(length_regs)
        rads = pad.dot(rad_regs)
    else:
        # placeholders; overwritten per-capsule from the template geometry
        lengths = ch.ones(len(joint2name))
        rads = ch.ones(len(joint2name))
    betas = wrt_betas if wrt_betas is not None else model.betas
    n_betas = len(betas)
    # the joint regressors are the original, pre-optimized ones
    # (middle of the part frontier)
    myJ_regressor = model.J_regressor_prior
    myJ0 = ch.vstack(
        (ch.ch.MatVecMult(myJ_regressor, model.v_template[:, 0] +
                          model.shapedirs[:, :, :n_betas].dot(betas)[:, 0]),
         ch.ch.MatVecMult(myJ_regressor, model.v_template[:, 1] +
                          model.shapedirs[:, :, :n_betas].dot(betas)[:, 1]),
         ch.ch.MatVecMult(myJ_regressor, model.v_template[:, 2] +
                          model.shapedirs[:, :, :n_betas].dot(betas)[:, 2]))).T
    # with small adjustments for hips, spine and feet
    myJ = ch.vstack(
        [ch.concatenate([myJ0[0, 0], (
            .6 * myJ0[0, 1] + .2 * myJ0[1, 1] + .2 * myJ0[2, 1]), myJ0[9, 2]]),
         ch.vstack([myJ0[i] for i in range(1, 7)]), ch.concatenate(
             [myJ0[7, 0], (1.1 * myJ0[7, 1] - .1 * myJ0[4, 1]), myJ0[7, 2]]),
         ch.concatenate(
             [myJ0[8, 0], (1.1 * myJ0[8, 1] - .1 * myJ0[5, 1]), myJ0[8, 2]]),
         ch.concatenate(
             [myJ0[9, 0], myJ0[9, 1], (.2 * myJ0[9, 2] + .8 * myJ0[12, 2])]),
         ch.vstack([myJ0[i] for i in range(10, 24)])])
    capsules = []
    # create one capsule per mujoco joint
    for ijoint, segms in enumerate(mujoco2segm):
        if wrt_betas is None:
            # derive radius/length from the template-vertex bounding box of
            # the part: radius from the two minor extents, length from the
            # major one
            vidxs = np.asarray([segm == k for k in segms]).any(axis=0)
            verts = model.v_template[vidxs].r
            dims = (verts.max(axis=0) - verts.min(axis=0))
            rads[ijoint] = .5 * ((dims[(np.argmax(dims) + 1) % 3] + dims[(
                np.argmax(dims) + 2) % 3]) / 4.)
            lengths[ijoint] = max(dims) - 2. * rads[ijoint].r
        # the core joints are different, since the capsule is not in the joint
        # but in the middle
        if ijoint in mujoco_t_mid:
            len_offset = ch.vstack([ch.zeros(1), ch.abs(lengths[ijoint]) / 2.,
                                    ch.zeros(1)]).reshape(3, 1)
            caps = Capsule(
                (J_off[ijoint] + myJ[mujoco2segm[ijoint][0]]).reshape(
                    3, 1) - Rodrigues(rots[ijoint]).dot(len_offset),
                rots[ijoint], rads[ijoint], lengths[ijoint])
        else:
            caps = Capsule(
                (J_off[ijoint] + myJ[mujoco2segm[ijoint][0]]).reshape(3, 1),
                rots[ijoint], rads[ijoint], lengths[ijoint])
        caps.id = ijoint
        capsules.append(caps)
    return capsules
and right and left upper arm elif capsule.id == 14 or capsule.id == 15: if n_spheres >= 1: centers = [] else: centers = [capsule.axis[1].r] else: centers = [capsule.axis[0].r, capsule.axis[1].r] if n_spheres >= 1: step = capsule.length.r / (n_spheres + 1) for i in xrange(n_spheres): centers.append(capsule.axis[0].r + (capsule.axis[ 1].r - capsule.axis[0].r) * step * (i + 1) / capsule.length.r) capsule.centers = centers return capsule.centers def capsule_dist(capsule0, capsule1, alpha=.3, increase_hand=True): range0 = range(capsule0.center_id, capsule0.center_id + len(capsule0.centers)) range1 = range(capsule1.center_id, capsule1.center_id + len(capsule1.centers)) cnt0 = ch.concatenate([[cid] * len(range1) for cid in range0]) cnt1 = ch.concatenate([range1] * len(range0)) if increase_hand: if (capsule0.id == 18) or (capsule0.id == 19) or ( capsule1.id == 18) or (capsule1.id == 19): dst = (alpha * 1.2 * capsule0.rad.r)**2 + (alpha * 1.2 * capsule1.rad.r)**2 else: dst = (alpha * capsule0.rad.r)**2 + (alpha * capsule1.rad.r)**2 else: dst = (alpha * capsule0.rad.r)**2 + (alpha * capsule1.rad.r)**2 radiuss = np.hstack([dst] * len(cnt0)).squeeze() return (cnt0, cnt1, radiuss) def get_capsule_bweights(vs): # "blend" weights for the capsule. 
They are binary rows = np.arange(vs.shape[0]) cols = np.tile(np.hstack((range(10), range(12, 22))), (52, 1)).T.ravel() data = np.ones(vs.shape[0]) caps_weights = np.asarray( sp.csc_matrix( (data, (rows, cols)), shape=(vs.shape[0], 24)).todense()) return caps_weights def get_sphere_bweights(sph_vs, capsules): rows = np.arange(sph_vs.shape[0]) cols = [] for cps, w in zip(capsules, range(10) + range(12, 22)): cols.append([w] * len(cps.centers)) cols = np.hstack(cols) data = np.ones(sph_vs.shape[0]) sph_weights = np.asarray( sp.csc_matrix( (data, (rows, cols)), shape=(sph_vs.shape[0], 24)).todense()) return sph_weights ================================================ FILE: libs/annotator/smplify/lib/capsule_ch.py ================================================ """ Copyright 2016 Max Planck Society, Federica Bogo, Angjoo Kanazawa. All rights reserved. This software is provided for research purposes only. By using this software you agree to the terms of the SMPLify license here: http://smplify.is.tue.mpg.de/license This script implements a Capsule object, used in the body approximation implemented in capsule_body.py. Capsule sizes depend on body shape (and are differentiable with respect to it). Capsules are the basis to compute an approximation based on spheres, used to compute efficiently the interpenetration error term in sphere_collisions.py. """ import numpy as np import chumpy as ch from opendr.geometry import Rodrigues # faces for the capsules. 
# faces for the capsules. Useful only for visualization purposes
cap_f = np.asarray(
    [[0, 7, 6], [1, 7, 9], [0, 6, 11], [0, 11, 13], [0, 13, 10], [1, 9, 16],
     [2, 8, 18], [3, 12, 20], [4, 14, 22], [5, 15, 24], [1, 16, 19],
     [2, 18, 21], [3, 20, 23], [4, 22, 25], [5, 24, 17], [16, 17, 26],
     [22, 23, 32], [48, 18, 28], [49, 20, 30], [24, 25, 34], [25, 22, 50],
     [28, 19, 47], [30, 21, 48], [32, 23, 49], [17, 24, 51], [26, 17, 51],
     [34, 25, 50], [23, 20, 49], [21, 18, 48], [19, 16, 47], [51, 24, 34],
     [24, 15, 25], [15, 4, 25], [50, 22, 32], [22, 14, 23], [14, 3, 23],
     [20, 21, 30], [20, 12, 21], [12, 2, 21], [18, 19, 28], [18, 8, 19],
     [8, 1, 19], [47, 16, 26], [16, 9, 17], [9, 5, 17], [10, 15, 5],
     [10, 13, 15], [13, 4, 15], [13, 14, 4], [13, 11, 14], [11, 3, 14],
     [11, 12, 3], [11, 6, 12], [6, 2, 12], [9, 10, 5], [9, 7, 10], [7, 0, 10],
     [6, 8, 2], [6, 7, 8], [7, 1, 8], [29, 36, 41], [31, 37, 44], [33, 38, 45],
     [35, 39, 46], [27, 40, 42], [42, 46, 43], [42, 40, 46], [40, 35, 46],
     [46, 45, 43], [46, 39, 45], [39, 33, 45], [45, 44, 43], [45, 38, 44],
     [38, 31, 44], [44, 41, 43], [44, 37, 41], [37, 29, 41], [41, 42, 43],
     [41, 36, 42], [36, 27, 42], [26, 40, 27], [26, 51, 40], [51, 35, 40],
     [34, 39, 35], [34, 50, 39], [50, 33, 39], [32, 38, 33], [32, 49, 38],
     [49, 31, 38], [30, 37, 31], [30, 48, 37], [48, 29, 37], [28, 36, 29],
     [28, 47, 36], [47, 27, 36], [51, 34, 35], [50, 32, 33], [49, 30, 31],
     [48, 28, 29], [47, 26, 27]])

# spherical coordinates (elevation/azimuth) of the 52 template vertices
elev = np.asarray(
    [0., 0.5535673, 1.01721871, 0., -1.01721871, -0.5535673, 0.52359324,
     0.31415301, 0.94246863, 0., -0.31415301, 0., 0.52359547, -0.52359324,
     -0.52359547, -0.94246863, 0.31415501, -0.31415501, 1.57079633,
     0.94247719, 0.31415501, 0.94247719, -0.94247719, -0.31415501,
     -0.94247719, -1.57079633, -0.31415624, 0., 0.94248124, 1.01722122,
     0.94247396, 0.55356579, -0.31415377, -0.55356579, -1.57079233,
     -1.01722122, 0.52359706, 0.94246791, 0., -0.94246791, -0.52359706,
     0.52359371, 0., 0., 0.31415246, -0.31415246, -0.52359371, 0.31415624,
     1.57079233, 0.31415377, -0.94247396, -0.94248124])
az = np.asarray(
    [-1.57079633, -0.55358064, -2.12435586, -2.67794236, -2.12435586,
     -0.55358064, -1.7595018, -1.10715248, -1.10714872, -0.55357999,
     -1.10715248, -2.12436911, -2.48922865, -1.7595018, -2.48922865,
     -1.10714872, 0., 0., 0., 0., 3.14159265, 3.14159265, 3.14159265,
     3.14159265, 0., 0., 0., 0.46365119, 0., 1.01724226, 3.14159265,
     2.58801549, 3.14159265, 2.58801549, 3.14159265, 1.01724226, 0.6523668,
     2.03445078, 2.58801476, 2.03445078, 0.6523668, 1.38209652, 1.01722642,
     1.57080033, 2.03444394, 2.03444394, 1.38209652, 0., 3.14159265,
     3.14159265, 3.14159265, 0.])

# vertices for the capsules (unit sphere points from the angles above)
v = np.vstack(
    [np.cos(az) * np.cos(elev), np.sin(az) * np.cos(elev), np.sin(elev)]).T


class Capsule(object):
    """A differentiable capsule: a translated/rotated axis with a radius.

    All size parameters (t, rad, length) must be chumpy objects so the
    capsule geometry stays differentiable with respect to body shape.
    """

    def __init__(self, t, rod, rad, length):
        assert (hasattr(t, 'dterms'))
        # the translation should be a chumpy object (differentiable wrt shape)
        self.t = t  # translation of the axis
        self.rod = rod  # rotation of the axis in Rodrigues form
        # the radius should be a chumpy object (differentiable wrt shape)
        assert (hasattr(rad, 'dterms'))
        self.rad = rad  # radius of the capsule
        # the length should be a chumpy object (differentiable wrt shape)
        assert (hasattr(length, 'dterms'))
        self.length = length  # length of the axis
        # the axis points along +y before rotation
        axis0 = ch.vstack([0, ch.abs(self.length), 0])
        self.axis = ch.vstack((t.T, (t + Rodrigues(rod).dot(axis0)).T))
        # first 26 template vertices form one hemisphere cap, the rest are
        # shifted to the far end of the axis
        v0 = ch.hstack([v[:26].T * rad, (v[26:].T * rad) + axis0])
        self.v = ((t + Rodrigues(rod).dot(v0)).T)
        self.set_sphere_centers()

    def set_sphere_centers(self, floor=False):
        # sphere centers are evenly spaced along the capsule axis length
        if floor:
            n_spheres = int(np.floor(self.length / (2 * self.rad) - 1))
        else:
            n_spheres = int(np.ceil(self.length / (2 * self.rad) - 1))
        centers = [self.axis[0].r, self.axis[1].r]
        if n_spheres >= 1:
            step = self.length.r / (n_spheres + 1)
            # `range` instead of the Python-2-only `xrange`
            for i in range(n_spheres):
                centers.append(self.axis[0].r + (self.axis[1].r - self.axis[
                    0].r) * step * (i + 1) / self.length.r)
        self.centers = centers
import os

import numpy as np
import chumpy as ch


class MaxMixtureComplete(ch.Ch):
    """Define the MaxMixture class."""
    # x is the input vector we want to evaluate the prior on;
    # means, precs and weights are the parameters of the mixture
    dterms = 'x'
    terms = 'means', 'precs', 'weights'

    def on_changed(self, which):
        # on_changed is called before any call to r or dr_wrt,
        # therefore it can be used also for initialization
        # setup means, precs and loglikelihood expressions
        if 'means' in which or 'precs' in which or 'weights' in which:
            # This is just the mahalanobis part.
            self.loglikelihoods = [np.sqrt(0.5) * (self.x - m).dot(s)
                                   for m, s in zip(self.means, self.precs)]
        if 'x' in which:
            # the sum over components is approximated by the single component
            # with the highest (weighted) likelihood
            self.min_component_idx = np.argmin(
                [(logl**2).sum().r[0] - np.log(w[0])
                 for logl, w in zip(self.loglikelihoods, self.weights)])

    def compute_r(self):
        min_w = self.weights[self.min_component_idx]
        # Add the sqrt(-log(weights))
        return ch.concatenate((self.loglikelihoods[self.min_component_idx].r,
                               np.sqrt(-np.log(min_w))))

    def compute_dr_wrt(self, wrt):
        # the call to dr_wrt returns a jacobian 69 x 72,
        # when wrt has 72 elements (pose vector)
        # here we intercept the call and return a 70 x 72 matrix,
        # with an additional row of zeroes (these are the jacobian
        # entries corresponding to sqrt(-log(weights))
        import scipy.sparse as sp
        dr = self.loglikelihoods[self.min_component_idx].dr_wrt(wrt)
        if dr is not None:
            # extract rows, cols and data, and return a new matrix with
            # the same values but 1 additional row
            Is, Js, Vs = sp.find(dr)
            dr = sp.csc_matrix(
                (Vs, (Is, Js)), shape=(dr.shape[0] + 1, dr.shape[1]))
        return dr


class MaxMixtureCompleteWrapper(object):
    """Convenience wrapper to match interface spec."""

    def __init__(self, means, precs, weights, prefix):
        self.means = means
        self.precs = precs  # Already "sqrt"ed
        self.weights = weights
        # number of leading pose parameters (global orientation) to skip
        self.prefix = prefix

    def __call__(self, x):
        # wrapping since __call__ couldn't be defined directly for a chumpy
        # object
        return (MaxMixtureComplete(
            x=x[self.prefix:],
            means=self.means,
            precs=self.precs,
            weights=self.weights))


class MaxMixtureCompletePrior(object):
    """Prior density estimation."""

    def __init__(self, n_gaussians=8, prefix=3):
        self.n_gaussians = n_gaussians
        self.prefix = prefix
        self.prior = self.create_prior_from_cmu()

    def create_prior_from_cmu(self):
        """Load the gmm from the CMU motion database."""
        from os.path import dirname
        # `cPickle` only exists on Python 2; the GMM pickles were written by
        # Python 2, so on Python 3 decode embedded numpy arrays with latin1.
        try:
            import cPickle as pickle
            load_kwargs = {}
        except ImportError:
            import pickle
            load_kwargs = {'encoding': 'latin1'}
        gmm_path = os.path.join(
            dirname(dirname(__file__)), 'models',
            'gmm_%02d.pkl' % self.n_gaussians)
        # pickles must be read in binary mode (required on Python 3,
        # harmless on Python 2)
        with open(gmm_path, 'rb') as f:
            gmm = pickle.load(f, **load_kwargs)

        precs = ch.asarray([np.linalg.inv(cov) for cov in gmm['covars']])
        chols = ch.asarray([np.linalg.cholesky(prec) for prec in precs])

        # The constant term:
        sqrdets = np.array([(np.sqrt(np.linalg.det(c)))
                            for c in gmm['covars']])
        const = (2 * np.pi)**(69 / 2.)

        self.weights = ch.asarray(gmm['weights'] / (const *
                                                    (sqrdets / sqrdets.min())))

        return (MaxMixtureCompleteWrapper(
            means=gmm['means'],
            precs=chols,
            weights=self.weights,
            prefix=self.prefix))

    def get_gmm_prior(self):
        """Getter implementation."""
        return self.prior
#!/usr/bin/env python
import numpy as np
import scipy
import scipy.sparse as sp

from chumpy import Ch

__all__ = ['GMOf']


def GMOf(x, sigma):
    """Given x and sigma in some units (say mm), returns robustified values (in same units),
    by making use of the Geman-McClure robustifier."""
    # GMOfInternal computes the (signed) squared GM residual; SignedSqrt
    # brings it back to the input's units while keeping the sign
    result = SignedSqrt(x=GMOfInternal(x=x, sigma=sigma))
    return result


class SignedSqrt(Ch):
    """Differentiable sign(x) * sqrt(|x|)."""
    dterms = ('x', )
    terms = ()

    def compute_r(self):
        return np.sqrt(np.abs(self.x.r)) * np.sign(self.x.r)

    def compute_dr_wrt(self, wrt):
        if wrt is self.x:
            result = (.5 / np.sqrt(np.abs(self.x.r)))
            # the derivative is infinite at 0; zero it out explicitly
            result = np.nan_to_num(result)
            result *= (self.x.r != 0).astype(np.uint32)
            return sp.spdiags(result.ravel(), [0], self.x.r.size,
                              self.x.r.size)


class GMOfInternal(Ch):
    """Signed squared Geman-McClure residual:
    sign(x) * sigma^2 * x^2 / (sigma^2 + x^2)."""
    dterms = 'x', 'sigma'

    def on_changed(self, which):
        if 'sigma' in which:
            assert (self.sigma.r > 0)
        if 'x' in which:
            # cached, reused by compute_r and compute_dr_wrt
            self.squared_input = self.x.r**2.

    def compute_r(self):
        return (self.sigma.r**2 * (self.squared_input / (self.sigma.r**2 +
                self.squared_input))) * np.sign(self.x.r)

    def compute_dr_wrt(self, wrt):
        if wrt is not self.x and wrt is not self.sigma:
            return None

        squared_input = self.squared_input
        result = []
        if wrt is self.x:
            # d/dx of sigma^2 * x^2 / (sigma^2 + x^2) via quotient rule
            dx = self.sigma.r**2 / (self.sigma.r**2 + squared_input
                                    ) - self.sigma.r**2 * (squared_input / (
                                        self.sigma.r**2 + squared_input)**2)
            dx = 2 * self.x.r * dx
            result.append(
                scipy.sparse.spdiags(
                    (dx * np.sign(self.x.r)).ravel(), [0], self.x.r.size,
                    self.x.r.size,
                    format='csc'))
        if wrt is self.sigma:
            # d/dsigma of sigma^2 * x^2 / (sigma^2 + x^2)
            ds = 2 * self.sigma.r * (squared_input / (
                self.sigma.r**2 + squared_input)) - 2 * self.sigma.r**3 * (
                    squared_input / (self.sigma.r**2 + squared_input)**2)
            result.append(
                scipy.sparse.spdiags(
                    (ds * np.sign(self.x.r)).ravel(), [0], self.x.r.size,
                    self.x.r.size,
                    format='csc'))

        if len(result) == 1:
            return result[0]
        else:
            return np.sum(result).tocsc()
"""
import numpy as np
import chumpy as ch

from smpl_webuser.lbs import verts_core
from .capsule_body import get_capsules, set_sphere_centers,\
    get_sphere_bweights, collisions,\
    capsule_dist


class SphereCollisions(ch.Ch):
    # Differentiable interpenetration penalty over capsule/sphere pairs.
    dterms = ('pose', 'betas')
    terms = ('regs', 'model')

    def update_capsules_and_centers(self):
        # Recompute sphere centers for every capsule and re-assemble the
        # per-collision-pair index/radius bookkeeping arrays.
        centers = [set_sphere_centers(capsule) for capsule in self.capsules]
        count = 0
        for capsule in self.capsules:
            # offset of this capsule's spheres inside the stacked array
            capsule.center_id = count
            count += len(capsule.centers)
        self.sph_vs = ch.vstack(centers)
        self.sph_weights = get_sphere_bweights(self.sph_vs, self.capsules)

        self.ids0 = []
        self.ids1 = []
        self.radiuss = []
        self.caps_pairs = []
        # `collisions` (imported above) lists the capsule index pairs that
        # are checked for interpenetration.
        for collision in collisions:
            if hasattr(self, 'no_hands'):
                (id0, id1, rd) = capsule_dist(
                    self.capsules[collision[0]],
                    self.capsules[collision[1]],
                    increase_hand=False)
            else:
                (id0, id1, rd) = capsule_dist(self.capsules[collision[0]],
                                              self.capsules[collision[1]])
            self.ids0.append(id0.r)
            self.ids1.append(id1.r)
            self.radiuss.append(rd)
            self.caps_pairs.append(['%02d_%02d' % (collision[0], collision[1])]
                                   * len(id0))
        self.ids0 = np.concatenate(self.ids0).astype(int)
        self.ids1 = np.concatenate(self.ids1).astype(int)
        self.radiuss = np.concatenate(self.radiuss)
        self.caps_pairs = np.concatenate(self.caps_pairs)
        assert (self.caps_pairs.size == self.ids0.size)
        assert (self.radiuss.size == self.ids0.size)

    def update_pose(self):
        # Re-pose the sphere centers with SMPL linear blend skinning.
        self.sph_v = verts_core(
            self.pose,
            self.sph_vs,
            self.model.J,
            self.sph_weights,
            self.model.kintree_table,
            want_Jtr=False)

    def get_objective(self):
        # Gaussian-falloff penalty on squared distance between paired sphere
        # centers, normalized by the pair radius.
        return ch.sum((ch.exp(
            -((ch.sum((self.sph_v[self.ids0] - self.sph_v[self.ids1])**2,
                      axis=1)) / (self.radiuss)) / 2.))**2)**.5

    def compute_r(self):
        return self.get_objective().r

    def compute_dr_wrt(self, wrt):
        # we consider derivatives only with respect to pose here,
        # to avoid bias towards thin body shapes
        if wrt is self.pose:
            return self.get_objective().dr_wrt(wrt)

    def on_changed(self, which):
        if 'regs' in which:
            self.length_regs = self.regs['betas2lens']
            self.rad_regs = self.regs['betas2rads']
        if 'betas' in which:
            if not hasattr(self, 'capsules'):
                # capsules are built lazily on the first betas update
                self.capsules = get_capsules(
                    self.model,
                    wrt_betas=self.betas,
                    length_regs=self.length_regs,
                    rad_regs=self.rad_regs)
            self.update_capsules_and_centers()
        if 'pose' in which:
            self.update_pose()


================================================
FILE: libs/annotator/smplify/render_model.py
================================================
"""
Copyright 2016 Max Planck Society, Federica Bogo, Angjoo Kanazawa.
All rights reserved.
This software is provided for research purposes only.
By using this software you agree to the terms of the SMPLify license here:
http://smplify.is.tue.mpg.de/license

Utility script for rendering the SMPL model using OpenDR.
"""
import numpy as np

from opendr.camera import ProjectPoints
from opendr.renderer import ColoredRenderer
from opendr.lighting import LambertianPointLight
import cv2

# named vertex colors used by the renderer (RGB in [0, 1])
colors = {
    'pink': [.7, .7, .9],
    'neutral': [.9, .9, .8],
    'capsule': [.7, .75, .5],
    'yellow': [.5, .7, .75],
}


def _create_renderer(w=640,
                     h=480,
                     rt=np.zeros(3),
                     t=np.zeros(3),
                     f=None,
                     c=None,
                     k=None,
                     near=.5,
                     far=10.):
    # Build a ColoredRenderer; intrinsics default to a centered pinhole
    # camera with focal length w/2 and no distortion.
    f = np.array([w, w]) / 2. if f is None else f
    c = np.array([w, h]) / 2. if c is None else c
    k = np.zeros(5) if k is None else k

    rn = ColoredRenderer()
    rn.camera = ProjectPoints(rt=rt, t=t, f=f, c=c, k=k)
    rn.frustum = {'near': near, 'far': far, 'height': h, 'width': w}
    return rn


def _rotateY(points, angle):
    """Rotate the points by a specified angle."""
    ry = np.array([
        [np.cos(angle), 0., np.sin(angle)],
        [0., 1., 0.],
        [-np.sin(angle), 0., np.cos(angle)]
    ])
    return np.dot(points, ry)


def simple_renderer(rn, verts, faces, yrot=np.radians(120)):
    # Rendered model color
    color = colors['pink']
    rn.set(v=verts, f=faces, vc=color, bgcolor=np.ones(3))
    albedo = rn.vc

    # Construct Back Light (on back right corner)
    rn.vc = LambertianPointLight(
        f=rn.f,
        v=rn.v,
        num_verts=len(rn.v),
        light_pos=_rotateY(np.array([-200, -100, -100]), yrot),
        vc=albedo,
        light_color=np.array([1, 1, 1]))

    # Construct Left Light
    rn.vc += LambertianPointLight(
        f=rn.f,
        v=rn.v,
        num_verts=len(rn.v),
        light_pos=_rotateY(np.array([800, 10, 300]), yrot),
        vc=albedo,
        light_color=np.array([1, 1, 1]))

    # Construct Right Light
    rn.vc += LambertianPointLight(
        f=rn.f,
        v=rn.v,
        num_verts=len(rn.v),
        light_pos=_rotateY(np.array([-500, 500, 1000]), yrot),
        vc=albedo,
        light_color=np.array([.7, .7, .7]))
    return rn.r


def get_alpha(imtmp, bgval=1.):
    # Derive an alpha channel: opaque wherever any channel differs from the
    # background value.
    h, w = imtmp.shape[:2]
    alpha = (~np.all(imtmp == bgval, axis=2)).astype(imtmp.dtype)

    b_channel, g_channel, r_channel = cv2.split(imtmp)

    im_RGBA = cv2.merge(
        (b_channel, g_channel, r_channel, alpha.astype(imtmp.dtype)))
    return im_RGBA


def render_model(verts, faces, w, h, cam, near=0.5, far=25, img=None):
    rn = _create_renderer(
        w=w, h=h, near=near, far=far, rt=cam.rt, t=cam.t, f=cam.f, c=cam.c)
    # Uses img as background, otherwise white background.
    if img is not None:
        rn.background_image = img / 255. if img.max() > 1 else img
    imtmp = simple_renderer(rn, verts, faces)
    # If white bg, make transparent.
    if img is None:
        imtmp = get_alpha(imtmp)
    return imtmp


================================================
FILE: libs/dataset/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""
#import libs.dataset.h36m


================================================
FILE: libs/dataset/h36m/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


================================================
FILE: libs/dataset/h36m/cameras.py
================================================
"""
Utilities to deal with the cameras of human3.6m.
Reference: https://github.com/una-dinosauria/3d-pose-baseline
"""
import numpy as np
# import h5py


def project_point_radial( P, R, T, f, c, k, p ):
  """
  Project points from 3d to 2d using camera parameters
  including radial and tangential distortion

  Args
    P: Nx3 points in world coordinates
    R: 3x3 Camera rotation matrix
    T: 3x1 Camera translation parameters
    f: (scalar) Camera focal length
    c: 2x1 Camera center
    k: 3x1 Camera radial distortion coefficients
    p: 2x1 Camera tangential distortion coefficients
  Returns
    Proj: Nx2 points in pixel space
    D: 1xN depth of each point in camera space
    radial: 1xN radial distortion per point
    tan: 1xN tangential distortion per point
    r2: 1xN squared radius of the projected points before distortion
  """
  # P is a matrix of 3-dimensional points
  assert len(P.shape) == 2
  assert P.shape[1] == 3

  N = P.shape[0]
  X = R.dot( P.T - T ) # rotate and translate
  XX = X[:2,:] / X[2,:]
  r2 = XX[0,:]**2 + XX[1,:]**2

  # polynomial radial distortion in even powers of the radius
  radial = 1 + np.einsum( 'ij,ij->j', np.tile(k,(1, N)),
                          np.array([r2, r2**2, r2**3]) );
  tan = p[0]*XX[1,:] + p[1]*XX[0,:]

  XXX = (XX * np.tile(radial + tan,(2,1)) +
         np.outer(np.array([p[1], p[0]]).reshape(-1), r2))

  Proj = (f * XXX) + c
  Proj = Proj.T

  D = X[2,]

  return Proj, D, radial, tan, r2


def world_to_camera_frame(P, R, T):
  """
  Convert points from world to camera coordinates

  Args
    P: Nx3 3d points in world coordinates
    R: 3x3 Camera rotation matrix
    T: 3x1 Camera translation parameters
  Returns
    X_cam: Nx3 3d points in camera coordinates
  """
  assert len(P.shape) == 2
  assert P.shape[1] == 3

  X_cam = R.dot( P.T - T ) # rotate and translate

  return X_cam.T


def camera_to_world_frame(P, R, T):
  """
  Inverse of world_to_camera_frame

  Args
    P: Nx3 points in camera coordinates
    R: 3x3 Camera rotation matrix
    T: 3x1 Camera translation parameters
  Returns
    X_cam: Nx3 points in world coordinates
  """
  assert len(P.shape) == 2
  assert P.shape[1] == 3

  X_cam = R.T.dot( P.T ) + T # rotate and translate

  return X_cam.T


def load_camera_params( hf, path ):
  """
  Load h36m camera parameters

  Args
    hf: hdf5 open file with h36m cameras data
    path: path or key inside hf to the camera we are interested in
  Returns
    R: 3x3 Camera rotation matrix
    T: 3x1 Camera translation parameters
    f: (scalar) Camera focal length
    c: 2x1 Camera center
    k: 3x1 Camera radial distortion coefficients
    p: 2x1 Camera tangential distortion coefficients
    name: String with camera id
  """
  R = hf[ path.format('R') ][:]
  R = R.T

  T = hf[ path.format('T') ][:]
  f = hf[ path.format('f') ][:]
  c = hf[ path.format('c') ][:]
  k = hf[ path.format('k') ][:]
  p = hf[ path.format('p') ][:]

  name = hf[ path.format('Name') ][:]
  # camera name is stored as an array of character codes
  name = "".join( [chr(item) for item in name] )

  return R, T, f, c, k, p, name


# def load_cameras( bpath='cameras.h5', subjects=[1,5,6,7,8,9,11] ):
#   """
#   Loads the cameras of h36m
#   Args
#     bpath: path to hdf5 file with h36m camera data
#     subjects: List of ints representing the subject IDs for which cameras
#       are requested
#   Returns
#     rcams: dictionary of 4 tuples per subject ID containing its camera
#       parameters for the 4 h36m cams
#   """
#   rcams = {}
#   with h5py.File(bpath,'r') as hf:
#     for s in subjects:
#       for c in range(4): # There are 4 cameras in human3.6m
#         string = 'subject%d/camera%d/{0}' % (s,c+1)
#         rcams[(s, c+1)] = load_camera_params(hf, string)
#   return rcams


================================================
FILE: libs/dataset/h36m/data_utils.py
================================================ """ Utility functions for dealing with Human3.6M dataset. Some functions are adapted from https://github.com/una-dinosauria/3d-pose-baseline """ import os import numpy as np import copy import logging import matplotlib.pyplot as plt import torch from mpl_toolkits.mplot3d import Axes3D import libs.dataset.h36m.cameras as cameras import libs.dataset.h36m.pth_dataset as dataset # Human3.6m IDs for training and testing TRAIN_SUBJECTS = [1, 5, 6, 7, 8] TEST_SUBJECTS = [9, 11] # Use camera coordinate system camera_frame = True # Joint names in H3.6M -- data has 32 joints, but only 17 that move; # these are the indices. H36M_NAMES = ['']*32 H36M_NAMES[0] = 'Hip' H36M_NAMES[1] = 'RHip' H36M_NAMES[2] = 'RKnee' H36M_NAMES[3] = 'RFoot' H36M_NAMES[6] = 'LHip' H36M_NAMES[7] = 'LKnee' H36M_NAMES[8] = 'LFoot' H36M_NAMES[12] = 'Spine' H36M_NAMES[13] = 'Thorax' H36M_NAMES[14] = 'Neck/Nose' H36M_NAMES[15] = 'Head' H36M_NAMES[17] = 'LShoulder' H36M_NAMES[18] = 'LElbow' H36M_NAMES[19] = 'LWrist' H36M_NAMES[25] = 'RShoulder' H36M_NAMES[26] = 'RElbow' H36M_NAMES[27] = 'RWrist' parent_indices = np.array([0, 1, 2, 0, 6, 7, 0, 12, 13, 14, 13, 17, 18, 13, 25, 26]) children_indices = np.array([1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27]) # Stacked Hourglass produces 16 joints. These are the names. SH_NAMES = ['']*16 SH_NAMES[0] = 'RFoot' SH_NAMES[1] = 'RKnee' SH_NAMES[2] = 'RHip' SH_NAMES[3] = 'LHip' SH_NAMES[4] = 'LKnee' SH_NAMES[5] = 'LFoot' SH_NAMES[6] = 'Hip' SH_NAMES[7] = 'Spine' SH_NAMES[8] = 'Thorax' SH_NAMES[9] = 'Head' SH_NAMES[10] = 'RWrist' SH_NAMES[11] = 'RElbow' SH_NAMES[12] = 'RShoulder' SH_NAMES[13] = 'LShoulder' SH_NAMES[14] = 'LElbow' SH_NAMES[15] = 'LWrist' # The .h5 suffix in pose sequence name is just inherited from the original # naming convention. The '.sh' suffix means stacked hourglass key-point detector # used in previous works. 
Here we just use '.sh' to represent key-points obtained # from any heat-map regression model. We used high-resolution net instead of # stacked-hourglass model. def load_ckpt(opt): cascade = torch.load(os.path.join(opt.ckpt_dir, 'model.th')) stats = np.load(os.path.join(opt.ckpt_dir, 'stats.npy'), allow_pickle=True).item() if opt.cuda: cascade.cuda() return cascade, stats def list_remove(list_a, list_b): """ Fine all elements of a list A that does not exist in list B. Args list_a: list A list_b: list B Returns list_c: result """ list_c = [] for item in list_a: if item not in list_b: list_c.append(item) return list_c def add_virtual_cams(cams, visualize=False): """ Deprecated. Add virtual cameras. """ # add more cameras to the scene #R, T, f, c, k, p, name = cams[ (1,1) ] # plot the position of human subjects old_cam_num = 4 def add_coordinate_system(ax, origin, system, length=300, new=False): # draw a coordinate system at a specified origin origin = origin.reshape(3, 1) start_points = np.repeat(origin, 3, axis=1) # system: [v1, v2, v3] end_points = start_points + system*length color = ['g', 'y', 'k'] # color for v1, v2 and v3 if new: color = ['b', 'r', 'g'] def get_args(start_points, end_points): x = [start_points[0], end_points[0]] y = [start_points[1], end_points[1]] z = [start_points[2], end_points[2]] return x, y, z for i in range(3): x, y, z = get_args(start_points[:,i], end_points[:,i]) ax.plot(x, y, z, lw=2, c=color[i]) return def get_new_camera(system, center, rotation = [0,0,90.]): from scipy.spatial.transform import Rotation as Rotation center = center.reshape(3, 1) start_points = np.repeat(center, 3, axis=1) end_points = start_points + system r = Rotation.from_euler('xyz', rotation, degrees=True) start_points_new = r.as_dcm() @ start_points end_points_new = r.as_dcm() @ end_points new_system = [(end_points_new[:,i] - start_points_new[:,i]).reshape(3,1) for i in range(3)] new_system = np.hstack(new_system) return new_system, start_points_new[:,0] # the new 
cameras are added by rotating one existing camera # TODO: more rotations new_cams = cams.copy() for key in cams.keys(): subject, camera_idx = key if camera_idx != 1: # only rotate the first camera continue R, T, f, c, k, p, name = cams[key] angles = [80., 130., 270., 320.] for angle_idx in range(len(angles)): angle = angles[angle_idx] new_R, new_T = get_new_camera(R.T, T, [0., 0., angle]) new_cams[(subject, old_cam_num + angle_idx + 1)]\ = (new_R.T, new_T.reshape(3,1), f, c, k, p, name+'new'+str(angle_idx+1)) # visualize cameras used if visualize: train_set_3d = np.load('../data/human3.6M/h36m/numpy/threeDPose_train.npy').item() test_set_3d = np.load('../data/human3.6M/h36m/numpy/threeDPose_test.npy').item() hips_train = np.vstack(list(train_set_3d.values())) hips_test = np.vstack(list(test_set_3d.values())) ax = plt.subplot(111, projection='3d') chosen = np.random.choice(len(hips_train), 1000, replace=False) chosen_hips = hips_train[chosen, :3] ax.plot(chosen_hips[:,0], chosen_hips[:,1], chosen_hips[:,2], 'bo') chosen = np.random.choice(len(hips_test), 1000, replace=False) chosen_hips = hips_test[chosen, :3] ax.plot(chosen_hips[:,0], chosen_hips[:,1], chosen_hips[:,2], 'ro') ax.set_xlabel("x");ax.set_ylabel("y");ax.set_zlabel("z") plt.title('Blue dots: Hip positions in the h36m training set. \ Red dots: testing set. \ Old camera coordinates: x-green, y-yellow, z-black \ New camera coordinates: x-blue, y-red, z-green') plt.pause(0.1) for key in new_cams.keys(): R, T, f, c, k, p, name = new_cams[key] # R gives camera basis vectors row-by-row, T gives camera center if 'new' in name: new = True else: new = False add_coordinate_system(ax, T, R.T, new=new) RADIUS = 3000 # space around the subject xroot, yroot, zroot = 0., 0., 500. 
ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot]) ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) ax.set_aspect("equal") return new_cams def down_sample_training_data(train_dict, opt): """ Down-sample the training data. Args train_dict: python dictionary contraining the training data opt: experiment options Returns train_dict/sampled_dict: a dictionary containing a subset of training data """ if opt.ws_name in ['S1', 'S15', 'S156']: sub_list = [int(opt.ws_name[i]) for i in range(1, len(opt.ws_name))] keys_to_delete = [] for key in train_dict.keys(): if key[0] not in sub_list: keys_to_delete.append(key) for key in keys_to_delete: del train_dict[key] return train_dict elif opt.ws_name in ['0.001S1','0.01S1', '0.05S1', '0.1S1', '0.5S1']: ratio = float(opt.ws_name.split('S')[0]) # randomly sample a portion of 3D data sampled_dict = {} for key in train_dict.keys(): if key[0] != 1: continue total = len(train_dict[key]) sampled_num = int(ratio*total) chosen_indices = np.random.choice(total, sampled_num, replace=False) sampled_dict[key] = train_dict[key][chosen_indices].copy() return sampled_dict else: raise ValueError('Unknown experiment setting.') def get_train_dict_3d(opt): """ Get the training 3d skeletons as a Python dictionary. 
Args opt: experiment options Returns train_dict_3d: a dictionary containing training 3d poses """ if not opt.train: return None dict_path = os.path.join(opt.data_dir, 'threeDPose_train.npy') #=========================================================================# # For real 2D detections, the down-sampling and data augmentation # are done later in get_train_dict_2d if opt.twoD_source != 'synthetic': train_dict_3d = np.load(dict_path, allow_pickle=True).item() return train_dict_3d #=========================================================================# # For synthetic 2D detections (For Protocol P1*), the down-sampling is # performed here and the data augmentation is assumed to be already done if opt.evolved_path is not None: # the data is pre-augmented train_dict_3d = np.load(opt.evolved_path, allow_pickle=True).item() elif opt.ws: # raw training data from Human 3.6M (S15678) # Down-sample the raw data to simulate an environment with scarce # training data, which is used in weakly-supervised experiments train_dict_3d = np.load(dict_path, allow_pickle=True).item() train_dict_3d = down_sample_training_data(train_dict_3d, opt) else: # raw training data from Human 3.6M (S15678) train_dict_3d = np.load(dict_path, allow_pickle=True).item() return train_dict_3d def get_test_dict_3d(opt): """ Get the testing 3d skeletons as a Python dictionary. Args opt: experiment options Returns test_dict_3d: a dictionary containing testing 3d poses """ if opt.test_source == 'h36m': # for h36m dict_path = os.path.join(opt.data_dir, 'threeDPose_test.npy') test_dict_3d = np.load(dict_path, allow_pickle=True).item() else: raise NotImplementedError return test_dict_3d def get_dict_2d(train_dict_3d, test_dict_3d, rcams, ncams, opt): """ Prepare 2D training and testing data as Python dictionaries. 
Args train_dict_3d: dictionary containing training 3d poses test_dict_3d: dictionary containing testing 3d poses rcams: camera parameters ncams: number of camera to use opt: experiment options Returns train_dict_2d: a dictionary containing training 2d poses test_dict_2d: a dictionary containing testing 2d poses train_dict_3d: the dictionary containing training 3d poses, which may be updated """ if opt.twoD_source == 'synthetic': # project the 3D key-points to 2D ones # This type of key-points is used to validate the performance of # 2D-to-3D networks and the noise of 2D key-point detector is ignored. # In fact, these 2D key-points are used as ground-truth to train the # first stage of TAG-net. if opt.virtual_cams: ncams *= 2 if opt.train: train_dict_2d = project_to_cameras(train_dict_3d, rcams, ncams=ncams) else: train_dict_2d = None test_dict_2d = project_to_cameras(test_dict_3d, rcams, ncams=ncams) elif opt.twoD_source == 'HRN': # The 2D key-point detections obtained by the heatmap regression model. # The model uses high-resolution net as backbone and pixel-shuffle super-resolution # to regress high-resolution heatmaps. 
if opt.train: train_dict_2d = np.load(os.path.join(opt.data_dir, 'twoDPose_HRN_train.npy'), allow_pickle=True).item() else: train_dict_2d = None test_dict_2d = np.load(os.path.join(opt.data_dir, 'twoDPose_HRN_test.npy'), allow_pickle=True).item() def delete(dic, actions): keys_to_delete = [] for key in dic.keys(): sub, act, name = key if act not in actions: keys_to_delete.append(key) for key in keys_to_delete: del dic[key] return dic def replace(dic, temp): for key in dic.keys(): sub, act, name = key temp_key = (sub, act, name[:-3]) synthetic = temp[temp_key] assert len(dic[key]) == len(synthetic) indices = np.random.choice(len(synthetic), int(0.5*len(synthetic)), replace=False) dic[key][indices] = synthetic[indices].copy() return dic # # weakly-supervised experiment def remove_keys(dic, name_list): keys_to_delete = [] for key in dic.keys(): if key[0] not in name_list: keys_to_delete.append(key) for key in keys_to_delete: del dic[key] return dic # down-sample the data for weakly-supervised experiment if opt.ws and opt.ws_name in ['S1', 'S15', 'S156']: sub_list = [int(opt.ws_name[i]) for i in range(1, len(opt.ws_name))] remove_keys(train_dict_3d, sub_list) if train_dict_2d is not None: remove_keys(train_dict_2d, sub_list) # data augmentation with evolved data if opt.evolved_path is not None: evolved_dict_3d = np.load(opt.evolved_path, allow_pickle=True).item() evolved_dict_2d = project_to_cameras(evolved_dict_3d, rcams, ncams=ncams) # combine the synthetic 2D-3D pair with the real 2D-3D pair train_dict_3d = {**train_dict_3d, **evolved_dict_3d} train_dict_2d = {**train_dict_2d, **evolved_dict_2d} return train_dict_2d, test_dict_2d, train_dict_3d def prepare_data_dict(rcams, opt, ncams=4, predict_14=False, use_nose=True ): """ Prepare 2D and 3D data as Python dictionaries. 
Args rcams: camera parameters opt: experiment options ncams: number of camera to use predict_14: whether to predict 14 joints or not use_nose: whether to use nose joint or not Returns data_dic: a dictionary containing training and testing data data_stats: statistics computed from training data """ assert opt.twoD_source in ['synthetic', 'HRN'], 'Unknown 2D key-point type.' data_dic = {} # get 3D skeleton data train_dict_3d = get_train_dict_3d(opt) test_dict_3d = get_test_dict_3d(opt) # get 2D key-point data train_dict_2d, test_dict_2d, train_dict_3d = get_dict_2d(train_dict_3d, test_dict_3d, rcams, ncams, opt ) # compute normalization statistics and normalize the 2D data if opt.train: complete_train_2d = copy.deepcopy(np.vstack(list(train_dict_2d.values()))) data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = \ normalization_stats(complete_train_2d, dim=2, norm_twoD=opt.norm_twoD, use_nose=use_nose ) data_dic['train_set_2d'] = normalize_data(train_dict_2d, data_mean_2d, data_std_2d, dim_to_use_2d, norm_single=opt.norm_single ) else: _, data_stats = load_ckpt(opt) data_mean_2d, data_std_2d = data_stats['mean_2d'], data_stats['std_2d'] dim_to_use_2d = data_stats['dim_use_2d'] data_dic['test_set_2d'] = normalize_data(test_dict_2d, data_mean_2d, data_std_2d, dim_to_use_2d, norm_single=opt.norm_single ) # The 3D joint position is represented in the world coordinate, # which is converted to camera coordinate system as the regression target if opt.train: train_dict_3d = transform_world_to_camera(train_dict_3d, rcams, ncams=ncams) # apply 3d post-processing (centering around root) train_dict_3d, train_root_positions = postprocess_3d(train_dict_3d) test_dict_3d = transform_world_to_camera(test_dict_3d, rcams, ncams=ncams) test_dict_3d, test_root_positions = postprocess_3d(test_dict_3d) if opt.train: # compute normalization statistics and normalize the 3D data complete_train_3d = copy.deepcopy(np.vstack(list(train_dict_3d.values()))) data_mean_3d, data_std_3d, 
dim_to_ignore_3d, dim_to_use_3d =\ normalization_stats(complete_train_3d, dim=3, predict_14=predict_14) data_dic['train_set_3d'] = normalize_data(train_dict_3d, data_mean_3d, data_std_3d, dim_to_use_3d ) # some joints are not used during training dim_use_2d = list_remove([i for i in range(len(data_mean_2d))], list(dim_to_ignore_2d)) dim_use_3d = list_remove([i for i in range(len(data_mean_3d))], list(dim_to_ignore_3d)) # assemble a dictionary for data statistics data_stats = {'mean_2d':data_mean_2d, 'std_2d':data_std_2d, 'mean_3d':data_mean_3d, 'std_3d':data_std_3d, 'dim_ignore_2d':dim_to_ignore_2d, 'dim_ignore_3d':dim_to_ignore_3d, 'dim_use_2d':dim_use_2d, 'dim_use_3d':dim_use_3d } else: data_mean_3d, data_std_3d = data_stats['mean_3d'], data_stats['std_3d'] dim_to_use_3d = data_stats['dim_use_3d'] data_dic['test_set_3d'] = normalize_data(test_dict_3d, data_mean_3d, data_std_3d, dim_to_use_3d ) return data_dic, data_stats def select_action(dic_2d, dic_3d, action, twoD_source): """ Construct sub-dictionaries by specifying which action to use Args dic_2d: dictionary containing 2d poses dic_3d: dictionary containing 3d poses action: the action to use twoD_source: how the key-points are generated (synthetic or real) Returns dic_2d_action: sub-dictionary containing 2d poses for the specified action dic_3d_action: sub-dictionary containing 3d poses for the specified action """ dic_2d_action = {} dic_3d_action = {} for key in dic_2d.keys(): if key[1] == action: dic_2d_action[key] = dic_2d[key].copy() if twoD_source == 'synthetic': key3d = key else: key3d = (key[0], key[1], key[2][:-3]) dic_3d_action[key3d] = dic_3d[key3d].copy() return dic_2d_action, dic_3d_action def split_action(dic_2d, dic_3d, actions, camera_frame, opt, input_size, output_size): """ Generate a list of datasets for each action. 
Args dic_2d: dictionary containing 2d poses dic_3d: dictionary containing 3d poses actions: list of defined actions camera_frame: use camera coordinate system opt: experiment options input_size: input vector length output_size: output vector length Returns action_dataset_list: a list of datasets where each element correspond to one action """ action_dataset_list = [] for act_id in range(len(actions)): action = actions[act_id] dic_2d_action, dic_3d_action = select_action(dic_2d, dic_3d, action, opt.twoD_source) eval_input, eval_output = get_all_data(dic_2d_action, dic_3d_action, camera_frame, norm_twoD=opt.norm_twoD, input_size=input_size, output_size=output_size) action_dataset = dataset.PoseDataset(eval_input, eval_output, 'eval', action_name=action, refine_3d=opt.refine_3d) action_dataset_list.append(action_dataset) return action_dataset_list def normalization_stats(complete_data, dim, predict_14=False, norm_twoD=False, use_nose=False ): """ Computes normalization statistics: mean and stdev, dimensions used and ignored Args complete_data: nxd np array with poses dim. integer={2,3} dimensionality of the data predict_14. boolean. 
Whether to use only 14 joints use_nose: whether to use nose or not Returns data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dimensions_to_ignore: list of dimensions not used in the model dimensions_to_use: list of dimensions used in the model """ if not dim in [2,3]: raise(ValueError, 'dim must be 2 or 3') data_mean = np.mean(complete_data, axis=0) data_std = np.std(complete_data, axis=0) # Encodes which 17 (or 14) 2d-3d pairs we are predicting dimensions_to_ignore = [] if dim == 2: if not use_nose: dimensions_to_use = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0] else: dimensions_to_use = np.where(np.array([x != '' for x in H36M_NAMES]))[0] if norm_twoD: dimensions_to_use = np.delete(dimensions_to_use, 0) dimensions_to_use = np.sort(np.hstack((dimensions_to_use*2, dimensions_to_use*2+1))) dimensions_to_ignore = np.delete(np.arange(len(H36M_NAMES)*2), dimensions_to_use) else: dimensions_to_use = np.where(np.array([x != '' for x in H36M_NAMES]))[0] # hip is deleted # spine and neck are also deleted if predict_14 dimensions_to_use = np.delete(dimensions_to_use, [0,7,9] if predict_14 else 0) dimensions_to_use = np.sort(np.hstack((dimensions_to_use*3, dimensions_to_use*3+1, dimensions_to_use*3+2))) dimensions_to_ignore = np.delete(np.arange(len(H36M_NAMES)*3), dimensions_to_use) return data_mean, data_std, dimensions_to_ignore, dimensions_to_use def transform_world_to_camera(poses_set, cams, ncams=4): """ Transform 3d poses from world coordinate to camera coordinate system Args poses_set: dictionary with 3d poses cams: dictionary with cameras ncams: number of cameras per subject Return: t3d_camera: dictionary with 3d poses in camera coordinate """ t3d_camera = {} for t3dk in sorted(poses_set.keys()): subj, action, seqname = t3dk t3d_world = poses_set[t3dk] for c in range(ncams): R, T, f, c, k, p, name = cams[(subj, c+1)] camera_coord = 
cameras.world_to_camera_frame(np.reshape(t3d_world, [-1, 3]), R, T) camera_coord = np.reshape(camera_coord, [-1, len(H36M_NAMES)*3]) sname = seqname[:-3]+"."+name+".h5" # e.g.: Waiting 1.58860488.h5 t3d_camera[(subj, action, sname)] = camera_coord return t3d_camera def normalize_data(data, data_mean, data_std, dim_to_use, norm_single=False): """ Normalizes a dictionary of poses Args data: dictionary where values are data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dim_to_use: list of dimensions to keep in the data norm_single: whether to perform normalization independently for each sample Returns data_out: dictionary with same keys as data, but values have been normalized """ data_out = {} for key in data.keys(): data[ key ] = data[ key ][ :, dim_to_use ] if norm_single: # does not use statistics over the whole dataset temp = data[key] temp = temp.reshape(len(temp), -1, 2) mean_x = np.mean(temp[:,:,0], axis=1).reshape(len(temp), 1) std_x = np.std(temp[:,:,0], axis=1) mean_y = np.mean(temp[:,:,1], axis=1).reshape(len(temp), 1) std_y = np.std(temp[:,:,1], axis=1) denominator = (0.5*(std_x + std_y)).reshape(len(std_x), 1) temp[:,:,0] = (temp[:,:,0] - mean_x)/denominator temp[:,:,1] = (temp[:,:,1] - mean_y)/denominator data_out[key] = temp.reshape(len(temp), -1) else: mu = data_mean[dim_to_use] stddev = data_std[dim_to_use] data_out[ key ] = np.divide( (data[key] - mu), stddev ) return data_out def unNormalizeData(normalized_data, data_mean, data_std, dimensions_to_ignore): """ Un-normalizes a matrix whose mean has been substracted and that has been divided by standard deviation. Some dimensions might also be missing. 
Args normalized_data: nxd matrix to unnormalize data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dimensions_to_ignore: list of dimensions that were removed from the original data Returns orig_data: the unnormalized data """ T = normalized_data.shape[0] # batch size D = data_mean.shape[0] # dimensionality orig_data = np.zeros((T, D), dtype=np.float32) dimensions_to_use = np.array([dim for dim in range(D) if dim not in dimensions_to_ignore]) orig_data[:, dimensions_to_use] = normalized_data # multiply times stdev and add the mean stdMat = data_std.reshape((1, D)) stdMat = np.repeat(stdMat, T, axis=0) meanMat = data_mean.reshape((1, D)) meanMat = np.repeat(meanMat, T, axis=0) orig_data = np.multiply(orig_data, stdMat) + meanMat return orig_data def define_actions(action): """ Given an action string, returns a list of corresponding actions. Args action: String. either "all" or one of the h36m actions Returns actions: List of strings. Actions to use. 
Raises ValueError: if the action is not a valid action in Human 3.6M """ actions = ["Directions", "Discussion", "Eating", "Greeting", "Phoning", "Photo", "Posing", "Purchases", "Sitting", "SittingDown", "Smoking", "Waiting", "WalkDog", "Walking", "WalkTogether" ] if action == "All" or action == "all": return actions if not action in actions: raise( ValueError, "Unrecognized action: %s" % action ) return [action] def project_to_cameras(poses_set, cams, ncams=4): """ Project 3d poses using camera parameters Args poses_set: dictionary containing 3d poses cams: dictionary containing camera parameters ncams: number of cameras per subject Returns t2d: dictionary with 2d poses """ t2d = {} for t3dk in sorted(poses_set.keys()): subj, a, seqname = t3dk t3d = poses_set[t3dk] for cam in range(ncams): R, T, f, c, k, p, name = cams[(subj, cam+1)] pts2d, _, _, _, _ = cameras.project_point_radial(np.reshape(t3d, [-1, 3]), R, T, f, c, k, p) pts2d = np.reshape(pts2d, [-1, len(H36M_NAMES)*2]) sname = seqname[:-3] + "." 
+ name + ".h5" # e.g.: Waiting 1.58860488.h5 t2d[ (subj, a, sname) ] = pts2d return t2d def postprocess_3d(poses_set): """ Center 3d points around root Args poses_set: dictionary with 3d data Returns poses_set: dictionary with 3d data centred around root (center hip) joint root_positions: dictionary with the original 3d position of each pose """ root_positions = {} for k in poses_set.keys(): # Keep track of the global position root_positions[k] = copy.deepcopy(poses_set[k][:,:3]) # Remove the root from the 3d position poses = poses_set[k] poses = poses - np.tile( poses[:,:3], [1, len(H36M_NAMES)] ) poses_set[k] = poses return poses_set, root_positions def postprocess_2d(poses_set): """ Center 2d points around root Args poses_set: dictionary with 2d data Returns poses_set: dictionary with 2d data centred around root (center hip) joint root_positions: dictionary with the original 2d position of each pose """ root_positions = {} for k in poses_set.keys(): # Keep track of the global position root_positions[k] = copy.deepcopy(poses_set[k][:,:2]) # Remove the root from the 3d position poses = poses_set[k] poses = poses - np.tile( poses[:,:2], [1, len(H36M_NAMES)] ) poses_set[k] = poses return poses_set, root_positions def get_all_data(data_x, data_y, camera_frame, norm_twoD=False, input_size=32, output_size=48 ): """ Obtain numpy arrays for network inputs/outputs Args data_x: dictionary with 2d inputs data_y: dictionary with 3d expected outputs camera_frame: whether the 3d data is in camera coordinates input_size: input vector length for each sample output_size: output vector length for each sample Returns encoder_inputs: numpy array for the input data decoder_outputs: numpy array for the output data """ if norm_twoD: input_size -= 2 # Figure out how many frames we have n = 0 for key2d in data_x.keys(): n2d, _ = data_x[ key2d ].shape n = n + n2d encoder_inputs = np.zeros((n, input_size), dtype=np.float32) decoder_outputs = np.zeros((n, output_size), dtype=np.float32) # 
    # Put all the data into big arrays
    idx = 0
    for key2d in data_x.keys():
        (subj, b, fname) = key2d
        # keys should be the same if 3d is in camera coordinates
        key3d = key2d if (camera_frame) else (subj, b, '{0}.h5'.format(fname.split('.')[0]))
        # '-sh' suffix means detected key-points are used
        key3d = (subj, b, fname[:-3]) if fname.endswith('-sh') and camera_frame else key3d

        n2d, _ = data_x[ key2d ].shape
        encoder_inputs[idx:idx+n2d, :] = data_x[ key2d ]
        decoder_outputs[idx:idx+n2d, :] = data_y[ key3d ]
        idx = idx + n2d

    return encoder_inputs, decoder_outputs


def prepare_dataset(opt):
    """
    Prepare PyTorch dataset objects used for training 2D-to-3D deep network

    Args
        opt: experiment options
    Returns
        train_dataset: training dataset as PyTorch dataset object
        eval_dataset: evaluation dataset as PyTorch dataset object
        data_stats: dataset statistics computed from the training dataset
        action_eval_list: a list of evaluation dataset objects where each
            corresponds to one action
    """
    # get relevant paths
    data_dir = opt.data_dir
    cameras_path = os.path.join(data_dir, 'cameras.npy')
    # By default, all actions are used
    actions = define_actions(opt.actions)
    # load camera parameters to project 3D skeleton
    rcams = np.load(cameras_path, allow_pickle=True).item()
    # produce more camera views by adding virtual cameras if needed
    if opt.virtual_cams:
        rcams = add_virtual_cams(rcams)
    # first prepare Python dictionary containing 2D and 3D data
    data_dic, data_stats = prepare_data_dict(rcams,
                                             opt,
                                             predict_14=False
                                             )
    input_size = len(data_stats['dim_use_2d'])
    output_size = len(data_stats['dim_use_3d'])
    if opt.train:
        # convert Python dictionary to numpy array
        # NOTE(review): `camera_frame` is not a parameter or local of this
        # function -- presumably a module-level flag defined earlier in this
        # file; verify it is actually in scope here and below.
        train_input, train_output = get_all_data(data_dic['train_set_2d'],
                                                 data_dic['train_set_3d'],
                                                 camera_frame,
                                                 norm_twoD=opt.norm_twoD,
                                                 input_size=input_size,
                                                 output_size=output_size
                                                 )
        # The Numpy arrays are finally used to initialize the dataset objects
        train_dataset = dataset.PoseDataset(train_input,
                                            train_output,
                                            'train',
                                            refine_3d = opt.refine_3d
                                            )
    else:
        train_dataset = None
    eval_input, eval_output = get_all_data(data_dic['test_set_2d'],
                                           data_dic['test_set_3d'],
                                           camera_frame,
                                           norm_twoD=opt.norm_twoD,
                                           input_size=input_size,
                                           output_size=output_size
                                           )
    eval_dataset = dataset.PoseDataset(eval_input,
                                       eval_output,
                                       'eval',
                                       refine_3d = opt.refine_3d
                                       )
    # Create a list of dataset objects for action-wise evaluation
    action_eval_list = split_action(data_dic['test_set_2d'],
                                    data_dic['test_set_3d'],
                                    actions,
                                    camera_frame,
                                    opt,
                                    input_size=input_size,
                                    output_size=output_size
                                    )
    return train_dataset, eval_dataset, data_stats, action_eval_list


================================================
FILE: libs/dataset/h36m/h36m_pose.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

import numpy as np
import copy
import torch
import cv2
import random

from libs.dataset.h36m.pose_dataset import JointsDataset
from libs.hhr.utils.transforms import get_affine_transform
from libs.hhr.utils.transforms import affine_transform
from libs.hhr.utils.transforms import fliplr_joints

import logging
logger = logging.getLogger(__name__)


## Human 3.6M dataset class
class H36MDataset(JointsDataset):
    '''
    COCO annotation:
    "keypoints": {
        0: "nose", 1: "left_eye", 2: "right_eye", 3: "left_ear",
        4: "right_ear", 5: "left_shoulder", 6: "right_shoulder",
        7: "left_elbow", 8: "right_elbow", 9: "left_wrist",
        10: "right_wrist", 11: "left_hip", 12: "right_hip",
        13: "left_knee", 14: "right_knee", 15: "left_ankle",
        16: "right_ankle"
    },
    "skeleton": [
        [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13],
        [6,7],[6,8], [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],
        [3,5],[4,6],[5,7]]

    H36M annotation:
    H36M_NAMES[0] =
    'Hip'
    H36M_NAMES[1]  = 'RHip'
    H36M_NAMES[2]  = 'RKnee'
    H36M_NAMES[3]  = 'RFoot'
    H36M_NAMES[4]  = 'LHip'
    H36M_NAMES[5]  = 'LKnee'
    H36M_NAMES[6]  = 'LFoot'
    H36M_NAMES[7]  = 'Spine'
    H36M_NAMES[8]  = 'Thorax'
    H36M_NAMES[9]  = 'Neck/Nose'
    H36M_NAMES[10] = 'Head'
    H36M_NAMES[11] = 'LShoulder'
    H36M_NAMES[12] = 'LElbow'
    H36M_NAMES[13] = 'LWrist'
    H36M_NAMES[14] = 'RShoulder'
    H36M_NAMES[15] = 'RElbow'
    H36M_NAMES[16] = 'RWrist'
    "skeleton": [
        [0,1], [1,2], [2,3], [0,4], [4,5], [5,6], [0,7], [7,8],
        [8,9], [9,10], [8,11], [11,12], [12,13], [8,14], [14,15],
        [15,16]]

    permutation from H36M to COCO:
    [9, 7, 8, 0, 10, 11, 14, 12, 15, 13, 16, 4, 1, 5, 2, 6, 3]
    permutation to get back:
    '''
    def __init__(self, cfg, is_train, annot_path, transform=None):
        # cfg: experiment configuration node; is_train: augmentation switch;
        # annot_path: path to the pre-processed .npy annotation file.
        super().__init__(cfg, is_train, transform)
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.loss_type = cfg.MODEL.TARGET_TYPE
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200
        # path to pre-processed annotation
        self.annot_path = annot_path
        self.num_joints = 17
        # left/right joint index pairs swapped on horizontal flip
        self.flip_pairs = [[5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 4, 5, 6, 7, 8, 9, 10)
        self.lower_body_ids = (3, 11, 12, 13, 14, 15, 16)
        # up-weight elbows/knees and wrists/ankles in the loss
        self.joints_weight = np.ones((self.num_joints,1), np.float32)
        self.joints_weight[[7,8,13,14]] = 1.2
        self.joints_weight[[9,10,15,16]] = 1.5
        ## permute joint order for fine-tuning purpose
        self.fine_tune_re_order = [9, 7, 8, 0, 10, 11, 14, 12, 15, 13, 16, 4, 1, 5, 2, 6, 3]
        # input-image to heatmap down-sampling ratio
        self.ratio = float(cfg.MODEL.IMAGE_SIZE[0]/cfg.MODEL.HEATMAP_SIZE[0])
        self.db = self._get_db()
        logging.info('=> total annotation for images: {}'.format(len(self.db)))
        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)
        logging.info('=> load {} samples'.format(len(self.db)))

    def _get_db(self):
        # Load the pre-processed annotation records and permute the 2D joints
        # into the fine-tuning (COCO-style) ordering.
        gt_db = np.load(self.annot_path)
        # permute joints
        for record in gt_db:
            record['p_2d'] = record['p_2d'][self.fine_tune_re_order, :]
        return gt_db

    def get_weights(self):
        # Per-sample weights: subjects S6 and S8 are up-weighted.
        weights = []
        for sample_idx in range(len(self.db)):
            path = self.db[sample_idx]['path']
            if 'S6' in path or 'S8' in path:
                weights.append(1.5)
            else:
                weights.append(1.0)
        return weights

    def _box2cs(self, box):
        # Convert an [x, y, w, h] box into (center, scale).
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        # Compute box center and scale (in units of pixel_std), padding the
        # box so it matches the model input aspect ratio.
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        return center, scale

    def __getitem__(self, idx):
        # Returns (input image tensor, target, target_weight, meta dict).
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['path']
        # data_numpy = cv2.imread(
        #     image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        # ) # opencv 3
        # 1 | 128 == IMREAD_COLOR | IMREAD_IGNORE_ORIENTATION (numeric flags)
        data_numpy = cv2.imread(image_file, 1 | 128)
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        joints = db_rec['p_2d']
        joints_original = joints.copy()
        joints_vis = np.ones(joints.shape, dtype=np.float32)
        # the whole image is used as the person bounding box
        c, s = self._xywh2cs(0, 0, data_numpy.shape[1], data_numpy.shape[0])
        score = 1
        r = 0
        if self.is_train:
            # do not do half body transform since there is not so much occlusion
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis
                )
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body
            # random scale and rotation augmentation
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.3 else 0
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        if self.transform:
            input = self.transform(input)
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                # set joints to in-visible if they are out-side of the image
                if joints[i, 0] >= self.image_width or joints[i, 1] >= self.image_height:
                    joints_vis[i, 0] = 0.0
        target, target_weight = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        meta = {
            'image': image_file,
            'joints': joints,
            'joints_vis': joints_vis,
            'j_original':joints_original, #original coordinates
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            'trans':trans,
            'bbox':[0, 0, data_numpy.shape[1], data_numpy.shape[0]]
        }
        return input, target, target_weight, meta

    def generate_target(self, joints, joints_vis):
        '''
        :param joints:  [num_joints, 3]
        :param joints_vis: [num_joints, 3]
        :return: target, target_weight(1: visible, 0: invisible)
        '''
        target_weight = np.ones((self.num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        assert self.target_type in ['gaussian', 'coordinate'], \
            'Unsupported target type'

        if self.target_type == 'gaussian':
            # one heatmap per joint with an (unnormalized) Gaussian at the joint
            target = np.zeros((self.num_joints,
                               self.heatmap_size[1],
                               self.heatmap_size[0]),
                              dtype=np.float32)
            tmp_size = self.sigma * 3
            for joint_id in range(self.num_joints):
                feat_stride = self.image_size / self.heatmap_size
                mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
                mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
                # Check that any part of the gaussian is in-bounds
                ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
                br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
                if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \
                        or br[0] < 0 or br[1] < 0:
                    # If not, just return the image as is
                    target_weight[joint_id] = 0
                    continue
                # # Generate gaussian
                size = 2 * tmp_size + 1
                x = np.arange(0, size, 1, np.float32)
                y = x[:, np.newaxis]
                x0 = y0 = size // 2
                # The gaussian is not normalized, we want the center value to equal 1
                g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2))
                # Usable gaussian range
                g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
                g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
                # Image range
                img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
                img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])
                v = target_weight[joint_id]
                if v > 0.5:
                    target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                        g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
        elif self.target_type == 'coordinate':
            # regress heatmap-resolution coordinates directly
            target = joints/self.ratio

        if self.use_different_joints_weight:
            target_weight = np.multiply(target_weight, self.joints_weight)

        return target, target_weight


================================================
FILE: libs/dataset/h36m/pose_dataset.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import random

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset

from libs.hhr.utils.transforms import get_affine_transform
from libs.hhr.utils.transforms import affine_transform
from libs.hhr.utils.transforms import fliplr_joints

logger = logging.getLogger(__name__)


class JointsDataset(Dataset):
    # Base class for 2D keypoint datasets: holds configuration, performs the
    # standard augmentation pipeline (half-body crop, scale/rotate, flip) and
    # Gaussian heatmap target generation. Subclasses implement _get_db().
    def __init__(self, cfg, is_train, root=None, image_set=None, transform=None):
        self.num_joints = 0
        self.pixel_std = 200
        self.flip_pairs = []
        self.parent_ids = []

        self.is_train = is_train
        self.root = root
        self.image_set = image_set

        self.output_path = cfg.OUTPUT_DIR
        self.data_format = cfg.DATASET.DATA_FORMAT

        self.scale_factor = cfg.DATASET.SCALE_FACTOR
        self.rotation_factor = cfg.DATASET.ROT_FACTOR
        self.flip = cfg.DATASET.FLIP
        self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY
        self.prob_half_body = cfg.DATASET.PROB_HALF_BODY
        self.color_rgb = cfg.DATASET.COLOR_RGB

        self.target_type = cfg.MODEL.TARGET_TYPE
        self.image_size = np.array(cfg.MODEL.IMAGE_SIZE)
        self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE)
        self.sigma = cfg.MODEL.SIGMA
        self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT
        self.joints_weight = 1

        self.transform = transform
        self.db = []

    def _get_db(self):
        # Subclasses must return the list of annotation records.
        raise NotImplementedError

    def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
        raise NotImplementedError

    def half_body_transform(self, joints, joints_vis):
        # Randomly crop to the upper or the lower body; returns (center, scale)
        # of the selected half, or (None, None) if too few joints are visible.
        upper_joints = []
        lower_joints = []
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])

        # NOTE(review): np.random.randn() < 0.5 is not a fair coin -- a
        # standard normal is below 0.5 roughly 69% of the time; presumably
        # np.random.rand() was intended. Confirm before changing behavior.
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints \
                if len(lower_joints) > 2 else upper_joints

        if len(selected_joints) < 2:
            return None, None

        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]

        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)

        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]

        # pad the tight box so it matches the model input aspect ratio
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio

        scale = np.array(
            [
                w * 1.0 / self.pixel_std,
                h * 1.0 / self.pixel_std
            ],
            dtype=np.float32
        )
        scale = scale * 1.5

        return center, scale

    def __len__(self,):
        return len(self.db)

    def __getitem__(self, idx):
        # Returns (input image tensor, target, target_weight, meta dict).
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis
                )
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            # random scale and rotation augmentation
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta

    def select_data(self, db):
        # Keep only records whose visible-joint centroid lies close enough to
        # the bounding-box center (OKS-style metric), following SimpleBaselines.
        db_selected = []
        for rec in db:
            num_vis = 0
            joints_x = 0.0
            joints_y = 0.0
            for joint, joint_vis in zip(
                    rec['joints_3d'], rec['joints_3d_vis']):
                if joint_vis[0] <= 0:
                    continue
                num_vis += 1

                joints_x += joint[0]
                joints_y += joint[1]
            if num_vis == 0:
                continue

            joints_x, joints_y = joints_x / num_vis, joints_y / num_vis

            area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2)
            joints_center = np.array([joints_x, joints_y])
            bbox_center = np.array(rec['center'])
            diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2)
            ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area))

            metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16
            if ks > metric:
                db_selected.append(rec)

        logger.info('=> num db: {}'.format(len(db)))
        logger.info('=> num selected db: {}'.format(len(db_selected)))
        return db_selected

    def generate_target(self, joints, joints_vis):
        '''
        :param joints:  [num_joints, 3]
        :param joints_vis: [num_joints, 3]
        :return: target, target_weight(1: visible, 0: invisible)
        '''
        target_weight = np.ones((self.num_joints, 1),
                                dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        assert self.target_type == 'gaussian', \
            'Only support gaussian map now!'

        if self.target_type == 'gaussian':
            # one heatmap per joint with an (unnormalized) Gaussian at the joint
            target = np.zeros((self.num_joints,
                               self.heatmap_size[1],
                               self.heatmap_size[0]),
                              dtype=np.float32)

            tmp_size = self.sigma * 3

            for joint_id in range(self.num_joints):
                feat_stride = self.image_size / self.heatmap_size
                mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
                mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
                # Check that any part of the gaussian is in-bounds
                ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
                br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
                if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \
                        or br[0] < 0 or br[1] < 0:
                    # If not, just return the image as is
                    target_weight[joint_id] = 0
                    continue

                # # Generate gaussian
                size = 2 * tmp_size + 1
                x = np.arange(0, size, 1, np.float32)
                y = x[:, np.newaxis]
                x0 = y0 = size // 2
                # The gaussian is not normalized, we want the center value to equal 1
                g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2))

                # Usable gaussian range
                g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
                g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
                # Image range
                img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
                img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])

                v = target_weight[joint_id]
                if v > 0.5:
                    target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                        g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

        if self.use_different_joints_weight:
            target_weight = np.multiply(target_weight, self.joints_weight)

        return target, target_weight


================================================
FILE: libs/dataset/h36m/pth_dataset.py
================================================
import logging

import torch
import torch.utils.data
import torch.nn.functional as F
import numpy as np


def normalize(vec):
    # normalize a numpy vector (zero mean, unit standard deviation)
    return (vec-vec.mean())/vec.std()


def unNormalizeData(normalized_data, data_mean, data_std,
                    dimensions_to_ignore):
    """
    Un-normalizes a matrix whose mean has been substracted and that has been
    divided by standard deviation. Some dimensions might also be missing

    Args
        normalized_data: nxd matrix to unnormalize
        data_mean: np vector with the mean of the data
        data_std: np vector with the standard deviation of the data
        dimensions_to_ignore: list of dimensions that were removed from the
            original data
    Returns
        orig_data: the input normalized_data, but unnormalized
    """
    T = normalized_data.shape[0]  # Batch size
    D = data_mean.shape[0]  # Dimensionality

    orig_data = np.zeros((T, D), dtype=np.float32)
    dimensions_to_use = np.array([dim for dim in range(D)
                                  if dim not in dimensions_to_ignore])
    orig_data[:, dimensions_to_use] = normalized_data

    # Multiply times stdev and add the mean
    stdMat = data_std.reshape((1, D))
    stdMat = np.repeat(stdMat, T, axis=0)
    meanMat = data_mean.reshape((1, D))
    meanMat = np.repeat(meanMat, T, axis=0)
    orig_data = np.multiply(orig_data, stdMat) + meanMat
    return orig_data


class PoseDataset(torch.utils.data.Dataset):
    # 2D-to-3D pose regression dataset supporting cascaded (multi-stage)
    # training: after each stage, stage_update() folds the model's prediction
    # into current_estimate and shrinks regression_target accordingly.
    def __init__(self, array_2d, array_3d, split, action_name=None, refine_3d=False):
        """
        Args:
            array_2d: numpy array of 2D network inputs, one row per sample
            array_3d: numpy array of 3D regression outputs, one row per sample
            split: dataset split name, e.g. 'train' or 'eval'
            action_name: optional action label for action-wise evaluation
            refine_3d: if True, later stages also feed the normalized current
                3D estimate as extra network input
        """
        self.data_2d = array_2d
        self.data_3d = array_3d
        self.num_samples = len(self.data_2d)
        self.split = split
        self.action_name = action_name
        self.refine_3d = refine_3d
        self.stage_idx = 1
        # initialize current estimate 3d pose
        self.current_estimate = np.zeros(self.data_3d.shape, dtype=np.float32)
        # initialize the regression target (starts with zero estimate)
        self.regression_target = self.data_3d.copy()
        assert len(self.data_2d) == len(self.data_3d)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        if self.refine_3d and self.stage_idx > 1:
            #return np.concatenate([self.data_2d[idx], self.current_estimate[idx]]), self.regression_target[idx]
            # normalized version
            return np.concatenate([self.data_2d[idx],
                                   normalize(self.current_estimate[idx])]), \
                   self.regression_target[idx]
        else:
            return self.data_2d[idx], self.regression_target[idx]

    def set_stage(self, stage_idx):
        # Record which cascade stage is currently being trained.
        self.stage_idx = stage_idx
        return

    def stage_update(self, model, stats, opt, verbose=False):
        # update the dataset for cascaded regression
        model.eval()
        eval_loader = torch.utils.data.DataLoader(self,
                                                  batch_size = opt.batch_size,
                                                  shuffle = False,
                                                  num_workers = opt.num_threads)
        # vector to add at last
        update_vector = []
        total_loss = 0
        all_distance = np.zeros((0))
        for batch_idx, batch in enumerate(eval_loader):
            data = batch[0]
            target = batch[1]
            if opt.cuda:
                with torch.no_grad():
                    # move to GPU
                    data, target = data.cuda(), target.cuda()
                    # forward pass to get prediction
                    prediction = model(data)
                    # mean squared loss
                    loss = F.mse_loss(prediction, target, reduction='sum')
                    total_loss += loss.data.item()
                    # compute distance of body joints in un-normalized format
                    unnorm_target = unNormalizeData(target.data.cpu().numpy(),
                                                    stats['mean_3d'],
                                                    stats['std_3d'],
                                                    stats['dim_ignore_3d'])
                    # put the prediction into the update list
                    prediction = prediction.data.cpu().numpy()
                    update_vector.append(prediction)
                    unnorm_pred = unNormalizeData(prediction,
                                                  stats['mean_3d'],
                                                  stats['std_3d'],
                                                  stats['dim_ignore_3d'])
                    # pick the joints that are used
                    dim_use = stats['dim_use_3d']
                    unnorm_target_use = unnorm_target[:, dim_use]
                    unnorm_target_use = unnorm_target_use.reshape(-1,16,3)
                    unnorm_pred_use = unnorm_pred[:, dim_use]
                    unnorm_pred_use = unnorm_pred_use.reshape(-1,16,3)
                    # per-joint Euclidean distance, averaged over joints
                    distance = np.sum((unnorm_target_use - unnorm_pred_use)**2, axis=2)
                    distance = np.mean(np.sqrt(distance), axis=1)
                    all_distance = np.hstack([all_distance, distance])
        # update the current estimate and regression target
        update_vector = np.concatenate(update_vector, axis=0)
        self.current_estimate += update_vector
        self.regression_target -= update_vector
        # report statistics
        avg_loss = total_loss/(self.num_samples*16*3)
        avg_distance = all_distance.mean()
        if verbose:
            logging.info('Stage update finished.')
            logging.info('{:s} set: average loss: {:.4f} '.format(self.split, avg_loss))
            logging.info('{:s} set: average joint distance: {:.4f} '.format(self.split, avg_distance))
        return avg_loss, avg_distance


================================================
FILE: libs/evolution/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""


================================================
FILE: libs/evolution/genetic.py
================================================
"""
Utility functions for genetic evolution.
"""
import libs.dataset.h36m.cameras as cameras
from libs.skeleton.anglelimits import \
    to_local, to_global, get_skeleton, to_spherical, \
    nt_parent_indices, nt_child_indices, \
    is_valid_local, is_valid

import matplotlib.pyplot as plt
import os
import logging
import numpy as np
import torch

from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm
from scipy.spatial.transform import Rotation as R

# directory holding pre-computed constraint resources (e.g. bone templates)
root = "../resources/constraints"

# Joints in H3.6M -- data has 32 joints, but only 17 that move
H36M_NAMES = ['']*32
H36M_NAMES[0]  = 'Hip'
H36M_NAMES[1]  = 'RHip'
H36M_NAMES[2]  = 'RKnee'
H36M_NAMES[3]  = 'RFoot'
H36M_NAMES[6]  = 'LHip'
H36M_NAMES[7]  = 'LKnee'
H36M_NAMES[8]  = 'LFoot'
H36M_NAMES[12] = 'Spine'
H36M_NAMES[13] = 'Thorax'
H36M_NAMES[14] = 'Neck/Nose'
H36M_NAMES[15] = 'Head'
H36M_NAMES[17] = 'LShoulder'
H36M_NAMES[18] = 'LElbow'
H36M_NAMES[19] = 'LWrist'
H36M_NAMES[25] = 'RShoulder'
H36M_NAMES[26] = 'RElbow'
H36M_NAMES[27] = 'RWrist'

total_joints_num = len(H36M_NAMES)

# this dictionary stores the parent indice for each joint
# key:value -> child joint index:its parent joint index
parent_idx = {1:0, 2:1, 3:2, 6:0, 7:6, 8:7, 12:0, 13:12, 14:13, 15:14,
              17:13, 18:17, 19:18, 25:13, 26:25, 27:26
              }

# this dictionary stores the children indices for each parent joint
# key:value -> parent index: joint indices for its children as a list
children_idx = {
    0: [1, 6],
    1: [2],
    2: [3],
    6: [7],
    7: [8],
    13: [14, 17, 25],
    14: [15],
    17: [18],
    18: [19],
    25: [26],
    26: [27]
}

# used roots for random selection
root_joints = [0, 1, 2, 6, 7,
13, 17, 18, 25, 26] # names of the bone vectors attached on the human torso bone_name = { 1: 'thorax to head top', 2: 'left shoulder to left elbow', 3: 'left elbow to left wrist', 4: 'right shoulder to right elbow', 5: 'right elbow to right wrist', 6: 'left hip to left knee', 7: 'left knee to left ankle', 8: 'right hip to right knee', 9: 'right knee to right ankle' } # this dictionary stores the sub-tree rooted at each root joint # key:value->root joint index:list of bone vector indices bone_indices = {0: [5, 6, 7, 8], 1: [7, 8], 2: [8], 6: [5, 6], 7: [6], 13: [1, 2, 3, 4], # thorax 17: [1, 2], 18: [2], 25: [3, 4], 26: [4] } # load template bone lengths that can be used during mutation # you can prepare your own bone length templates to represent # subjects with different size bl_templates = np.load(os.path.join(root, "bones.npy"), allow_pickle=True) # pre-compute the sub-tree joint indices for each joint subtree_indices = {} def get_subtree(joint_idx, children_idx): if joint_idx not in children_idx: return None subtree = set() for child_idx in children_idx[joint_idx]: subtree.add(child_idx) offsprings = get_subtree(child_idx, children_idx) if offsprings is not None: subtree = subtree.union(offsprings) return subtree for joint_idx in range(total_joints_num): if H36M_NAMES[joint_idx] != '': subtree_indices[joint_idx] = get_subtree(joint_idx, children_idx) def swap_bones(bones_father, bones_mother, root_idx): swap_indices = bone_indices[root_idx] temp = bones_father.copy() bones_father[swap_indices] = bones_mother[swap_indices].copy() bones_mother[swap_indices] = temp[swap_indices].copy() del temp return bones_father, bones_mother, swap_indices def get_bone_length(skeleton): """ Compute limb length for a given skeleton. 
""" bones = skeleton[nt_parent_indices, :] - skeleton[nt_child_indices, :] bone_lengths = to_spherical(bones)[:, 0] return bone_lengths def get_h36m_bone_length(visualize=True): #1: F 5:F 6:M 7:F 8:M 9:M 11:M bl_dic = {1:[], 5:[], 6:[], 7:[], 8:[], 9:[], 11:[]} train_dic = np.load('../data/human3.6M/h36m/numpy/threeDPose_train.npy').item() test_dic = np.load('../data/human3.6M/h36m/numpy/threeDPose_test.npy').item() def process_dic(data_dic, bl_dic, candicate=50): for key in data_dic.keys(): subject = key[0] indices = np.random.choice(len(data_dic[key]), candicate, replace=False) selected = data_dic[key][indices] for pose in selected: bl_dic[subject].append(get_bone_length(pose.reshape(32,3))) return process_dic(train_dic, bl_dic) process_dic(test_dic, bl_dic) for key in bl_dic: for array in bl_dic[key]: array = array.reshape(9,1) bl_dic[key] = np.vstack(bl_dic[key]) if visualize: # as can be observed, only bone length of idx 0 vary a lot. Others are almost fixed # for key in bl_dic.keys(): # fig, axes = plt.subplots(3,3) # plt.title('Subject: '+str(key)) # for row in range(3): # for col in range(3): # axes[row][col].hist(bl_dic[key][:,3*row + col], bins=20) fig, axes = plt.subplots(3,3) all_lengths = np.vstack(list(bl_dic.values())) for row in range(3): for col in range(3): axes[row][col].hist(all_lengths[:,3*row + col]) return bl_dic def get_random_rotation(sigma=60.): angle = np.random.normal(scale=sigma) axis_idx = np.random.choice(3, 1) if axis_idx == 0: r = R.from_euler('xyz', [angle, 0., 0.], degrees=True) elif axis_idx == 1: r = R.from_euler('xyz', [0., angle, 0.], degrees=True) else: r = R.from_euler('xyz', [0., 0., angle], degrees=True) return r def rotate_bone_random(bone, sigma=10.): r = get_random_rotation(sigma) bone_rot = r.as_dcm() @ bone.reshape(3,1) return bone_rot.reshape(3) def rotate_pose_random(pose=None, sigma=60.): # pose shape: [n_joints, 3] if pose is None: result = None else: r = get_random_rotation() pose = pose.reshape(32, 3) # rotate 
around hip hip = pose[0].reshape(1, 3) relative_pose = pose - hip rotated = r.as_dcm() @ relative_pose.T result = rotated.T + hip return result def re_order(skeleton): # the ordering of coordinate used by the Prior was x,z and y return skeleton[:, [0,2,1]] def set_z(pose, target): if pose is None: return None original_shape = pose.shape pose = pose.reshape(32, 3) min_val = pose[:, 2].min() pose[:, 2] -= min_val - target return pose.reshape(original_shape) def modify_pose(skeleton, local_bones, bone_length, ro=False): # get a new pose by modify an existing pose with input local bone vectors # and bone lengths new_bones = to_global(skeleton, local_bones)['bg'] new_pose = get_skeleton(new_bones, skeleton, bone_length=bone_length) if ro: new_pose = re_order(new_pose) return new_pose.reshape(-1) def exploration(father, mother, opt, post_processing=True): """ Produce novel data by exploring the data space with evolutionary operators. cross over operator in the local coordinate system mutation: perturb the local joint angle """ # get local coordinate for each bone vector father = re_order(father.reshape(total_joints_num, -1)) father_bone_length = get_bone_length(father) mother = re_order(mother.reshape(total_joints_num, -1)) mother_bone_length = get_bone_length(mother) bones_father = to_local(father) bones_mother = to_local(mother) if opt.CV: # crossover: exchange random sub-trees of two kinematic trees root_idx = np.random.randint(0, len(root_joints)) root_selected = root_joints[root_idx] bones_father, bones_mother, indices = swap_bones(bones_father, bones_mother, root_selected) if opt.M: # local mutation: apply random rotation to local limb for bone_idx in indices: if np.random.rand() <= opt.MRL: bones_father[bone_idx] = rotate_bone_random(bones_father[bone_idx], sigma=opt.SDL) bones_mother[bone_idx] = rotate_bone_random(bones_mother[bone_idx], sigma=opt.SDL) son_pose, daughter_pose = None, None if opt.C: # apply joint angle constraint as the fitness function 
        valid_vec_fa = is_valid_local(bones_father)
        valid_vec_mo = is_valid_local(bones_mother)
    # Offspring are produced only when unconstrained, or when enough bone
    # vectors pass the joint-angle validity check (short-circuit keeps the
    # valid_vec_* names safe when opt.C is False).
    if not opt.C or valid_vec_fa.sum() >= opt.Th:
        son_pose = modify_pose(father, bones_father, mother_bone_length, ro=True)
    if not opt.C or valid_vec_mo.sum() >= opt.Th:
        daughter_pose = modify_pose(mother, bones_mother, father_bone_length, ro=True)
    if opt.M:
        # global mutation: rotate the whole 3D skeleton
        if np.random.rand() <= opt.MRG:
            son_pose = rotate_pose_random(son_pose, sigma=opt.SDG)
        if np.random.rand() <= opt.MRG:
            daughter_pose = rotate_pose_random(daughter_pose, sigma=opt.SDG)
    if post_processing:
        # move the poses to the ground plane (set_z mutates in place, so the
        # return value can be discarded)
        set_z(son_pose, np.random.normal(loc=20.0, scale=3.0))
        set_z(daughter_pose, np.random.normal(loc=20.0, scale=3.0))
    if opt.DE:
        # debug mode: recompute validity and offspring unconditionally
        valid_vec_fa = is_valid_local(bones_father)
        valid_vec_mo = is_valid_local(bones_mother)
        # re_order: order back to x, y, z
        son_pose = modify_pose(father, bones_father, mother_bone_length, ro=True)
        daughter_pose = modify_pose(mother, bones_mother, father_bone_length, ro=True)
        # valid_vec_son = is_valid(son_pose)
        # valid_vec_dau = is_valid(daughter_pose)
    if opt.DE and opt.V:
        # debug visualization: parents and offspring side by side
        plt.figure()
        ax1 = plt.subplot(1,4,1, projection='3d')
        plt.title('father')
        show3Dpose(re_order(father), ax1, add_labels=False, plot_dot=True)
        ax2 = plt.subplot(1,4,2, projection='3d')
        plt.title('mother')
        show3Dpose(re_order(mother), ax2, add_labels=False, plot_dot=True)
        ax3 = plt.subplot(1,4,3, projection='3d')
        plt.title('son: ' + str(valid_vec_fa.sum()))
        show3Dpose(son_pose, ax3, add_labels=False, plot_dot=True)
        ax4 = plt.subplot(1,4,4, projection='3d')
        plt.title('daughter: ' + str(valid_vec_mo.sum()))
        show3Dpose(daughter_pose, ax4, add_labels=False, plot_dot=True)
        plt.tight_layout()
    return son_pose, daughter_pose

def show3Dpose(channels,
               ax,
               lcolor="#3498db",
               rcolor="#e74c3c", # blue, orange
               add_labels=True,
               gt=False,
               pred=False,
               plot_dot=False
               ):
    """
    Visualize a 3d skeleton

    Args
      channels: 96x1 vector. The pose to plot.
      ax: matplotlib 3d axis to draw on
      lcolor: color for left part of the body
      rcolor: color for right part of the body
      add_labels: whether to add coordinate labels
    Returns
      Nothing. Draws on ax.
    """
    if channels.shape[0] == 96:
        vals = np.reshape( channels, (32, -1) )
    else:
        vals = channels
    I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 # start points
    J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 # end points
    LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool)
    # indices of the 16 used joints inside the flattened 96-dim pose vector
    dim_use_3d = [3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23, 24,
                  25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51,
                  52, 53, 54, 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 80, 81,
                  82, 83]
    # Make connection matrix
    for i in np.arange( len(I) ):
        x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)]
        if gt:
            ax.plot(x,y, z, lw=4, c='k')
            # ax.plot(x,y, z, lw=2, c='k')
        elif pred:
            ax.plot(x,z, -y, lw=4, c='r')
            # ax.plot(x,y, z, lw=2, c='r')
        else:
            # ax.plot(x,z, -y, lw=2, c=lcolor if LR[i] else rcolor)
            ax.plot(x,y, z, lw=4, c=lcolor if LR[i] else rcolor)
    if plot_dot:
        joints = channels.reshape(96)
        joints = joints[dim_use_3d].reshape(16,3)
        ax.scatter(joints[:,0], joints[:,1], joints[:,2], c='k', marker='o')
    RADIUS = 750 # space around the subject
    xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2]
    ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot])
    ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot])
    ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot])
    if add_labels:
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
    # NOTE(review): set_aspect('equal') raises NotImplementedError on recent
    # matplotlib 3D axes — confirm the pinned matplotlib version.
    ax.set_aspect('equal')
    # ax.set_xticks([])
    # ax.set_yticks([])
    # ax.set_zticks([])
    # ax.get_xaxis().set_ticklabels([])
    # ax.get_yaxis().set_ticklabels([])
    # ax.set_zticklabels([])
    # Get rid of the panes (actually, make them white)
    # white = (1.0, 1.0, 1.0, 0.0)
    # ax.w_xaxis.set_pane_color(white)
    # ax.w_yaxis.set_pane_color(white)
    # Keep z pane
    # Get rid of the lines in 3d
    # ax.w_xaxis.line.set_color(white)
    # ax.w_yaxis.line.set_color(white)
    # ax.w_zaxis.line.set_color(white)
    ax.view_init(10, -60)

def choose_best(population, fraction = 0.02, method='random'):
    """
    Choose the best candidates to produce descendents.

    Returns index arrays (father_indices, mother_indices), each of size
    int(fraction * len(population)); the two sets are disjoint.
    """
    if method == 'random':
        # this is a simple implementation by random sampling
        num_total = len(population)
        num_to_choose = int(fraction*num_total)
        chosen_indices = np.random.choice(num_total,
                                          num_to_choose*2,
                                          replace=False)
        father_indices = chosen_indices[:num_to_choose]
        mother_indices = chosen_indices[num_to_choose:]
    else:
        raise NotImplementedError
    return father_indices, mother_indices

def project_to_cameras(poses, cams):
    """
    Project 3d poses using camera parameters
    input: 3D poses: [n_pose, pose length]
           cams: list of camera parameters
    return: list of 2D projections for each camera
    """
    p_2d = []
    for cam in cams:
        R, T, f, c, k, p, name = cam
        pts2d, _, _, _, _ = cameras.project_point_radial(np.reshape(poses, [-1, 3]),
                                                         R, T, f, c, k, p )
        p_2d.append(np.reshape( pts2d, [-1, len(H36M_NAMES)*2]))
    return p_2d

def transform_world_to_camera(poses, cams):
    """
    Project 3d poses from world coordinate to camera coordinate system
    return: list of 3D poses in camera coordinate systems
    """
    p_3d_cam = []
    for cam in cams:
        R, T, f, c, k, p, name = cam
        camera_coord = cameras.world_to_camera_frame( np.reshape(poses, [-1, 3]),
                                                      R, T)
        camera_coord = np.reshape( camera_coord, [-1, len(H36M_NAMES)*3] )
        p_3d_cam.append(camera_coord)
    return p_3d_cam

def normalize(data, mean=None, std=None):
    """Standardize data; either both mean and std are given, or both are
    computed from `data` along axis 0."""
    if mean is not None and std is not None:
        pass
    elif mean is None and std is None:
        mean = np.mean(data, axis=0).reshape(1, data.shape[1])
        std = np.std(data, axis=0).reshape(1, data.shape[1])
    else:
        raise ValueError
    return (data-mean)/std

def unnormalize(data, mean, std):
    """Invert normalize()."""
    return (data*std) + mean

def postprocess_3d( poses):
    """Re-center every joint relative to the first (hip) joint."""
    return poses - np.tile( poses[:,:3], [1, len(H36M_NAMES)] )

def calc_errors(pred_poses, gt_poses, protocol='mpjpe'):
    # NOTE(review): the original comment claimed "error after a rigid
    # alignment (protocol #2)", but no alignment is performed below — this is
    # plain per-joint Euclidean error (MPJPE, protocol #1). Confirm intent.
    # Compute Euclidean
    # distance error per joint
    sqerr = (pred_poses - gt_poses)**2 # Squared error between prediction and expected output
    sqerr = sqerr.reshape(len(sqerr), -1, 3)
    sqerr = np.sqrt(sqerr.sum(axis=2))
    if protocol == 'mpjpe':
        # mean over joints -> one error per pose, shaped [n_pose, 1]
        ret = sqerr.mean(axis=1)
        ret = ret.reshape(len(ret), 1)
    else:
        raise NotImplementedError
    return ret

def to_numpy(tensor):
    """Detach a torch tensor to a CPU numpy array."""
    return tensor.data.cpu().numpy()

def get_prediction(cascade, data):
    """Run a cascaded regression model: later stages predict residuals that
    are summed onto the first stage's output.

    Args:
        cascade: sequence of torch modules.
        data: numpy array of normalized 2D inputs.
    Returns:
        torch tensor with the accumulated prediction.
    """
    data = torch.from_numpy(data.astype(np.float32))
    if torch.cuda.is_available():
        data = data.cuda()
    # forward pass to get prediction for the first stage
    prediction = cascade[0](data)
    # prediction for later stages
    for stage_idx in range(1, len(cascade)):
        prediction += cascade[stage_idx](data)
    return prediction

def get_score(model_file, candidates):
    """
    Obtain model inference errors for the candidates.

    model_file is a dict with keys 'cams', 'stats' and 'model'; the candidates
    are world-coordinate 3D poses. Returns per-candidate error averaged over
    all cameras.
    """
    cams = model_file['cams']
    stats = model_file['stats']
    model = model_file['model']
    if torch.cuda.is_available():
        model = model.cuda()
    # project to 2D keypoints
    p_2d = project_to_cameras(candidates, cams)
    # convert to camera coordinate
    p_3d_cam = transform_world_to_camera(candidates, cams)
    # re-center relative to the hip
    for idx in range(len(p_3d_cam)):
        p_3d_cam[idx] = postprocess_3d(p_3d_cam[idx])
    # normalize the inputs with the training-set statistics
    dim_use_2d = stats['dim_use_2d']
    dim_use_3d = stats['dim_use_3d']
    mean_2d = stats['mean_2d'][dim_use_2d]
    std_2d = stats['std_2d'][dim_use_2d]
    mean_3d = stats['mean_3d'][dim_use_3d]
    std_3d = stats['std_3d'][dim_use_3d]
    for idx in range(len(p_2d)):
        p_2d[idx] = p_2d[idx][:, dim_use_2d]
        p_2d[idx] = normalize(p_2d[idx], mean_2d, std_2d)
    # get output and calculate errors
    output = []
    for idx in range(len(p_2d)):
        prediction = to_numpy(get_prediction(model, p_2d[idx]))
        # unnormalize the prediction
        prediction = unnormalize(prediction, mean_3d, std_3d)
        output.append(prediction)
    errors = []
    for idx in range(len(output)):
        gt_poses = p_3d_cam[idx][:, dim_use_3d]
        errors.append(calc_errors(output[idx], gt_poses))
    all_errors = np.concatenate(errors, axis = 1)
    # mean error for all the cameras
    mean_errors = all_errors.mean(axis = 1)
    return mean_errors

def active_select(model_file, candidates, ratio):
    """
    Actively select candidates that cause the model to fail.

    Keeps the `ratio` fraction with the highest inference error; also returns
    the mean error of the kept candidates.
    """
    scores = get_score(model_file, candidates)
    indices = np.argsort(scores)
    # error from low to high; keep the hardest tail
    indices = indices[-int(ratio*len(candidates)):]
    mean_error = scores[indices].mean()
    return candidates[indices], mean_error

def cast_to_float(dic, dtype=np.float32):
    # cast to float 32 for space saving (in-place on the dict's arrays)
    for key in dic.keys():
        dic[key] = dic[key].astype(dtype)
    return dic

def xyz2spherical(xyz):
    # convert cartesian coordinate to spherical coordinate
    # return in r, phi, and theta (elevation angle from z axis down)
    # xyz: [n_sample, n_bone, 3]
    return_value = np.zeros(xyz.shape, dtype=xyz.dtype)
    xy = xyz[:,:,0]**2 + xyz[:,:,1]**2
    return_value[:,:,0] = np.sqrt(xy + xyz[:,:,2]**2) # r
    return_value[:,:,1] = np.arctan2(np.sqrt(xy), xyz[:,:,2]) # phi
    return_value[:,:,2] = np.arctan2(xyz[:,:,1], xyz[:,:,0]) #theta
    return return_value

def spherical2xyz(rphitheta):
    # inverse of xyz2spherical: [r, phi, theta] -> [x, y, z]
    return_value = np.zeros(rphitheta.shape, dtype=rphitheta.dtype)
    sinphi = np.sin(rphitheta[:,:,1])
    cosphi = np.cos(rphitheta[:,:,1])
    sintheta = np.sin(rphitheta[:,:,2])
    costheta = np.cos(rphitheta[:,:,2])
    return_value[:,:,0] = rphitheta[:,:,0]*sinphi*costheta # x
    return_value[:,:,1] = rphitheta[:,:,0]*sinphi*sintheta # y
    return_value[:,:,2] = rphitheta[:,:,0]*cosphi #z
    return return_value

# global variables: parent/child joint index pairs defining the 16 bones of
# the H36M kinematic tree (32-joint skeleton indexing)
parent_idx = [0, 6, 7, \
              0, 1, 2, \
              0, 12, 13, 14,\
              13, 17, 18,\
              13, 25, 26]
child_idx = [6, 7, 8, \
             1, 2, 3, \
             12, 13, 14, 15,\
             17, 18, 19,\
             25, 26, 27]

def position_to_angle(skeletons):
    # transform 3d positions to joint angle representation
    # first compute the bone vectors
    # a bone vector is the vector from on parent joint to one child joint
    # hip->left hip->left knee->left foot,
    # hip->right hip-> right knee-> right foot
    # hip -> spine->thorax->nose->head
    # thorax -> left shoulder->left elbow->left wrist
    # thorax -> right shoulder-> right elbow->right wrist
    num_sample = skeletons.shape[0]
    skeletons = skeletons.reshape(num_sample, -1, 3)
    parent_joints = skeletons[:, parent_idx, :]
    child_joints = skeletons[:, child_idx, :]
    bone_vectors = child_joints - parent_joints
    # now compute the angles and bone lengths
    rphitheta = xyz2spherical(bone_vectors)
    return rphitheta

def angle_to_position(rphitheta, skeletons):
    # transform joint angle representation to 3d positions
    # starting from the root, create joint one by one according to predefined
    # hierarchical relation
    # NOTE(review): mutates `skeletons` in place (reshape returns a view here)
    # and also returns it.
    num_sample = skeletons.shape[0]
    skeletons = skeletons.reshape(num_sample, -1, 3)
    for bone_idx in range(len(parent_idx)):
        offset = spherical2xyz(np.expand_dims(rphitheta[:, bone_idx, :], axis=1))
        offset = offset[:,0,:]
        skeletons[:, child_idx[bone_idx], :] = \
            skeletons[:, parent_idx[bone_idx], :] + offset
    return skeletons

def mutate_bone_length(population, opt, gen_idx, method='simple'):
    """
    Randomly mutate bone length in a population to increase variation in
    subject size. For example, H36M only contains adults yet you can modify
    bone length to represent children. Since the posture and subject size are
    independent, you can synthetize data for dancing kids for free if you
    already have data for dancing adults. You only need little prior knowledge
    on human bone length.
    """
    # the camera parameters in H36M correspond to the five subjects
    # Rename the synthetic population as these subjects so that the camera
    # parameters can be used
    psuedo_subject_names = [1, 5, 6, 7, 8]
    dict_3d = {}
    for i in range(len(population)):
        # NOTE(review): mutation fires with probability 1 - opt.MBLR because
        # of the `>` comparison — confirm this matches the intended semantics
        # of the "mutation rate" option.
        if np.random.rand() > opt.MBLR:
            angles = position_to_angle(population[i].reshape(1, -1))
            if method == 'simple':
                # The simplest way is to change to bone length to some value
                # according to prior knowledge about human bone size.
                # In our experiment, we collect these values manually from our
                # interactive visualization tool as well as cross validation.
                idx = np.random.randint(0, len(bl_templates))
                angles[0, :, 0] = bl_templates[idx]
                population[i] = (angle_to_position(angles,
                                                   population[i].reshape(1,-1))).reshape(-1)
            elif method == 'addnoise':
                # add Gaussian noise to current bone length to obtain new bone length
                raise ValueError('Deprecated')
            else:
                raise NotImplementedError
    poses_list = np.array_split(population, len(psuedo_subject_names))
    for subject_idx in range(len(psuedo_subject_names)):
        dict_3d[(psuedo_subject_names[subject_idx], 'n/a', 'n/a')] =\
            poses_list[subject_idx]
    save_path = get_save_path(opt, gen_idx)
    np.save(save_path, cast_to_float(dict_3d))
    logging.info('file saved at ' + save_path)
    return

def one_iteration(population, opt, model_file=None):
    """
    Run one iteration to produce the next generation.
    """
    # select the best individuals
    father_indices, mother_indices = choose_best(population, fraction=opt.F)
    # produce next generation by evolutionary operators
    offsprings = []
    for idx in tqdm(range(len(father_indices))):
        son, daughter = exploration(population[father_indices[idx]],
                                    population[mother_indices[idx]],
                                    opt)
        # exploration returns None for offspring rejected by the constraint
        if son is not None:
            offsprings.append(son.reshape(1,-1))
        if daughter is not None:
            offsprings.append(daughter.reshape(1,-1))
    offsprings = np.concatenate(offsprings, axis=0)
    logging.info('{:d} out of {:d} poses survived.'.format(len(offsprings),
                                                           len(father_indices)*2))
    # select the synthetic data actively
    if opt.A:
        assert model_file is not None
        num_before = len(offsprings)
        # NOTE(review): the two string pieces below concatenate to
        # "...with meanerror ..." — missing space in the log message.
        offsprings, mean_error = active_select(model_file, offsprings, opt.AR)
        logging.info('{:d} out of {:d} poses are selected actively with mean'\
                     'error {:.2f}'.format(len(offsprings), num_before, mean_error))
    if opt.Mer:
        # merge the offsprings with the parents
        population = np.vstack([population, offsprings])
    else:
        population = offsprings
    return population

def get_save_path(opt, gen_idx):
    """Build (and create if needed) the directory for this generation's file."""
    if opt.WS:
        save_path = os.path.join(opt.SD, opt.SS, opt.SN)
    else:
        save_path = os.path.join(opt.SD, 'S15678', opt.SN)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    save_path = os.path.join(save_path, 'generation_{:d}.npy'.format(gen_idx))
    return save_path

def split_and_save(final_poses, parameters, gen_idx):
    """Split the population across the five pseudo subjects and save as .npy."""
    temp_subject_list = [1, 5, 6, 7, 8]
    train_set_3d = {}
    poses_list = np.array_split(final_poses, len(temp_subject_list))
    for subject_idx in range(len(temp_subject_list)):
        train_set_3d[(temp_subject_list[subject_idx], 'n/a', 'n/a')] =\
            poses_list[subject_idx]
    save_path = get_save_path(parameters, gen_idx)
    np.save(save_path, cast_to_float(train_set_3d))
    print('file saved at {:s}!'.format(save_path))
    return

def save_results(poses, opt, gen_idx):
    # get save path; mutate_bone_length also performs the split-and-save
    if opt.MBL:
        mutate_bone_length(poses, opt, gen_idx)
    else:
        split_and_save(poses, opt, gen_idx)
    return

def evolution(initial_population, opt, model_file=None):
    """
    Dataset evolution.

    Runs opt.G generations (plus extra generations when opt.E is set and the
    population has not yet reached initial size * opt.T) and saves each result.
    """
    logging.basicConfig(level=logging.INFO,
                        format="[%(asctime)s]: %(message)s"
                        )
    population = initial_population
    save_results(initial_population, opt, 0)
    initial_num = len(initial_population)
    for gen_idx in range(1, opt.G+1):
        population = one_iteration(population, opt, model_file=model_file)
        save_results(population, opt, gen_idx)
    # if not enough
    if opt.E and len(population) < initial_num * opt.T:
        logging.info('Running extra generations to synthesize enough data...')
        while len(population) < initial_num * opt.T:
            gen_idx += 1
            logging.info('Generation {:d}...'.format(gen_idx))
            population = one_iteration(population, opt, model_file=model_file)
            if opt.I:
                save_results(population.copy(), opt, gen_idx)
                logging.info('Generation {:d} saved.'.format(gen_idx))
    save_results(population, opt, gen_idx)
    logging.info('Final population saved.')
    return population
================================================
FILE: libs/evolution/parameter.py
================================================
"""
Arguments and hyper-parameters used in dataset evolution.
""" import argparse def parse_arg(): parser = argparse.ArgumentParser(description='evolve.py') ##-----------------------------------------------------------------------## ## Hyper-parameters # Number of generation to run parser.add_argument('-G', type=int, default=1) # Synthetize enough (E) data with a target ratio after G generations parser.add_argument('-E', type=bool, default=True) parser.add_argument('-T', type=float, default=2.5) # the target ratio # Fraction parser.add_argument('-F', type=float, default=0.05) # Apply mutation on skeleton orientation parser.add_argument('-M', type=bool, default=True) # Apply mutation on bone vector length parser.add_argument('-MBL', type=bool, default=True) # The mutation rate for bone vector length parser.add_argument('-MBLR', type=float, default=0.5) # Mutation rate of changing local limb orientation parser.add_argument('-MRL', type=float, default=0.3) # Mutation rate of changing global skeleton orientation parser.add_argument('-MRG', type=float, default=0.1) # Standrd deviation of Guassian noise (in degrees) for local limb mutation parser.add_argument('-SDL', type=float, default=10.0) # Standrd deviation of Guassian noise for global orientation mutation parser.add_argument('-SDG', type=float, default=30.0) # Select the synthesized data in an active manner parser.add_argument('-A', type=bool, default=False) # The ratio for active selection parser.add_argument('-AR', type=float, default=0.5) # Merge the synthetic data with the initial population parser.add_argument('-Mer', type=bool, default=True) # Apply the crossover operator parser.add_argument('-CV', type=bool, default=True) # Apply constraint to rule out invalid poses parser.add_argument('-C', type=bool, default=True) # Threshold for valid bone vector parser.add_argument('-Th', type=int, default=9) # Visualize the synthetic skeleton during exploring the data space parser.add_argument('-V', type=bool, default=True) # Save the intermediate synthetic data after each 
generation parser.add_argument('-I', type=bool, default=False) # File name for saving parser.add_argument('-SN', type=str, default='evolved_data') # Sampling string used to down-sample the original data # Examples: ['0.001S1', '0.01S1', '0.05S1', '0.1S1', '0.5S1', 'S1', 'S15', 'S156', 'S15678'] parser.add_argument('-SS', type=str, default='0.01S1') # Down-sample the original data for weakly-supervised experiments parser.add_argument('-WS', type=bool, default=False) # Debug mode parser.add_argument('-DE', type=bool, default=False) ##-----------------------------------------------------------------------## ## paths # path to H36M data parser.add_argument('-data_path', type=str, default="../data/human3.6M/threeDPose_train.npy") # Directory for saving the synthetic data parser.add_argument('-SD', type=str, default='../data/human3.6M/evolved') ##-----------------------------------------------------------------------## ## Usages # Usage: generate data parser.add_argument('-generate', type=bool, default=False) # Usage: visualize data parser.add_argument('-visualize', type=bool, default=False) # Usage: split and save evolved dataset parser.add_argument('-split', type=bool, default=False) parser.add_argument('-split_ratio', type=float, default=0.9) opt = parser.parse_args() return opt ================================================ FILE: libs/hhr/__init__.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Empty file. """ ================================================ FILE: libs/hhr/config/__init__.py ================================================ # ------------------------------------------------------------------------------ # Copyright (c) Microsoft # Licensed under the MIT License. 
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from .default import _C as cfg
from .default import update_config
from .models import MODEL_EXTRAS
================================================
FILE: libs/hhr/config/default.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from yacs.config import CfgNode as CN

# Default experiment configuration tree; override via a YAML file and/or a
# key-value list through update_config() below.
_C = CN()

_C.OUTPUT_DIR = ''
_C.LOG_DIR = ''
_C.DATA_DIR = ''
_C.GPUS = (0,)
_C.WORKERS = 4
_C.PRINT_FREQ = 20
_C.AUTO_RESUME = False
_C.PIN_MEMORY = True
_C.RANK = 0

# Cudnn related params
_C.CUDNN = CN()
_C.CUDNN.BENCHMARK = True
_C.CUDNN.DETERMINISTIC = False
_C.CUDNN.ENABLED = True

# common params for NETWORK
_C.MODEL = CN()
_C.MODEL.NAME = 'pose_hrnet'
_C.MODEL.INIT_WEIGHTS = True
_C.MODEL.PRETRAINED = ''
_C.MODEL.NUM_JOINTS = 17
_C.MODEL.TAG_PER_JOINT = True
_C.MODEL.TARGET_TYPE = 'gaussian'
_C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256
_C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32
_C.MODEL.SIGMA = 2
# new_allowed lets model-specific keys be merged in from YAML
_C.MODEL.EXTRA = CN(new_allowed=True)

_C.LOSS = CN()
_C.LOSS.USE_OHKM = False
_C.LOSS.TOPK = 8
_C.LOSS.USE_TARGET_WEIGHT = True
_C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False

# DATASET related params
_C.DATASET = CN()
_C.DATASET.ROOT = ''
_C.DATASET.DATASET = 'mpii'
_C.DATASET.TRAIN_SET = 'train'
_C.DATASET.TEST_SET = 'valid'
_C.DATASET.DATA_FORMAT = 'jpg'
_C.DATASET.TRAIN_PATH = '/h36m/h36m_annot_train.npy'
_C.DATASET.VALID_PATH = '/h36m/h36m_annot_test.npy'
_C.DATASET.HYBRID_JOINTS_TYPE = ''
_C.DATASET.SELECT_DATA = False

# training data augmentation
_C.DATASET.FLIP = True
_C.DATASET.SCALE_FACTOR = 0.25
_C.DATASET.ROT_FACTOR = 30
_C.DATASET.PROB_HALF_BODY = 0.0
_C.DATASET.NUM_JOINTS_HALF_BODY = 8
_C.DATASET.COLOR_RGB = False

# train
_C.TRAIN = CN()
_C.TRAIN.LR_FACTOR = 0.1
_C.TRAIN.LR_STEP = [90, 110]
_C.TRAIN.LR = 0.001
_C.TRAIN.OPTIMIZER = 'adam'
_C.TRAIN.MOMENTUM = 0.9
_C.TRAIN.WD = 0.0001
_C.TRAIN.NESTEROV = False
_C.TRAIN.GAMMA1 = 0.99
_C.TRAIN.GAMMA2 = 0.0
_C.TRAIN.BEGIN_EPOCH = 0
_C.TRAIN.END_EPOCH = 140
_C.TRAIN.RESUME = False
_C.TRAIN.CHECKPOINT = ''
_C.TRAIN.BATCH_SIZE_PER_GPU = 32
_C.TRAIN.SHUFFLE = True

# testing
_C.TEST = CN()
# size of images for each device
_C.TEST.BATCH_SIZE_PER_GPU = 32
# Test Model Epoch
_C.TEST.FLIP_TEST = False
_C.TEST.POST_PROCESS = False
_C.TEST.SHIFT_HEATMAP = False
_C.TEST.USE_GT_BBOX = False
# nms
_C.TEST.IMAGE_THRE = 0.1
_C.TEST.NMS_THRE = 0.6
_C.TEST.SOFT_NMS = False
_C.TEST.OKS_THRE = 0.5
_C.TEST.IN_VIS_THRE = 0.0
_C.TEST.COCO_BBOX_FILE = ''
_C.TEST.BBOX_THRE = 1.0
_C.TEST.MODEL_FILE = ''

# debug
_C.DEBUG = CN()
_C.DEBUG.DEBUG = False
_C.DEBUG.SAVE_BATCH_IMAGES_GT = False
_C.DEBUG.SAVE_BATCH_IMAGES_PRED = False
_C.DEBUG.SAVE_HEATMAPS_GT = False
_C.DEBUG.SAVE_HEATMAPS_PRED = False


def update_config(cfg, args):
    """Merge a YAML file / option list from `args` into `cfg` in place,
    then resolve dataset and model paths against DATA_DIR and re-freeze."""
    cfg.defrost()
    if hasattr(args, 'cfg'):
        cfg.merge_from_file(args.cfg)
    if hasattr(args, 'opts'):
        cfg.merge_from_list(args.opts)
    if hasattr(args, 'modelDir') and args.modelDir:
        cfg.OUTPUT_DIR = args.modelDir
    if hasattr(args, 'logDir') and args.logDir:
        cfg.LOG_DIR = args.logDir
    if hasattr(args, 'dataDir') and args.dataDir:
        cfg.DATA_DIR = args.dataDir
    cfg.DATASET.ROOT = os.path.join(
        cfg.DATA_DIR, cfg.DATASET.ROOT
    )
    cfg.MODEL.PRETRAINED = os.path.join(
        cfg.DATA_DIR, cfg.MODEL.PRETRAINED
    )
    if cfg.TEST.MODEL_FILE:
        cfg.TEST.MODEL_FILE = os.path.join(
            cfg.DATA_DIR, cfg.TEST.MODEL_FILE
        )
    cfg.freeze()


if __name__ == '__main__':
    # dump the default config to the file given as first CLI argument
    import sys
    with open(sys.argv[1], 'w') as f:
        print(_C, file=f)
================================================
FILE: libs/hhr/config/models.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from yacs.config import CfgNode as CN

# pose_resnet related params
POSE_RESNET = CN()
POSE_RESNET.NUM_LAYERS = 50
POSE_RESNET.DECONV_WITH_BIAS = False
POSE_RESNET.NUM_DECONV_LAYERS = 3
POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256]
POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4]
POSE_RESNET.FINAL_CONV_KERNEL = 1
POSE_RESNET.PRETRAINED_LAYERS = ['*']

# pose_multi_resoluton_net related params
POSE_HIGH_RESOLUTION_NET = CN()
POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*']
POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64
POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1

POSE_HIGH_RESOLUTION_NET.STAGE2 = CN()
POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1
POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2
POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4]
POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64]
POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC'
POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM'

POSE_HIGH_RESOLUTION_NET.STAGE3 = CN()
POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1
POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3
POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4]
POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128]
POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC'
POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM'

POSE_HIGH_RESOLUTION_NET.STAGE4 = CN()
POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1
POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4
POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC'
POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM'

# registry mapping model names to their extra config nodes
MODEL_EXTRAS = {
    'pose_resnet': POSE_RESNET,
    'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET,
}
================================================
FILE: libs/hhr/core/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""
================================================
FILE: libs/hhr/core/evaluate.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from libs.hhr.core.inference import get_max_preds, get_max_preds_soft
from libs.hhr.core.loss import get_max_preds_soft_pt


def calc_dists(preds, target, normalize):
    """Per-joint normalized distances, shaped [n_joints, batch]; entries are
    -1 where the target is considered invisible (coords <= 1)."""
    preds = preds.astype(np.float32)
    target = target.astype(np.float32)
    dists = np.zeros((preds.shape[1], preds.shape[0]))
    for n in range(preds.shape[0]):
        for c in range(preds.shape[1]):
            if target[n, c, 0] > 1 and target[n, c, 1] > 1:
                normed_preds = preds[n, c, :] / normalize[n]
                normed_targets = target[n, c, :] / normalize[n]
                dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
            else:
                dists[c, n] = -1
    return dists


def dist_acc(dists, thr=0.5):
    ''' Return percentage below threshold while ignoring values with a -1 '''
    dist_cal = np.not_equal(dists, -1)
    num_dist_cal = dist_cal.sum()
    if num_dist_cal > 0:
        return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
    else:
        return -1


def accuracy(output, target, hm_type='gaussian', thr=0.5):
    '''
    Calculate accuracy according to PCK,
    but uses ground truth heatmap rather than x,y locations
    First value to be returned is average accuracy across 'idxs',
    followed by individual accuracies
    '''
    idx = list(range(output.shape[1]))
    norm = 1.0
    if hm_type == 'gaussian':
        pred, _ = get_max_preds(output)
        target, _ = get_max_preds(target)
        h = output.shape[2]
        w = output.shape[3]
        # normalize distances by one tenth of the heatmap size
        norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
    dists = calc_dists(pred, target, norm)
    acc = np.zeros((len(idx) + 1))
    avg_acc = 0
    cnt = 0
    for i in range(len(idx)):
        acc[i + 1] = dist_acc(dists[idx[i]])
        if acc[i + 1] >= 0:
            avg_acc = avg_acc + acc[i + 1]
            cnt += 1
    avg_acc = avg_acc / cnt if cnt != 0 else 0
    if cnt != 0:
        acc[0] = avg_acc
    return acc, avg_acc, cnt, pred


# NOTE(review): mid-file imports kept as in the original source
from libs.hhr.utils.transforms import get_affine_transform
from libs.hhr.utils.transforms import affine_transform_modified


def get_distance(gt, pred):
    # gt: [n_joints, 2]
    # pred: [n_joints, 2]
    # returns per-joint Euclidean distances, shaped [n_joints, 1]
    sqerr = (gt - pred)**2
    sqerr = sqerr.sum(axis = 1, keepdims=True)
    dist = np.sqrt(sqerr)
    return dist


def accuracy_pixel(output, meta_data, image_size = (288.0, 384.0), arg_max='hard'):
    '''
    Report errors in terms of pixels in the original image plane.

    output may be a numpy array or a torch tensor of heatmaps; arg_max picks
    hard arg-max or soft (expectation-based) peak extraction.
    '''
    if arg_max == 'soft':
        if isinstance(output, np.ndarray):
            pred, max_vals = get_max_preds_soft(output)
        else:
            pred, max_vals = get_max_preds_soft_pt(output)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = get_max_preds(output)
    else:
        raise NotImplementedError
    # multiply by down-sample ratio
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    pred *= image_size[0]/output.shape[3]
    # inverse transform and compare pixel distance
    centers, scales, rots = meta_data['center'], meta_data['scale'], meta_data['rotation']
    centers = centers.data.cpu().numpy()
    scales = scales.data.cpu().numpy()
    rots = rots.data.cpu().numpy()
    joints_original_batch = meta_data['j_original'].data.cpu().numpy()
    distance_list = []
    all_src_coordinates = []
    for sample_idx in range(len(pred)):
        # map predictions back to the original image with the inverse affine
        trans_inv = get_affine_transform(centers[sample_idx],
                                         scales[sample_idx],
                                         rots[sample_idx],
                                         image_size,
                                         inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = affine_transform_modified(pred[sample_idx],
                                                         trans_inv)
        all_src_coordinates.append(pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list.append(get_distance(joints_original, pred_src_coordinates))
    all_distance = np.hstack(distance_list)
    acc = all_distance
    avg_acc = all_distance.mean()
    cnt = len(distance_list) * len(all_distance)
    return acc, avg_acc, cnt, np.concatenate(all_src_coordinates, axis=0), pred, max_vals
================================================
FILE: libs/hhr/core/function.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import logging
import os

import numpy as np
import torch
from torch import autograd

from libs.hhr.core.evaluate import accuracy, accuracy_pixel
from libs.hhr.core.inference import get_final_preds
from libs.hhr.utils.transforms import flip_back, get_affine_transform, affine_transform_modified
from libs.hhr.utils.vis import save_debug_images


logger = logging.getLogger(__name__)


def test_transformation(meta_data, image_size=(288, 384)):
    """Debug helper: consistency-check the forward/inverse affine transforms.

    Prints three difference arrays. The first two compare joints mapped with
    get_affine_transform (forward and cv2-computed inverse) against the stored
    coordinates and should be close to zero if the transforms are consistent.

    Args:
        meta_data: dict with 'j_original' (joints in the source image),
            'joints' (joints in the cropped box), 'center' and 'scale' tensors.
        image_size: (width, height) of the cropped image box.
    """
    joints_original = meta_data['j_original'].squeeze().data.cpu().numpy()
    # coordinates in 384*288 image box
    joints = meta_data['joints'].squeeze().data.cpu().numpy()
    center, scale = meta_data['center'], meta_data['scale']
    center = center.data.cpu().numpy().reshape(2)
    scale = scale.data.cpu().numpy().reshape(2)
    # forward (source -> crop) and inverse (crop -> source) transforms
    trans = get_affine_transform(center=center, scale=scale, rot=0.0,
                                 output_size=image_size, inv=0)
    trans_inv = get_affine_transform(center=center, scale=scale, rot=0.0,
                                     output_size=image_size, inv=1)
    # calculate the distance in terms of pixels
    transformed_coordinates = affine_transform_modified(joints_original, trans)
    transformed_coordinates2 = affine_transform_modified(joints, trans_inv)
    dif1 = joints - transformed_coordinates
    dif2 = joints_original - transformed_coordinates2
    # compute inverse matrix
    # NOTE(review): the inverse of an affine map [A|t] is [A^-1 | -A^-1 t];
    # the translation below is set to -t instead of -A^-1 t, so dif3 is not
    # expected to vanish in general -- confirm this is intentional debugging
    # (trans_inv from cv2 above is the reference inverse).
    inv_compute = np.zeros(trans.shape, trans.dtype)
    inv_compute[:2, :2] = np.linalg.inv(trans[:2, :2])
    inv_compute[:, 2] = - trans[:, 2]
    transformed_coordinates3 = affine_transform_modified(joints, inv_compute)
    dif3 = joints_original - transformed_coordinates3
    print(dif1, dif2, dif3)
    return


def train(config, train_loader, model, criterion, optimizer, epoch,
          output_dir, tb_log_dir, writer_dict=None, total_iters=None):
    """Run one training epoch of the 2D heatmap regressor.

    Args:
        config: experiment configuration (uses PRINT_FREQ and DEBUG flags).
        train_loader: yields (input, target, target_weight, meta) batches.
        model: network under training; may return a list of heatmaps
            (multi-head) or a single heatmap tensor.
        criterion: loss taking (output, target, target_weight).
        optimizer: optimizer stepping the model parameters.
        epoch: current epoch index (for logging only).
        output_dir: directory for debug images.
        tb_log_dir, writer_dict: unused here; kept for interface parity.
        total_iters: optional cap -- return early after this many iterations.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    num_iters = 0
    for i, (input, target, target_weight, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        num_iters += 1
        # optional early stop after a fixed number of iterations
        if total_iters is not None and num_iters > total_iters:
            return

        # compute output
        outputs = model(input)

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        # multi-head models return a list of heatmaps: sum the loss over all
        if isinstance(outputs, list):
            loss = criterion(outputs[0], target, target_weight)
            for output in outputs[1:]:
                loss += criterion(output, target, target_weight)
        else:
            output = outputs
            loss = criterion(output, target, target_weight)

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        _, avg_acc, cnt, src_coordinates, pred, max_vals = accuracy_pixel(outputs, meta)
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            #TODO: save for L1 loss
            save_debug_images(config, input, meta, target, pred, output, prefix)
    # free references from the last batch before returning
    del input, target
    return


def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    """Evaluate on the validation set with dataset-defined metrics.

    Runs the model (optionally with flip-test averaging), collects predictions
    in the original image space and delegates scoring to
    val_dataset.evaluate; returns the resulting performance indicator.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    # ratio between the network input resolution and the heatmap resolution
    ratio = config.MODEL.IMAGE_SIZE[0]/config.MODEL.HEATMAP_SIZE[0]

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    # per-sample predictions: (x, y, confidence) for every joint
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # average the heatmaps of the original and the flipped input
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            # map heatmap-space predictions back to the original image space
            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                # pred is in heatmap space: scale up by the down-sample ratio
                # for visualization on the network input image
                save_debug_images(config, input, meta, target, pred*ratio,
                                  output, prefix)
    # free references from the last batch before dataset-level evaluation
    del input, target
    name_values, perf_indicator = val_dataset.evaluate(
        config, all_preds, output_dir, all_boxes, image_path,
        filenames, imgnums
    )

    model_name = config.MODEL.NAME
    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(name_value, model_name)
    else:
        _print_name_value(name_values, model_name)

    return perf_indicator


def validate_pixel(config, val_loader, val_dataset, model, criterion,
                   output_dir, tb_log_dir, writer_dict=None,
                   total_batches = 410, save=False, split=None):
    """Evaluate with pixel-distance accuracy in the original image space.

    Args:
        total_batches: evaluate at most this many batches unless save is True.
        save: when True, run the full loader and dump predictions to a .npy
            file in the working directory.
        split: dataset split name used in the saved file name.
            NOTE(review): must not be None when save is True, otherwise the
            string concatenation below raises TypeError.

    Returns:
        Average pixel accuracy over the evaluated batches.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # save prediction results
    if save:
        num_samples = len(val_dataset)
        all_preds = np.zeros(
            (num_samples, config.MODEL.NUM_JOINTS, 3),
            dtype=np.float32
        )
        image_path = []
    num_iters = 0
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            num_iters += 1
            # stop early unless a full, saved evaluation was requested
            if num_iters > total_batches and not save:
                break
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # average the heatmaps of the original and the flipped input
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            # pytorch version
            _, avg_acc, cnt, src_coordinates, preds, max_vals = accuracy_pixel(output, meta)
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if save:
                all_preds[idx:idx + num_images, :, 0:2] = src_coordinates
                all_preds[idx:idx + num_images, :, 2:3] = max_vals
                # double check this all_boxes parts
                image_path.extend(meta['image'])
                idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                if not save:
                    save_debug_images(config, input, meta, target, preds,
                                      output, prefix)
    if save:
        save_path = './h36m_prediction_' + split + '.npy'
        np.save(save_path, {'pred': all_preds, 'paths':image_path})
    perf_indicator = acc.avg
    return perf_indicator


# markdown format output
def _print_name_value(name_value, full_arch_name):
    """Log a name->value metric mapping as a markdown table."""
    names = name_value.keys()
    values = name_value.values()
    num_values = len(name_value)
    logger.info(
        '| Arch ' +
        ' '.join(['| {}'.format(name) for name in names]) +
        ' |'
    )
    logger.info('|---' * (num_values+1) + '|')

    # truncate long architecture names so the table stays aligned
    if len(full_arch_name) > 15:
        full_arch_name = full_arch_name[:8] + '...'
    logger.info(
        '| ' + full_arch_name + ' ' +
        ' '.join(['| {:.3f}'.format(value) for value in values]) +
        ' |'
    )


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        # val: latest value; sum/count accumulate for the running average
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record value `val` observed over `n` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        # guard against division by zero when count is still 0
        self.avg = self.sum / self.count if self.count != 0 else 0


================================================
FILE: libs/hhr/core/inference.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import torch

from libs.hhr.utils.transforms import transform_preds


def get_max_preds(batch_heatmaps):
    '''
    Get hard arg-max predictions from score maps.

    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    Returns (preds, maxvals): coordinates [batch, joints, 2] and the peak
    scores [batch, joints, 1].
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    # flatten each heatmap and take arg-max / max per joint
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    # convert the flat index into (x, y) grid coordinates
    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

    # zero-out joints whose peak score is not positive
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals


def get_final_preds(config, batch_heatmaps, center, scale):
    """Hard arg-max predictions, optionally refined by a quarter-pixel shift,
    mapped back to the original image space via the inverse affine transform.
    """
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    if config.TEST.POST_PROCESS:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
                    # shift a quarter pixel towards the higher-valued neighbor
                    diff = np.array(
                        [
                            hm[py][px+1] - hm[py][px-1],
                            hm[py+1][px]-hm[py-1][px]
                        ]
                    )
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals


def get_max_preds_soft(batch_heatmaps):
    """Soft arg-max: expected (x, y) under each normalized heatmap, numpy
    version. Returns (preds, maxvals) with the same shapes as get_max_preds.
    """
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    height = batch_heatmaps.shape[2]
    width = batch_heatmaps.shape[3]

    # get score/confidence for each joint
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    maxvals = np.amax(heatmaps_reshaped, 2)
    maxvals = maxvals.reshape((batch_size, num_joints, 1))

    # normalize the heatmaps so that they sum to 1
    #assert batch_heatmaps.min() >= 0.0
    # NOTE(review): np.clip returns a NEW array that only rebinds the local
    # name; heatmaps_reshaped below still refers to the ORIGINAL (unclipped)
    # data, so the clip appears to have no effect on the result -- confirm
    # intent. Also note the in-place division mutates the caller's array
    # whenever reshape returned a view.
    batch_heatmaps = np.clip(batch_heatmaps, a_min=0.0, a_max=None)
    temp_sum = heatmaps_reshaped.sum(axis = 2, keepdims=True)
    heatmaps_reshaped /= temp_sum
    ## another normalization method: softmax
    # spatial soft-max
    #heatmaps_reshaped = softmax(heatmaps_reshaped, axis=2)
    ##
    batch_heatmaps = heatmaps_reshaped.reshape(batch_size, num_joints, height, width)

    # marginal distributions over x and y
    x = batch_heatmaps.sum(axis = 2)
    y = batch_heatmaps.sum(axis = 3)

    x_indices = np.arange(width).astype(np.float32).reshape(1,1,width)
    y_indices = np.arange(height).astype(np.float32).reshape(1,1,height)

    # expectation of the coordinate under each marginal
    x *= x_indices
    y *= y_indices
    x = x.sum(axis = 2, keepdims=True)
    y = y.sum(axis = 2, keepdims=True)

    preds = np.concatenate([x, y], axis=2)

    # zero-out joints whose peak score is not positive
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals


================================================
FILE: libs/hhr/core/loss.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class JointsMSELoss(nn.Module):
    """Per-joint MSE loss on heatmaps, optionally weighted per joint."""
    def __init__(self, use_target_weight):
        super(JointsMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight

    def forward(self, output, target, target_weight):
        batch_size = output.size(0)
        num_joints = output.size(1)
        # split into per-joint flattened heatmaps
        heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
        heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
        loss = 0

        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()
            if self.use_target_weight:
                loss += 0.5 * self.criterion(
                    heatmap_pred.mul(target_weight[:, idx]),
                    heatmap_gt.mul(target_weight[:, idx])
                )
            else:
                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)

        return loss / num_joints


class JointsOHKMMSELoss(nn.Module):
    """MSE heatmap loss with online hard keypoint mining: only the top-k
    hardest joints of each sample contribute to the final loss."""
    def __init__(self, use_target_weight, topk=8):
        super(JointsOHKMMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='none')
        self.use_target_weight = use_target_weight
        self.topk = topk

    def ohkm(self, loss):
        """Average the top-k largest per-joint losses for each sample.

        loss: tensor of per-sample, per-joint scalar losses.
        """
        ohkm_loss = 0.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            # keep only the k joints with the largest loss for this sample
            topk_val, topk_idx = torch.topk(
                sub_loss, k=self.topk, dim=0, sorted=False
            )
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += torch.sum(tmp_loss) / self.topk
        ohkm_loss /= loss.size()[0]
        return ohkm_loss

    def forward(self, output, target, target_weight):
        batch_size = output.size(0)
        num_joints = output.size(1)
        # split into per-joint flattened heatmaps
        heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
        heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)

        loss = []
        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()
            if self.use_target_weight:
                loss.append(0.5 * self.criterion(
                    heatmap_pred.mul(target_weight[:, idx]),
                    heatmap_gt.mul(target_weight[:, idx])
                ))
            else:
                loss.append(
                    0.5 * self.criterion(heatmap_pred, heatmap_gt)
                )

        # reduce to per-sample, per-joint scalars: [batch, num_joints]
        loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss]
        loss = torch.cat(loss, dim=1)

        return self.ohkm(loss)


# soft-argmax
def get_max_preds_soft_pt(batch_heatmaps):
    # pytorch version of the above function using tensors
    """Differentiable soft arg-max over heatmaps.

    Returns (preds, maxvals): expected coordinates [batch, joints, 2] and the
    raw per-joint maxima [batch, joints, 1].
    """
    assert len(batch_heatmaps.shape) == 4, 'batch_images should be 4-ndim'
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    height = batch_heatmaps.shape[2]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.view((batch_size, num_joints, -1))

    # get score/confidence for each joint
    maxvals = heatmaps_reshaped.max(dim=2)[0]
    maxvals = maxvals.view((batch_size, num_joints, 1))

    # spatial softmax turns each heatmap into a probability distribution
    heatmaps_reshaped = F.softmax(heatmaps_reshaped, dim=2)
    batch_heatmaps = heatmaps_reshaped.view(batch_size, num_joints, height, width)

    # marginal distributions over x and y
    x = batch_heatmaps.sum(dim = 2)
    y = batch_heatmaps.sum(dim = 3)

    # NOTE(review): torch.cuda.comm.broadcast requires CUDA tensors, so this
    # function cannot run on CPU inputs as written -- confirm that is the
    # intended constraint.
    x_indices = torch.cuda.comm.broadcast(torch.arange(width).type(torch.cuda.FloatTensor), devices=[x.device.index])[0]
    x_indices = x_indices.view(1,1,width)
    y_indices = torch.cuda.comm.broadcast(torch.arange(height).type(torch.cuda.FloatTensor), devices=[x.device.index])[0]
    y_indices = y_indices.view(1,1,height)

    # expectation of the coordinate under each marginal
    x *= x_indices
    y *= y_indices
    x = x.sum(dim = 2, keepdim=True)
    y = y.sum(dim = 2, keepdim=True)

    preds = torch.cat([x, y], dim=2)
    return preds, maxvals


class JointsCoordinateLoss(nn.Module):
    """Loss computed on soft arg-max coordinates instead of raw heatmaps."""
    def __init__(self, use_target_weight, loss_type='sl1', image_size=(384, 288)):
        super(JointsCoordinateLoss, self).__init__()
        self.use_target_weight = use_target_weight
        # 'sl1' (smooth L1) is the only implemented loss type
        self.loss_type = loss_type
        self.image_size = image_size
        return

    def forward(self, output, target, target_weight):
        preds, _ = get_max_preds_soft_pt(output)
        # normalize the coordinates to 0-1
        # NOTE(review): these divisions are in-place, so the caller's target
        # tensor is also modified -- confirm targets are not reused afterwards.
        preds[:, :, 0] /= self.image_size[1]
        preds[:, :, 1] /= self.image_size[0]
        target[:, :, 0] /= self.image_size[1]
        target[:, :, 1] /= self.image_size[0]
        if self.loss_type == 'sl1':
            loss = F.smooth_l1_loss(preds, target)
        elif self.loss_type == 'wing':
            raise NotImplementedError
        else:
            raise NotImplementedError
        return loss


class WingLoss(nn.Module):
    """Wing loss on soft arg-max coordinates: logarithmic near zero, linear
    beyond `width`."""
    def __init__(self, use_target_weight, width=5, curvature=0.5, image_size=(384, 288)):
        super(WingLoss, self).__init__()
        # NOTE(review): use_target_weight is accepted but never stored, and
        # target_weight is ignored in forward -- confirm this is intentional.
        self.width = width
        self.curvature = curvature
        # offset that makes the two branches continuous at |diff| == width
        self.C = self.width - self.width * np.log(1 + self.width / self.curvature)
        self.image_size = image_size

    def forward(self, output, target, target_weight):
        prediction, _ = get_max_preds_soft_pt(output)
        # normalize the coordinates to 0-1
        # NOTE(review): in-place division also modifies the caller's target.
        prediction[:, :, 0] /= self.image_size[1]
        prediction[:, :, 1] /= self.image_size[0]
        target[:, :, 0] /= self.image_size[1]
        target[:, :, 1] /= self.image_size[0]
        diff = target - prediction
        diff_abs = diff.abs()
        loss = diff_abs.clone()

        idx_smaller = diff_abs < self.width
        idx_bigger = diff_abs >= self.width

        # log region for small residuals, shifted linear region for large ones
        loss[idx_smaller] = self.width * torch.log(1 + diff_abs[idx_smaller] / self.curvature)
        loss[idx_bigger] = loss[idx_bigger] - self.C
        loss = loss.mean()
        return loss


================================================
FILE: libs/hhr/utils/__init__.py
================================================



================================================
FILE: libs/hhr/utils/transforms.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2


def flip_back(output_flipped, matched_parts):
    '''
    Undo a horizontal flip of predicted heatmaps.

    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
    matched_parts: iterable of (left_idx, right_idx) joint pairs.
    '''
    assert output_flipped.ndim == 4,\
        'output_flipped should be [batch_size, num_joints, height, width]'

    # mirror each heatmap horizontally
    output_flipped = output_flipped[:, :, :, ::-1]

    # swap the channels of left/right joint pairs
    for pair in matched_parts:
        tmp = output_flipped[:, pair[0], :, :].copy()
        output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
        output_flipped[:, pair[1], :, :] = tmp

    return output_flipped


def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    Flip joint coordinates horizontally, swapping left/right joint pairs.
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1

    # Change left-right parts
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = \
            joints[pair[1], :], joints[pair[0], :].copy()
        joints_vis[pair[0], :], joints_vis[pair[1], :] = \
            joints_vis[pair[1], :], joints_vis[pair[0], :].copy()

    # zero-out the coordinates of invisible joints
    return joints*joints_vis, joints_vis


def transform_preds(coords, center, scale, output_size):
    """Map heatmap-space coordinates back to the source image space using the
    inverse affine transform."""
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def get_affine_transform(
        center, scale, rot, output_size,
        shift=np.array([0, 0], dtype=np.float32), inv=0
):
    """Build the 2x3 affine transform mapping the scaled/rotated person box
    around `center` to an output_size image (or the inverse when inv != 0)."""
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    # scale is stored relative to a 200-pixel box -- presumably the MPII
    # convention; confirm against the dataset code
    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # three point pairs define the affine map: the center, a rotated offset
    # point above it, and a third point perpendicular to the first two
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def affine_transform(pt, t):
    """Apply the 2x3 affine matrix t to a single (x, y) point."""
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def affine_transform_modified(pts, t):
    """Apply the 2x3 affine matrix t to an [n, 2] array of points at once."""
    # pts of shape [n, 2]
    new_pts = np.hstack([pts, np.ones((len(pts), 1))]).T
    new_pts = t @ new_pts
    return new_pts[:2, :].T


def get_3rd_point(a, b):
    """Return the point completing a right angle at b (a-b rotated 90 deg)."""
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    """Rotate src_point by rot_rad radians around the origin."""
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def crop(img, center, scale, output_size, rot=0):
    """Crop (and optionally rotate) an output_size patch from img."""
    trans = get_affine_transform(center, scale, rot, output_size)

    dst_img = cv2.warpAffine(
        img, trans, (int(output_size[0]), int(output_size[1])),
        flags=cv2.INTER_LINEAR
    )

    return dst_img


================================================
FILE: libs/hhr/utils/utils.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import logging
import time
from collections import namedtuple
from pathlib import Path

import torch
import torch.optim as optim
import torch.nn as nn


def create_logger(cfg, cfg_name, phase='train'):
    """Create output directories and a file+console logger.

    Directory layout: OUTPUT_DIR/dataset/model/cfg_name, plus a parallel
    tensorboard directory under LOG_DIR.

    Returns:
        (logger, final_output_dir, tensorboard_log_dir) -- dirs as strings.
    """
    root_output_dir = Path(cfg.OUTPUT_DIR)
    # set up logger
    if not root_output_dir.exists():
        print('=> creating {}'.format(root_output_dir))
        root_output_dir.mkdir()

    dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \
        if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET
    dataset = dataset.replace(':', '_')
    model = cfg.MODEL.NAME
    # strip directory and extension from the config file name
    cfg_name = os.path.basename(cfg_name).split('.')[0]

    final_output_dir = root_output_dir / dataset / model / cfg_name

    print('=> creating {}'.format(final_output_dir))
    final_output_dir.mkdir(parents=True, exist_ok=True)

    time_str = time.strftime('%Y-%m-%d-%H-%M')
    log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase)
    final_log_file = final_output_dir / log_file
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(filename=str(final_log_file),
                        format=head)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # also echo log records to the console
    console = logging.StreamHandler()
    logging.getLogger('').addHandler(console)

    tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
        (cfg_name + '_' + time_str)

    print('=> creating {}'.format(tensorboard_log_dir))
    tensorboard_log_dir.mkdir(parents=True, exist_ok=True)

    return logger, str(final_output_dir), str(tensorboard_log_dir)


def get_optimizer(cfg, model):
    """Build the optimizer named by cfg.TRAIN.OPTIMIZER ('sgd' or 'adam').

    NOTE(review): returns None for any other optimizer name -- callers must
    handle that case.
    """
    optimizer = None
    if cfg.TRAIN.OPTIMIZER == 'sgd':
        optimizer = optim.SGD(
            model.parameters(),
            lr=cfg.TRAIN.LR,
            momentum=cfg.TRAIN.MOMENTUM,
            weight_decay=cfg.TRAIN.WD,
            nesterov=cfg.TRAIN.NESTEROV
        )
    elif cfg.TRAIN.OPTIMIZER == 'adam':
        optimizer = optim.Adam(
            model.parameters(),
            lr=cfg.TRAIN.LR
        )

    return optimizer


def save_checkpoint(states, is_best, output_dir,
                    filename='checkpoint.pth'):
    """Save the training state dict; when is_best, additionally save
    states['best_state_dict'] as model_best.pth."""
    torch.save(states, os.path.join(output_dir, filename))
    if is_best and 'state_dict' in states:
        torch.save(states['best_state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))


def get_model_summary(model, *input_tensors, item_length=26, verbose=False):
    """
    Summarize per-layer parameter counts and multiply-adds using forward
    hooks and one dry-run forward pass.

    :param model: module to summarize (switched to eval mode)
    :param input_tensors: example inputs forwarded through the model
    :param item_length: column width of the printed table
    :param verbose: include the detailed per-layer table
    :return: the summary string
    """
    summary = []

    ModuleDetails = namedtuple(
        "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"])
    hooks = []
    layer_instances = {}

    def add_hooks(module):

        def hook(module, input, output):
            class_name = str(module.__class__.__name__)

            # number layers of the same class: Conv2d_1, Conv2d_2, ...
            instance_index = 1
            if class_name not in layer_instances:
                layer_instances[class_name] = instance_index
            else:
                instance_index = layer_instances[class_name] + 1
                layer_instances[class_name] = instance_index

            layer_name = class_name + "_" + str(instance_index)

            params = 0

            # only count parameters of conv / batch-norm / linear layers
            if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \
               class_name.find("Linear") != -1:
                for param_ in module.parameters():
                    params += param_.view(-1).size(0)

            flops = "Not Available"
            if class_name.find("Conv") != -1 and hasattr(module, "weight"):
                flops = (
                    torch.prod(
                        torch.LongTensor(list(module.weight.data.size()))) *
                    torch.prod(
                        torch.LongTensor(list(output.size())[2:]))).item()
            elif isinstance(module, nn.Linear):
                flops = (torch.prod(torch.LongTensor(list(output.size()))) \
                         * input[0].size(1)).item()

            if isinstance(input[0], list):
                input = input[0]
            if isinstance(output, list):
                output = output[0]

            summary.append(
                ModuleDetails(
                    name=layer_name,
                    input_size=list(input[0].size()),
                    output_size=list(output.size()),
                    num_parameters=params,
                    multiply_adds=flops)
            )

        # only hook leaf modules (skip containers and the root model)
        if not isinstance(module, nn.ModuleList) \
           and not isinstance(module, nn.Sequential) \
           and module != model:
            hooks.append(module.register_forward_hook(hook))

    model.eval()
    model.apply(add_hooks)

    space_len = item_length

    # dry run to trigger the hooks, then detach them
    model(*input_tensors)
    for hook in hooks:
        hook.remove()

    details = ''
    if verbose:
        details = "Model Summary" + \
            os.linesep + \
            "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format(
                ' ' * (space_len - len("Name")),
                ' ' * (space_len - len("Input Size")),
                ' ' * (space_len - len("Output Size")),
                ' ' * (space_len - len("Parameters")),
                ' ' * (space_len - len("Multiply Adds (Flops)"))) \
            + os.linesep + '-' * space_len * 5 + os.linesep

    params_sum = 0
    flops_sum = 0
    for layer in summary:
        params_sum += layer.num_parameters
        if layer.multiply_adds != "Not Available":
            flops_sum += layer.multiply_adds
        if verbose:
            details += "{}{}{}{}{}{}{}{}{}{}".format(
                layer.name,
                ' ' * (space_len - len(layer.name)),
                layer.input_size,
                ' ' * (space_len - len(str(layer.input_size))),
                layer.output_size,
                ' ' * (space_len - len(str(layer.output_size))),
                layer.num_parameters,
                ' ' * (space_len - len(str(layer.num_parameters))),
                layer.multiply_adds,
                ' ' * (space_len - len(str(layer.multiply_adds)))) \
                + os.linesep + '-' * space_len * 5 + os.linesep

    details += os.linesep \
        + "Total Parameters: {:,}".format(params_sum) \
        + os.linesep + '-' * space_len * 5 + os.linesep
    details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \
        + os.linesep + '-' * space_len * 5 + os.linesep
    details += "Number of Layers" + os.linesep
    for layer in layer_instances:
        details += "{} : {} layers ".format(layer, layer_instances[layer])

    return details


================================================
FILE: libs/hhr/utils/vis.py
================================================
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Shichao Li (nicholas.li@connect.ust.hk)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import torchvision
import cv2

from libs.hhr.core.inference import get_max_preds


def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis,
                                 file_name, nrow=8, padding=2):
    '''
    Draw the (visible) joints of every sample onto an image grid and save it.

    batch_image: [batch_size, channel, height, width]
    batch_joints: [batch_size, num_joints, 3]
    batch_joints_vis: [batch_size, num_joints, 1]
    '''
    grid = torchvision.utils.make_grid(batch_image, nrow, padding, True)
    ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
    ndarr = ndarr.copy()

    nmaps = batch_image.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height = int(batch_image.size(2) + padding)
    width = int(batch_image.size(3) + padding)
    k = 0
    for y in range(ymaps):
        for x in range(xmaps):
            if k >= nmaps:
                break
            joints = batch_joints[k]
            joints_vis = batch_joints_vis[k]

            for joint, joint_vis in zip(joints, joints_vis):
                # offset the joint into its cell of the image grid
                joint[0] = x * width + padding + joint[0]
                joint[1] = y * height + padding + joint[1]
                if joint_vis[0]:
                    cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
            k = k + 1
    cv2.imwrite(file_name, ndarr)


def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True):
    '''
    Save a grid image: one row per sample, containing the resized input
    followed by one heatmap overlay per joint.

    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    '''
    if normalize:
        # rescale the images to [0, 1] for visualization
        batch_image = batch_image.clone()
        min = float(batch_image.min())
        max = float(batch_image.max())

        batch_image.add_(-min).div_(max - min + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)

    grid_image = np.zeros((batch_size*heatmap_height,
                           (num_joints+1)*heatmap_width,
                           3),
                          dtype=np.uint8)

    preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for i in range(batch_size):
        image = batch_image[i].mul(255)\
                              .clamp(0, 255)\
                              .byte()\
                              .permute(1, 2, 0)\
                              .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
                                    .clamp(0, 255)\
                                    .byte()\
                                    .cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))

        height_begin = heatmap_height * i
        height_end = heatmap_height * (i + 1)
        for j in range(num_joints):
            # mark the predicted peak on both the image and the overlay
            cv2.circle(resized_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)
            heatmap = heatmaps[j, :, :]
            colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            masked_image = colored_heatmap*0.7 + resized_image*0.3
            cv2.circle(masked_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)

            width_begin = heatmap_width * (j+1)
            width_end = heatmap_width * (j+2)
            grid_image[height_begin:height_end, width_begin:width_end, :] = \
                masked_image
            # grid_image[height_begin:height_end, width_begin:width_end, :] = \
            #     colored_heatmap*0.7 + resized_image*0.3

        grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image

    cv2.imwrite(file_name, grid_image)


def save_debug_images(config, input, meta, target, joints_pred, output,
                      prefix):
    """Save GT/predicted joint overlays and heatmaps, gated by the DEBUG
    flags in config."""
    if not config.DEBUG.DEBUG:
        return

    if config.DEBUG.SAVE_BATCH_IMAGES_GT:
        save_batch_image_with_joints(
            input, meta['joints'], meta['joints_vis'],
            '{}_gt.jpg'.format(prefix)
        )
    if config.DEBUG.SAVE_BATCH_IMAGES_PRED:
        save_batch_image_with_joints(
            input, joints_pred, meta['joints_vis'],
            '{}_pred.jpg'.format(prefix)
        )
    if config.DEBUG.SAVE_HEATMAPS_GT:
        save_batch_heatmaps(
            input, target, '{}_hm_gt.jpg'.format(prefix)
        )
    if config.DEBUG.SAVE_HEATMAPS_PRED:
        save_batch_heatmaps(
            input, output, '{}_hm_pred.jpg'.format(prefix)
        )


================================================
FILE: libs/model/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""
Fully-connected residual network as a single deep learner.
"""
import torch.nn as nn
import torch


class ResidualBlock(nn.Module):
    """Two fully-connected layers with batch-norm, dropout and a skip link.

    Input and output dimensionality are both ``linear_size`` so the identity
    shortcut can be added without a projection.
    """

    def __init__(self, linear_size, p_dropout=0.5, kaiming=False, leaky=False):
        super(ResidualBlock, self).__init__()
        self.l_size = linear_size

        # Shared activation and dropout for both layers of the block.
        self.relu = nn.LeakyReLU(inplace=True) if leaky else nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p_dropout)

        self.w1 = nn.Linear(self.l_size, self.l_size)
        self.batch_norm1 = nn.BatchNorm1d(self.l_size)
        self.w2 = nn.Linear(self.l_size, self.l_size)
        self.batch_norm2 = nn.BatchNorm1d(self.l_size)

        if kaiming:
            # Optional He initialization for the two linear layers.
            self.w1.weight.data = nn.init.kaiming_normal_(self.w1.weight.data)
            self.w2.weight.data = nn.init.kaiming_normal_(self.w2.weight.data)

    def forward(self, x):
        # linear -> BN -> activation -> dropout, applied twice.
        h = self.dropout(self.relu(self.batch_norm1(self.w1(x))))
        h = self.dropout(self.relu(self.batch_norm2(self.w2(h))))
        # Residual connection.
        return x + h


class FCModel(nn.Module):
    """Fully-connected residual network used as one stage of the cascade."""

    def __init__(self,
                 stage_id=1,
                 linear_size=1024,
                 num_blocks=2,
                 p_dropout=0.5,
                 norm_twoD=False,
                 kaiming=False,
                 refine_3d=False,
                 leaky=False,
                 dm=False,
                 input_size=32,
                 output_size=64):
        """
        Fully-connected network.

        Args:
            stage_id: position of this learner in the cascade (1-based).
            linear_size: width of the hidden layers.
            num_blocks: number of ResidualBlocks.
            p_dropout: dropout probability.
            refine_3d: when True and stage_id > 1, the input is expected to
                also contain the current 3D estimate (16 joints x 3).
            input_size / output_size: dimensionality of input/output vectors.
        """
        super(FCModel, self).__init__()

        self.linear_size = linear_size
        self.p_dropout = p_dropout
        self.num_blocks = num_blocks
        self.stage_id = stage_id
        self.refine_3d = refine_3d
        self.leaky = leaky
        self.dm = dm

        self.input_size = input_size
        if self.stage_id > 1 and self.refine_3d:
            # Later stages may additionally consume the current 3D joints.
            self.input_size += 16 * 3
        self.output_size = output_size

        # Lift the input to the hidden dimensionality.
        self.w1 = nn.Linear(self.input_size, self.linear_size)
        self.batch_norm1 = nn.BatchNorm1d(self.linear_size)

        self.res_blocks = nn.ModuleList(
            [ResidualBlock(self.linear_size, self.p_dropout, leaky=self.leaky)
             for _ in range(num_blocks)]
        )

        # Project the hidden representation to the output pose.
        self.w2 = nn.Linear(self.linear_size, self.output_size)

        self.relu = nn.LeakyReLU(inplace=True) if self.leaky else nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(self.p_dropout)

        if kaiming:
            self.w1.weight.data = nn.init.kaiming_normal_(self.w1.weight.data)
            self.w2.weight.data = nn.init.kaiming_normal_(self.w2.weight.data)

    def forward(self, x):
        return self.w2(self.get_representation(x))

    def get_representation(self, x):
        """Return the latent representation for an input vector."""
        h = self.dropout(self.relu(self.batch_norm1(self.w1(x))))
        for block in self.res_blocks:
            h = block(h)
        return h


def get_model(stage_id,
              refine_3d=False,
              norm_twoD=False,
              num_blocks=2,
              input_size=32,
              output_size=64,
              linear_size=1024,
              dropout=0.5,
              leaky=False
              ):
    """Convenience factory wrapping FCModel with the cascade's options."""
    return FCModel(stage_id=stage_id,
                   refine_3d=refine_3d,
                   norm_twoD=norm_twoD,
                   num_blocks=num_blocks,
                   input_size=input_size,
                   output_size=output_size,
                   linear_size=linear_size,
                   p_dropout=dropout,
                   leaky=leaky
                   )
def prepare_optim(model, opt):
    """Create the optimizer and learning-rate scheduler for *model*.

    Args:
        model: network whose trainable parameters will be optimized.
        opt: parsed options; uses optim_type ('adam' or 'sgd'), lr,
            weight_decay, momentum (sgd only), milestones and gamma.

    Returns:
        (optimizer, scheduler) tuple.

    Raises:
        ValueError: if opt.optim_type is not a supported optimizer.
    """
    params = [p for p in model.parameters() if p.requires_grad]
    if opt.optim_type == 'adam':
        optimizer = torch.optim.Adam(params,
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay
                                     )
    elif opt.optim_type == 'sgd':
        optimizer = torch.optim.SGD(params,
                                    lr=opt.lr,
                                    momentum=opt.momentum,
                                    weight_decay=opt.weight_decay
                                    )
    else:
        # BUG FIX: previously an unknown optim_type fell through and hit a
        # NameError on the scheduler line; fail loudly instead.
        raise ValueError('Unsupported optimizer type: {}'.format(opt.optim_type))
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=opt.gamma)
    return optimizer, scheduler
class Bottleneck(nn.Module):
    """ResNet bottleneck: 1x1 reduce, 3x3, 1x1 expand, with identity skip."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion,
                                  momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # Optional projection applied to the shortcut branch.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
    def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
                         stride=1):
        """Build one resolution branch as a stack of `block` units.

        A 1x1-conv + BN downsample projection is attached to the first unit
        whenever the stride or the channel count changes, mirroring a
        standard ResNet stage. Updates self.num_inchannels[branch_index].
        """
        downsample = None
        if stride != 1 or \
           self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.num_inchannels[branch_index],
                    num_channels[branch_index] * block.expansion,
                    kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(
                    num_channels[branch_index] * block.expansion,
                    momentum=BN_MOMENTUM
                ),
            )

        layers = []
        layers.append(
            block(
                self.num_inchannels[branch_index],
                num_channels[branch_index],
                stride,
                downsample
            )
        )
        # After the first unit the branch operates at the expanded width.
        self.num_inchannels[branch_index] = \
            num_channels[branch_index] * block.expansion
        for i in range(1, num_blocks[branch_index]):
            layers.append(
                block(
                    self.num_inchannels[branch_index],
                    num_channels[branch_index]
                )
            )

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        # One independent sub-network per resolution.
        branches = []

        for i in range(num_branches):
            branches.append(
                self._make_one_branch(i, block, num_blocks, num_channels)
            )

        return nn.ModuleList(branches)

    def _make_fuse_layers(self):
        """Build the cross-resolution fusion layers.

        fuse_layers[i][j] maps branch j's features to branch i's
        resolution/channels: 1x1 conv + nearest upsample when j is finer
        (j > i), identity (None) when j == i, and a chain of stride-2 3x3
        convs when j is coarser (j < i).
        """
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        for i in range(num_branches if self.multi_scale_output else 1):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    # Finer branch: match channels, then upsample by 2^(j-i).
                    fuse_layer.append(
                        nn.Sequential(
                            nn.Conv2d(
                                num_inchannels[j],
                                num_inchannels[i],
                                1, 1, 0, bias=False
                            ),
                            nn.BatchNorm2d(num_inchannels[i]),
                            nn.Upsample(scale_factor=2**(j-i), mode='nearest')
                        )
                    )
                elif j == i:
                    fuse_layer.append(None)
                else:
                    # Coarser branch: (i-j) stride-2 convs; only the final
                    # conv changes channels, and it carries no ReLU so the
                    # contribution is summed pre-activation.
                    conv3x3s = []
                    for k in range(i-j):
                        if k == i - j - 1:
                            num_outchannels_conv3x3 = num_inchannels[i]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3, 2, 1, bias=False
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3)
                                )
                            )
                        else:
                            num_outchannels_conv3x3 = num_inchannels[j]
                            conv3x3s.append(
                                nn.Sequential(
                                    nn.Conv2d(
                                        num_inchannels[j],
                                        num_outchannels_conv3x3,
                                        3, 2, 1, bias=False
                                    ),
                                    nn.BatchNorm2d(num_outchannels_conv3x3),
                                    nn.ReLU(True)
                                )
                            )
                    fuse_layer.append(nn.Sequential(*conv3x3s))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def get_num_inchannels(self):
        # Channel counts after this module (block expansion applied).
        return self.num_inchannels

    def forward(self, x):
        """Run every branch, then fuse features across resolutions.

        Args:
            x: list of tensors, one per branch.

        Returns:
            List of fused tensors, one per output branch (a single-element
            list when multi_scale_output is False).
        """
        if self.num_branches == 1:
            return [self.branches[0](x[0])]

        for i in range(self.num_branches):
            x[i] = self.branches[i](x[i])

        x_fuse = []

        for i in range(len(self.fuse_layers)):
            # Accumulate every branch j (transformed as needed) into scale i.
            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    y = y + self.fuse_layers[i][j](x[j])
            x_fuse.append(self.relu(y))

        return x_fuse
    def _make_transition_layer(
            self, num_channels_pre_layer, num_channels_cur_layer):
        """Build the per-branch transition layers between two stages.

        For existing branches a 3x3 conv adapts the channel count when it
        differs (None otherwise); each newly introduced, coarser branch is
        produced from the last previous branch via stride-2 3x3 convs.
        """
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    # Channel mismatch: adapt with a stride-1 3x3 conv.
                    transition_layers.append(
                        nn.Sequential(
                            nn.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                3, 1, 1, bias=False
                            ),
                            nn.BatchNorm2d(num_channels_cur_layer[i]),
                            nn.ReLU(inplace=True)
                        )
                    )
                else:
                    # Same width: pass through unchanged.
                    transition_layers.append(None)
            else:
                # New branch: downsample from the last pre-stage branch.
                conv3x3s = []
                for j in range(i+1-num_branches_pre):
                    inchannels = num_channels_pre_layer[-1]
                    # Only the final conv in the chain switches channels.
                    outchannels = num_channels_cur_layer[i] \
                        if j == i-num_branches_pre else inchannels
                    conv3x3s.append(
                        nn.Sequential(
                            nn.Conv2d(
                                inchannels, outchannels, 3, 2, 1, bias=False
                            ),
                            nn.BatchNorm2d(outchannels),
                            nn.ReLU(inplace=True)
                        )
                    )
                transition_layers.append(nn.Sequential(*conv3x3s))

        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` units of `block`; updates self.inplanes.

        A 1x1-conv projection is attached to the first unit whenever the
        stride or channel count changes (standard ResNet stage layout).
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes, planes * block.expansion,
                    kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels,
                    multi_scale_output=True):
        """Assemble a stage as a sequence of HighResolutionModules.

        Returns the stage (nn.Sequential) and the channel counts it leaves
        behind, to feed the next transition layer.
        """
        num_modules = layer_config['NUM_MODULES']
        num_branches = layer_config['NUM_BRANCHES']
        num_blocks = layer_config['NUM_BLOCKS']
        num_channels = layer_config['NUM_CHANNELS']
        block = blocks_dict[layer_config['BLOCK']]
        fuse_method = layer_config['FUSE_METHOD']

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only honored by the last module.
            if not multi_scale_output and i == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True

            modules.append(
                HighResolutionModule(
                    num_branches,
                    block,
                    num_blocks,
                    num_inchannels,
                    num_channels,
                    fuse_method,
                    reset_multi_scale_output
                )
            )
            num_inchannels = modules[-1].get_num_inchannels()

        return nn.Sequential(*modules), num_inchannels
range(self.stage4_cfg['NUM_BRANCHES']): if self.transition3[i] is not None: x_list.append(self.transition3[i](y_list[-1])) else: x_list.append(y_list[i]) y_list = self.stage4(x_list) x = self.final_layer(y_list[0]) # Temp test: upsampling x = self.upsample_layer(x) return x def init_weights(self, pretrained=''): logger.info('=> init weights from normal distribution') for m in self.modules(): if isinstance(m, nn.Conv2d): # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') nn.init.normal_(m.weight, std=0.001) for name, _ in m.named_parameters(): if name in ['bias']: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.ConvTranspose2d): nn.init.normal_(m.weight, std=0.001) for name, _ in m.named_parameters(): if name in ['bias']: nn.init.constant_(m.bias, 0) if os.path.isfile(pretrained): pretrained_state_dict = torch.load(pretrained) logger.info('=> loading pretrained model {}'.format(pretrained)) need_init_state_dict = {} for name, m in pretrained_state_dict.items(): if name.split('.')[0] in self.pretrained_layers \ or self.pretrained_layers[0] is '*': need_init_state_dict[name] = m self.load_state_dict(need_init_state_dict, strict=False) elif pretrained: logger.error('=> please download pre-trained models first!') raise ValueError('{} is not exist!'.format(pretrained)) def load_my_state_dict(self, state_dict): own_state = self.state_dict() for name, param in state_dict.items(): if name not in own_state: continue param = param.data own_state[name].copy_(param) def get_pose_net(cfg, is_train, **kwargs): model = PoseHighResolutionNet(cfg, **kwargs) if is_train and cfg.MODEL.INIT_WEIGHTS: model.init_weights(cfg.MODEL.PRETRAINED) return model ================================================ FILE: libs/model/pose_resnet.py ================================================ # ------------------------------------------------------------------------------ # 
class BasicBlock(nn.Module):
    """Standard two-conv residual block (ResNet-18/34 style)."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # Optional projection applied to the shortcut branch.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)
self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class PoseResNet(nn.Module): def __init__(self, block, layers, cfg, **kwargs): self.inplanes = 64 extra = cfg.MODEL.EXTRA self.deconv_with_bias = extra.DECONV_WITH_BIAS super(PoseResNet, self).__init__() self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) # used for deconv layers self.deconv_layers = self._make_deconv_layer( extra.NUM_DECONV_LAYERS, extra.NUM_DECONV_FILTERS, extra.NUM_DECONV_KERNELS, ) self.final_layer = nn.Conv2d( in_channels=extra.NUM_DECONV_FILTERS[-1], out_channels=cfg.MODEL.NUM_JOINTS, kernel_size=extra.FINAL_CONV_KERNEL, stride=1, padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 ) def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) def _get_deconv_cfg(self, deconv_kernel, index): if deconv_kernel == 4: padding = 1 output_padding = 0 elif deconv_kernel == 3: padding = 1 output_padding = 1 elif deconv_kernel == 2: padding = 0 
output_padding = 0 return deconv_kernel, padding, output_padding def _make_deconv_layer(self, num_layers, num_filters, num_kernels): assert num_layers == len(num_filters), \ 'ERROR: num_deconv_layers is different len(num_deconv_filters)' assert num_layers == len(num_kernels), \ 'ERROR: num_deconv_layers is different len(num_deconv_filters)' layers = [] for i in range(num_layers): kernel, padding, output_padding = \ self._get_deconv_cfg(num_kernels[i], i) planes = num_filters[i] layers.append( nn.ConvTranspose2d( in_channels=self.inplanes, out_channels=planes, kernel_size=kernel, stride=2, padding=padding, output_padding=output_padding, bias=self.deconv_with_bias)) layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) layers.append(nn.ReLU(inplace=True)) self.inplanes = planes return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) x = self.deconv_layers(x) x = self.final_layer(x) return x def init_weights(self, pretrained=''): if os.path.isfile(pretrained): logger.info('=> init deconv weights from normal distribution') for name, m in self.deconv_layers.named_modules(): if isinstance(m, nn.ConvTranspose2d): logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) logger.info('=> init {}.bias as 0'.format(name)) nn.init.normal_(m.weight, std=0.001) if self.deconv_with_bias: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): logger.info('=> init {}.weight as 1'.format(name)) logger.info('=> init {}.bias as 0'.format(name)) nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) logger.info('=> init final conv weights from normal distribution') for m in self.final_layer.modules(): if isinstance(m, nn.Conv2d): # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) logger.info('=> init {}.bias as 0'.format(name)) 
def get_pose_net(cfg, is_train, **kwargs):
    """Factory for PoseResNet.

    Looks up the block type and per-stage layer counts for
    cfg.MODEL.EXTRA.NUM_LAYERS in resnet_spec, builds the network, and —
    when training with INIT_WEIGHTS enabled — initializes it from
    cfg.MODEL.PRETRAINED.
    """
    block_class, layers = resnet_spec[cfg.MODEL.EXTRA.NUM_LAYERS]
    model = PoseResNet(block_class, layers, cfg, **kwargs)

    if is_train and cfg.MODEL.INIT_WEIGHTS:
        model.init_weights(cfg.MODEL.PRETRAINED)

    return model
def parse_arg():
    """Parse command-line options for 2Dto3Dnet.py.

    All option names and defaults are unchanged. Boolean flags now accept
    explicit textual values ('-train True' / '-train False'); list options
    take space-separated values ('-milestones 50 100 150').
    """
    def _str2bool(value):
        # BUG FIX: argparse's type=bool is broken — bool('False') is True,
        # so every provided flag value used to parse as True. Interpret the
        # usual textual spellings instead. Only applied to command-line
        # strings; defaults below are used as-is.
        return str(value).lower() in ('true', 't', 'yes', 'y', '1')

    parser = argparse.ArgumentParser(description='2Dto3Dnet.py')
    ## paths
    parser.add_argument('-save_root', type=str, default='../model/')
    ##-----------------------------------------------------------------------##
    ## model settings
    parser.add_argument('-save_name', type=str, default=None)
    # feed the current estimated 3D poses to the next stage
    parser.add_argument('-refine_3d', type=_str2bool, default=False)
    parser.add_argument('-norm_twoD', type=_str2bool, default=False)
    parser.add_argument('-num_blocks', type=int, default=2)
    # how many stages used for boosted regression
    parser.add_argument('-num_stages', type=int, default=2)
    # the length of 3D pose representation used in the network
    parser.add_argument('-linear_size', type=int, default=1024)
    # extra name for logging
    parser.add_argument('-extra_str', type=str, default='')
    # dropout
    parser.add_argument('-dropout', type=float, default=0.5)
    # leaky ReLU
    parser.add_argument('-leaky', type=_str2bool, default=False)
    ##-----------------------------------------------------------------------##
    ## training settings
    parser.add_argument('-batch_size', type=int, default=8192)
    # random seed for reproduction of experiments
    parser.add_argument('-seed', type=int, default=2019)
    # number of threads to use when loading data
    parser.add_argument('-num_threads', type=int, default=4)
    parser.add_argument('-gpuid', type=int, default=0)
    parser.add_argument('-epochs', type=int, default=200)
    parser.add_argument('-report_every', type=int, default=100)
    # whether to perform evaluation on evaluation set during training
    parser.add_argument('-eval', type=_str2bool, default=False)
    # whether to evaluate for each action during the training
    parser.add_argument('-eval_action_wise', type=_str2bool, default=True)
    # what protocol to use for evaluation
    # BUG FIX: type=list split a string into characters; use nargs instead.
    parser.add_argument('-protocols', type=str, nargs='+',
                        default=['P1', 'P2'])
    parser.add_argument('-eval_every', type=int, default=350)
    # path to the human3.6M dataset
    parser.add_argument('-data_dir', type=str, default='../data/human3.6M/')
    # actions to use for training
    parser.add_argument('-actions', type=str, default='All')
    # whether to do data augmentation for the training data
    parser.add_argument('-augmentation', type=_str2bool, default=False)
    # using virtual cameras
    parser.add_argument('-virtual_cams', type=_str2bool, default=False)
    # interpolate between 3D joints
    parser.add_argument('-interpolate', type=_str2bool, default=False)
    # what input to use, synthetic or detected
    parser.add_argument('-twoD_source', type=str, default='synthetic')
    # what dataset to use as the evaluation set
    parser.add_argument('-test_source', type=str, default='h36m')
    # whether to use pre-augmented training data
    parser.add_argument('-pre_aug', type=_str2bool, default=False)
    # the path to the pre-augmented dataset
    parser.add_argument('-pre_aug_dir', type=str,
                        default='../data/augmented_evo_10.npy')
    # the path of pre-trained check-point
    parser.add_argument('-ckpt_dir', type=str)
    ##-----------------------------------------------------------------------##
    ## dataset settings
    # whether to only predict 14 joints
    parser.add_argument('-pred14', type=_str2bool, default=False)
    # whether to add 3D poses fitted by SMPL model
    parser.add_argument('-SMPL', type=_str2bool, default=False)
    # perform normalization for each pose instead of all the poses
    parser.add_argument('-norm_single', type=_str2bool, default=False)
    # how much weight is given to the new poses of SMPL
    parser.add_argument('-SMPL_weight', type=float, default=0.5)
    # whether to change the image size of the cameras
    parser.add_argument('-change_size', type=_str2bool, default=False)
    # virtual image size if changed
    parser.add_argument('-vir_img_size', type=int, default=256)
    # use only a subset of training examples for weakly-supervised experiments
    parser.add_argument('-ws', type=_str2bool, default=False)
    # the path to evolved training examples for weakly-supervised experiments
    parser.add_argument('-evolved_path', type=str, default=None)
    # the training sample used if no path is provided
    parser.add_argument('-ws_name', type=str, default='S1')
    # whether to visualize the dataset
    parser.add_argument('-visualize', type=_str2bool, default=False)
    # whether to show the ambiguous pairs in the dataset
    parser.add_argument('-show_ambi', type=_str2bool, default=False)
    ##-----------------------------------------------------------------------##
    # Optimizer settings
    parser.add_argument('-optim_type', type=str, default='adam')
    parser.add_argument('-lr', type=float, default=0.001,
                        help="sgd: 0.5, adam: 0.001")
    parser.add_argument('-weight_decay', type=float, default=0.0)
    parser.add_argument('-momentum', type=float, default=0.9,
                        help="sgd: 0.9")
    # reduce the learning rate after each milestone
    # BUG FIX: type=list parsed '50' as ['5', '0']; use nargs='+' with int.
    parser.add_argument('-milestones', type=int, nargs='+',
                        default=[50, 100, 150])
    # how much to reduce the learning rate
    parser.add_argument('-gamma', type=float, default=1)
    ##-----------------------------------------------------------------------##
    ## usage configuration
    # whether to train a model or deploy a trained model
    parser.add_argument('-train', type=_str2bool, default=False)
    parser.add_argument('-evaluate', type=_str2bool, default=False)
    # evaluate a batch of models
    parser.add_argument('-evaluate_batch', type=_str2bool, default=False)
    # whether to save the trained model
    parser.add_argument('-save', type=_str2bool, default=True)
    # evaluate for each action
    parser.add_argument('-evaluate_action', type=_str2bool, default=True)
    parser.add_argument('-produce', type=_str2bool, default=False)

    opt = parser.parse_args()
    return opt
"""
Utility functions for the hierarchical human representation.
A Python implementation for pose-conditioned joint angle limits is also
included.

Reference: "Pose-Conditioned Joint Angle Limits for 3D Human Pose Reconstruction"
"""
import logging
import os

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

#=============================================================================#
# Load the joint angle constraints.
# These files are directly converted from .mat to .npy; the MATLAB
# implementation of the CVPR 15 paper has detailed documentation.
# NOTE(review): the path is relative, so importing this module only works
# from a sibling of ../resources — confirm against the intended entry points.
root = "../resources/constraints"
logging.info("Loading files from " + root)
model_path = os.path.join(root, "jointAngleModel_v2.npy")
joint_angle_limits = np.load(model_path, allow_pickle=True).item()
# Occupancy grid of observed (theta, phi) bins per joint.
angle_spread = joint_angle_limits['angleSprd']
# Separation plane for conditional joint angles.
sepPlane = joint_angle_limits['sepPlane']
E2 = joint_angle_limits['E2']
bounds = joint_angle_limits['bounds']
# Static pose and parameters used in coordinate transformation.
static_pose_path = os.path.join(root, "staticPose.npy")
static_pose = np.load(static_pose_path, allow_pickle=True).item()
di = static_pose['di']
a = static_pose['a'].reshape(3)
# Load the pre-computed conditional distribution.
con_dis_path = os.path.join(root, "conditional_dis.npy")
con_dis = np.load(con_dis_path, allow_pickle=True).item()
#=============================================================================#
# Joint names of the CVPR 15 paper.
PRIOR_NAMES = ['back-bone', 'R-shldr', 'R-Uarm', 'R-Larm', 'L-shldr',
               'L-Uarm', 'L-Larm', 'head', 'R-hip', 'R-Uleg', 'R-Lleg',
               'R-feet', 'L-hip', 'L-Uleg', 'L-Lleg', 'L-feet'
               ]
# Human 3.6M joint names are slightly different from the above;
# unused slots keep the empty string.
H36M_NAMES = ['']*32
H36M_NAMES[0] = 'Hip'
H36M_NAMES[1] = 'RHip'
H36M_NAMES[2] = 'RKnee'
H36M_NAMES[3] = 'RFoot'
H36M_NAMES[6] = 'LHip'
H36M_NAMES[7] = 'LKnee'
H36M_NAMES[8] = 'LFoot'
H36M_NAMES[12] = 'Spine'
H36M_NAMES[13] = 'Thorax'
H36M_NAMES[14] = 'Neck/Nose'
H36M_NAMES[15] = 'Head'
H36M_NAMES[17] = 'LShoulder'
H36M_NAMES[18] = 'LElbow'
H36M_NAMES[19] = 'LWrist'
H36M_NAMES[25] = 'RShoulder'
H36M_NAMES[26] = 'RElbow'
H36M_NAMES[27] = 'RWrist'
# Correspondence of the joints:
# (key, value) -> (index in PRIOR_NAMES, index in H36M_NAMES)
correspondence = {0:12, 1:13, 2:25, 3:26, 4:27, 5:17, 6:18, 7:19, 8:15,
                  9:1, 10:2, 11:3, 13:6, 14:7, 15:8}
# Number of bone vectors attached to a torso.
num_of_bones = 9
# Discretization of the spherical coordinates:
# bin edges for theta; theta values: 1 to 121 (integer)
theta_edges = np.arange(0.5, 122, 1)
# bin edges for phi; phi values: 1 to 61
phi_edges = np.arange(0.5, 62, 1)
# Color map used for visualization.
cmap = plt.cm.RdYlBu
# Indices used for computing bone vectors for non-torso bones.
nt_parent_indices = [13, 17, 18, 25, 26, 6, 7, 1, 2, 13]
nt_child_indices = [15, 18, 19, 26, 27, 7, 8, 2, 3, 14]
# Map from bone index to the parent's di index.
# TODO: document how these indices were derived.
di_indices = {2:5, 4:2, 6:13, 8:9}
# Map from angle index to record index.
record_indices = {0:4, 1:2, 3:0, 5:8, 7:5, 2:3, 4:1, 6:9, 8:6}
# Human-readable name for each bone vector.
bone_name = {
    1: 'thorax to head top',
    2: 'left shoulder to left elbow',
    3: 'left elbow to left wrist',
    4: 'right shoulder to right elbow',
    5: 'right elbow to right wrist',
    6: 'left hip to left knee',
    7: 'left knee to left ankle',
    8: 'right hip to right knee',
    9: 'right knee to right ankle'
}
""" valid_vec = np.ones((num_of_bones), dtype=np.bool) angles = to_spherical(skeleton_local) angles[:,1:] *= 180/np.pi # convert to valid range and discretize # theta: -180~180 degrees discretized into 120 bins # phi: -90~90 degrees discretized into 60 bins angles[:, 1] = np.floor((angles[:, 1]+180)/3 + 1) angles[:, 2] = np.floor((angles[:, 2]+90)/3 + 1) # go through each bone and check the angle-limits for angle_idx in range(len(angles)): angle = angles[angle_idx] record_idx = record_indices[angle_idx] theta, phi = int(angle[1]), int(angle[2]) if angle_idx in [0, 1, 3, 5, 7]: test_value = angle_spread[0, record_idx][theta-1, phi-1] if test_value == 0: valid_vec[angle_idx] == False else: angle_parent = angles[angle_idx - 1] theta_p, phi_p = int(angle_parent[1]), int(angle_parent[2]) vector = normalize(sepPlane[0, record_idx][theta_p-1, phi_p-1]) for value in vector: if np.isnan(value): valid_vec[angle_idx] = False continue if np.dot(np.hstack([skeleton_local[angle_idx], 1]), vector) > 0: valid_vec[angle_idx] = False else: e1 = vector[:-1] e2 = E2[0, record_idx][theta_p-1, phi_p-1] T = gram_schmidt_columns(np.hstack([e1.reshape(3,1), e2.reshape(3,1), np.cross(e1,e2).reshape(3,1)])) bnd = bounds[0, record_idx][theta_p-1, phi_p-1] u = (T[:, 1:]).T @ skeleton_local[angle_idx] if u[0] < bnd[0] or u[0] > bnd[1] or u[1] < bnd[2] or u[1] > bnd[3]: valid_vec[angle_idx] = False if return_ang: return valid_vec, angles else: return valid_vec def is_valid(skeleton, return_ang = False, camera = False): """ args: skeleton: input skeleton of shape [num_joints, 3] use the annotation of Human 3.6M dataset return: valid_vec: boolean vector specifying the validity for each bone. return 0 for invalid bones. 
camera: relative orientation of camera and human """ skeleton = skeleton.reshape(len(H36M_NAMES), -1) # the ordering of coordinate used by the Prior was x,z and y skeleton = skeleton[:, [0,2,1]] # convert bone vectors into local coordinate skeleton_local = to_local(skeleton) ret = is_valid_local(skeleton_local, return_ang=return_ang) if return_ang: return ret[0], ret[1] else: return ret def normalize(vector): """ Normalize a vector. """ return vector/np.linalg.norm(vector) def to_spherical(xyz): """ Convert from Cartisian coordinate to spherical coordinate theta: [-pi, pi] phi: [-pi/2, pi/2] note that xyz should be float number """ # return in r, phi, and theta (elevation angle from z axis down) return_value = np.zeros(xyz.shape, dtype=xyz.dtype) xy = xyz[:,0]**2 + xyz[:,1]**2 return_value[:,0] = np.sqrt(xy + xyz[:,2]**2) # r return_value[:,1] = np.arctan2(xyz[:,1], xyz[:,0]) # theta return_value[:,2] = np.arctan2(xyz[:,2], np.sqrt(xy)) # phi return return_value def to_xyz(rthetaphi): """ Convert from spherical coordinate to Cartisian coordinate theta: [0, 2*pi] or [-pi, pi] phi: [-pi/2, pi/2] """ return_value = np.zeros(rthetaphi.shape, dtype=rthetaphi.dtype) sintheta = np.sin(rthetaphi[:,1]) costheta = np.cos(rthetaphi[:,1]) sinphi = np.sin(rthetaphi[:,2]) cosphi = np.cos(rthetaphi[:,2]) return_value[:,0] = rthetaphi[:,0]*costheta*cosphi # x return_value[:,1] = rthetaphi[:,0]*sintheta*cosphi # y return_value[:,2] = rthetaphi[:,0]*sinphi #z return return_value def test_coordinate_conversion(): # theta: [-pi, pi] reference xyz = np.random.rand(1, 3)*2 - 1 rthetaphi = to_spherical(xyz) xyz2 = to_xyz(rthetaphi) print('maximum error:', np.max(np.abs(xyz - xyz2))) # theta: [0, 2*pi] reference xyz = np.random.rand(1, 3)*2 - 1 rthetaphi = to_spherical(xyz) indices = rthetaphi[:,1] < 0 rthetaphi[:,1][indices] += 2*np.pi xyz2 = to_xyz(rthetaphi) print('maximum error:', np.max(np.abs(xyz - xyz2))) return def gram_schmidt_columns(X): """ Apply Gram-Schmidt orthogonalization 
to obtain basis vectors. """ B = np.zeros(X.shape) B[:, 0] = (1/np.linalg.norm(X[:, 0]))*X[:, 0] for i in range(1, 3): v = X[:, i] U = B[:, 0:i] # subspace basis which has already been orthonormalized pc = U.T @ v # orthogonal projection coefficients of v onto U p = U@pc v = v - p if np.linalg.norm(v) < 2e-16: # vectors are not linearly independent! raise ValueError else: v = normalize(v) B[:, i] = v return B def direction_check(system, v1, v2, v3): if system[:,0].dot(v1) <0: system[:,0] *= -1 if system[:,1].dot(v2) <0: system[:,1] *= -1 if system[:,2].dot(v3) <0: system[:,2] *= -1 return system def get_normal(x1, a, x): """ Get normal vector. """ nth = 1e-4 # x and a are parallel if np.linalg.norm(x - a) < nth or np.linalg.norm(x + a) < nth: n = np.cross(x, x1) flag = True else: n = np.cross(a, x) flag = False return normalize(n), flag def get_basis1(skeleton): """ Compute local coordinate system from 3D joint positions. This system is used for upper-limbs. """ # compute the vector from the left shoulder to the right shoulder left_shoulder = skeleton[17] right_shoulder = skeleton[25] v1 = normalize(right_shoulder - left_shoulder) # compute the backbone vector from the thorax to the spine thorax = skeleton[13] spine = skeleton[12] v2 = normalize(spine - thorax) # v3 is the cross product of v1 and v2 (front-facing vector for upper-body) v3 = normalize(np.cross(v1, v2)) return v1, v2, v3 def to_local(skeleton): """ Represent the bone vectors in the local coordinate systems. 
""" v1, v2, v3 = get_basis1(skeleton) # compute the vector from the left hip to the right hip left_hip = skeleton[6] right_hip = skeleton[1] v4 = normalize(right_hip - left_hip) # v5 is the cross product of v4 and v2 (front-facing vector for lower-body) v5 = normalize(np.cross(v4, v2)) # compute orthogonal coordinate systems using GramSchmidt # for upper body, we use v1, v2 and v3 system1 = gram_schmidt_columns(np.hstack([v1.reshape(3,1), v2.reshape(3,1), v3.reshape(3,1)])) # make sure the directions rougly align #system1 = direction_check(system1, v1, v2, v3) # for lower body, we use v4, v2 and v5 system2 = gram_schmidt_columns(np.hstack([v4.reshape(3,1), v2.reshape(3,1), v5.reshape(3,1)])) #system2 = direction_check(system2, v4, v2, v5) bones = skeleton[nt_parent_indices, :] - skeleton[nt_child_indices, :] # convert bone vector to local coordinate system bones_local = np.zeros(bones.shape, dtype=bones.dtype) for bone_idx in range(len(bones)): # only compute bone vectors for non-torsos # the order of the non-torso bone vector is: # bone vector1: thorax to head top # bone vector2: left shoulder to left elbow # bone vector3: left elbow to left wrist # bone vector4: right shoulder to right elbow # bone vector5: right elbow to right wrist # bone vector6: left hip to left knee # bone vector7: left knee to left ankle # bone vector8: right hip to right knee # bone vector9: right knee to right ankle bone = normalize(bones[bone_idx]) if bone_idx in [0, 1, 3, 5, 7]: # bones that are directly connected to the torso if bone_idx in [0, 1, 3]: # upper body bones_local[bone_idx] = system1.T @ bone else: # lower body bones_local[bone_idx] = system2.T @ bone else: if bone_idx in [2, 4]: parent_R = system1 else: parent_R = system2 # parent bone index is smaller than 1 vector_u = normalize(bones[bone_idx - 1]) di_index = di_indices[bone_idx] vector_v, flag = get_normal(parent_R@di[:, di_index], parent_R@a, vector_u ) vector_w = np.cross(vector_u, vector_v) local_system = 
gram_schmidt_columns(np.hstack([vector_u.reshape(3,1), vector_v.reshape(3,1), vector_w.reshape(3,1)] ) ) bones_local[bone_idx] = local_system.T @ bone return bones_local def to_global(skeleton, bones_local, cache=False): """ Convert local coordinate back into global coordinate system. cache: return intermeadiate results """ return_value = {} v1, v2, v3 = get_basis1(skeleton) # compute the vector from the left hip to the right hip left_hip = skeleton[6] right_hip = skeleton[1] v4 = normalize(right_hip - left_hip) # v5 is the cross product of v4 and v2 (front-facing vector for lower-body) v5 = normalize(np.cross(v4, v2)) # compute orthogonal coordinate systems using GramSchmidt # for upper body, we use v1, v2 and v3 system1 = gram_schmidt_columns(np.hstack([v1.reshape(3,1), v2.reshape(3,1), v3.reshape(3,1)])) # make sure the directions rougly align #system1 = direction_check(system1, v1, v2, v3) # for lower body, we use v4, v2 and v5 system2 = gram_schmidt_columns(np.hstack([v4.reshape(3,1), v2.reshape(3,1), v5.reshape(3,1)])) #system2 = direction_check(system2, v4, v2, v5) if cache: return_value['cache'] = [system1, system2] return_value['bl'] = bones_local bones_global = np.zeros(bones_local.shape) # convert bone vector to local coordinate system for bone_idx in [0,1,3,5,7,2,4,6,8]: # the indices follow the order from torso to limbs # only compute bone vectors for non-torsos bone = normalize(bones_local[bone_idx]) if bone_idx in [0, 1, 3, 5, 7]: # bones that are directly connected to the torso if bone_idx in [0, 1, 3]: # upper body # this is the inverse transformation compared to the to_local # function bones_global[bone_idx] = system1 @ bone else: # lower body bones_global[bone_idx] = system2 @ bone else: if bone_idx in [2, 4]: parent_R = system1 else: parent_R = system2 # parent bone index is smaller than 1 vector_u = normalize(bones_global[bone_idx - 1]) di_index = di_indices[bone_idx] vector_v, flag = get_normal(parent_R@di[:, di_index], parent_R@a, vector_u) 
vector_w = np.cross(vector_u, vector_v) local_system = gram_schmidt_columns(np.hstack([vector_u.reshape(3,1), vector_v.reshape(3,1), vector_w.reshape(3,1)])) if cache: return_value['cache'].append(local_system) bones_global[bone_idx] = local_system @ bone return_value['bg'] = bones_global return return_value def test_global_local_conversion(): """ test for global and lobal coordinate conversion """ path='Your3DSkeleton.npy' index = 0 pose = np.load(path, allow_pickle=True)[index] pose = pose.reshape(32, -1) global_bones = pose[nt_parent_indices, :] - pose[nt_child_indices, :] for bone_idx in range(len(global_bones)): global_bones[bone_idx] = normalize(global_bones[bone_idx]) local_c = to_local(pose) global_c = to_global(pose, local_c)['bg'] maximum_error = np.max(np.abs(global_bones - global_c)) print('maximum error', maximum_error) return maximum_error def show3Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=True, gt=False,pred=False,inv_z=False): # blue, orange vals = np.reshape( channels, (32, -1) ) I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 # start points J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 # end points LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool) # Make connection matrix for i in np.arange( len(I) ): x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)] if gt: ax.plot(x,y, z, lw=2, c='k') elif pred: ax.plot(x,y, z, lw=2, c='r') else: ax.plot(x,y, z, lw=2, c=lcolor if LR[i] else rcolor) RADIUS = 750 # space around the subject xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2] ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot]) ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) # if add_labels: # ax.set_xlabel("x") # ax.set_ylabel("z") # ax.set_zlabel("y") if add_labels: ax.set_xlabel("x") ax.set_ylabel("y") ax.set_zlabel("z") ax.set_aspect('equal') # Get rid of the panes (actually, make them white) white = (1.0, 1.0, 1.0, 
0.0) ax.w_xaxis.set_pane_color(white) ax.w_yaxis.set_pane_color(white) # Keep z pane # Get rid of the lines in 3d ax.w_xaxis.line.set_color(white) ax.w_yaxis.line.set_color(white) ax.w_zaxis.line.set_color(white) if inv_z: ax.invert_zaxis() def get_histogram2d(angles, validmap=None, smooth=True): """ Obtain a 2D histogram for discretized joint angles """ H, xedges, yedges = np.histogram2d(angles[:,0], angles[:,1], bins=(theta_edges, phi_edges) ) if validmap is not None: # rule out outliers mask = validmap != 0 H = H * mask H = H.reshape(-1) H = H/H.sum() return H def sample_from_histogram(histogram, x=np.arange(1,122,1), y=np.arange(1,62,1), total=1000, add_noise=False, bin_size=3 ): """ Sample from a pre-computed histogram. """ assert histogram.shape[0] == len(x) assert histogram.shape[1] == len(y) # normalize the histogram histogram = histogram/histogram.sum() # multiply to get final counts histogram = histogram*total none_zeros = histogram!=0 histogram = histogram.astype(np.int) histogram[none_zeros] = histogram[none_zeros] + 1 data = [] for x_id in x: for y_id in y: counts = histogram[x_id-1, y_id-1] if counts!=0: temp = np.array([[x[x_id - 1], y[y_id - 1]]]) data.append(np.repeat(temp, counts, axis=0)) data = np.vstack(data) if add_noise: noise = np.random.rand(*(data.shape))*bin_size data = (data - 1)*bin_size + noise return data[:total,:] def histogram_transform(histogram, gamma): """ Transform a distribution with power function. """ histogram = (histogram - histogram.min())/(histogram.max() - histogram.min()) histogram = np.power(histogram, gamma) return histogram #=============================================================================# # Visualization utilities. def smooth_histogram2d(data): """ Smooth a 2D histogram with kernel density estimation. 
""" from scipy.stats import kde # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents k = kde.gaussian_kde(data.T) xi, yi = np.mgrid[1:122, 1:62] zi = k(np.vstack([xi.flatten(), yi.flatten()])) # change the extent xi = xi*3 - 180 yi = yi*3 - 180 fig, axes = plt.subplots(ncols=1, nrows=3) # plot a density axes[0].set_title('Calculate Gaussian KDE') axes[0].pcolormesh(xi, yi, zi.reshape(xi.shape), cmap=plt.cm.BuGn_r) axes[0].set_aspect('equal') axes[0].invert_yaxis() # add shading axes[1].set_title('2D Density with shading') axes[1].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r) axes[1].set_aspect('equal') axes[1].invert_yaxis() # contour axes[2].set_title('Contour') axes[2].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r) axes[2].contour(xi, yi, zi.reshape(xi.shape)) axes[2].set_aspect('equal') axes[2].invert_yaxis() return def decorate_axis(ax, title=None): ax.set_xlabel('Theta: -180 to 180') ax.set_label('Phi:-90 to 90') if title is not None: ax.set_title(title) return def adjust_figure(left = 0, right = 1, bottom = 0.01, top = 0.95, wspace = 0, hspace = 0.4 ): plt.subplots_adjust(left, bottom, right, top, wspace, hspace) return def plot_relative_poses(skeletons, cameras=None): """ Visualize the distribution of front vector in camera coordinate system. 
""" # skeletons: 3D poses in world or camera coordinates # cameras: camera parameters vector_list = [] if cameras is None: for pose_id in range(len(skeletons)): _, _, front_vector = get_basis1(skeletons[pose_id].reshape(32, -1)) vector_list.append(front_vector) else: raise NotImplementedError vector_list = np.vstack(vector_list) spherical = to_spherical(vector_list) # convert to valid range and discretize spherical[:, 1:] *= 180/np.pi spherical[:, 1] = np.floor((spherical[:, 1]+180)/3 + 1) spherical[:, 2] = np.floor((spherical[:, 2]+90)/3 + 1) H, xedges, yedges = np.histogram2d(spherical[:,1], spherical[:,2], bins=(theta_edges, phi_edges)) plt.figure() ax = plt.subplot(111) plt.imshow(H.T, extent=[-180, 180, -90, 90], interpolation='bilinear') decorate_axis(ax, 'Relative camera pose in H36M') return vector_list def plot_distribution(H_temp, angle_idx, dataset_name, gamma, save_path='../viz' ): """ Visualize distribution of limb orientation. """ # angles: [n_samples, 2] in theta and phi order # plot the distribution of local joint angles and overlay valid regions plt.ioff() if not os.path.exists(save_path): os.makedirs(save_path) H = H_temp.copy() # normalize H = (H-H.min())/(H.max()-H.min()) # perform "gamma correction" H = np.power(H, gamma) # normalize again H = (H-H.min())/(H.max()-H.min()) H_return = H.copy().reshape(121,61) # map to color H = cmap(H).reshape((121, 61, 4)) H = [np.expand_dims(H[:,:,i].T, axis=2) for i in range(4)] H = np.concatenate(H, axis=2) if angle_idx in [0,1,3,5,7]: record_idx = record_indices[angle_idx] mask_temp = angle_spread[0, record_idx] mask = np.zeros((61, 121, 4)) mask[:,:,3] = mask_temp.T mask[:,:,1] = mask_temp.T f = plt.figure(figsize=(5, 6)) ax = plt.subplot(211) ax.imshow(H, extent=[-180, 180, -90, 90], interpolation='bilinear', alpha=1) decorate_axis(ax, 'Distribution of ' + dataset_name + ' for bone: ' + bone_name[angle_idx+1]) ax = plt.subplot(212) ax.imshow(mask, extent=[-180, 180, -90, 90],alpha=0.5) ax.imshow(H, 
extent=[-180, 180, -90, 90], interpolation='bilinear', alpha=0.5) decorate_axis(ax, 'Overlayed') plt.tight_layout() else: f = plt.figure() ax = plt.subplot(111) ax.imshow(H, extent=[-180, 180, -90, 90], interpolation='bilinear', alpha=1) decorate_axis(ax, 'Distribution of ' + dataset_name + ' for bone: ' + bone_name[angle_idx+1]) plt.tight_layout() adjust_figure(left = 0.135, right = 0.95, bottom = 0.05, top = 1, wspace = 0, hspace = 0 ) save_name = dataset_name + bone_name[angle_idx+1] + '_gamma_' + str(gamma) + '.jpg' f.savefig(os.path.join(save_path, save_name)) plt.close(f) return save_name, H_return #=============================================================================# # sampling utilities: sample 3D human skeleton from a pre-computed distribution template = np.load(os.path.join(root, 'template.npy'), allow_pickle=True).reshape(32,-1) template_bones = template[nt_parent_indices, :] - template[nt_child_indices, :] template_bone_lengths = to_spherical(template_bones)[:, 0] nt_parent_indices = [13, 17, 18, 25, 26, 6, 7, 1, 2] nt_child_indices = [15, 18, 19, 26, 27, 7, 8, 2, 3] def get_skeleton(bones, pose, bone_length=template_bone_lengths): """ Update the non-torso limb of a skeleton by specifying bone vectors. 
""" new_pose = pose.copy() for bone_idx in [0,1,3,5,7,2,4,6,8]: new_pose[nt_child_indices[bone_idx]] = new_pose[nt_parent_indices[bone_idx]] \ - bones[bone_idx]*bone_length[bone_idx] return new_pose def test_get_skeleton(): pose = template nt_parent_indices = [13, 17, 18, 25, 26, 6, 7, 1, 2] nt_child_indices = [15, 18, 19, 26, 27, 7, 8, 2, 3] global_bones = pose[nt_parent_indices, :] - pose[nt_child_indices, :] for bone_idx in range(len(global_bones)): global_bones[bone_idx] = normalize(global_bones[bone_idx]) local_c = to_local(pose) global_c = to_global(pose, local_c)['bg'] new_pose = get_skeleton(global_c, pose) maximum_error = np.max(np.abs(new_pose - pose)) print('maximum error', maximum_error) return maximum_error def grow_from_torso(poses, all_angles, cache=False): """ Update the non-torso limb of a skeleton by specifying limb orientations. """ new_poses = poses.copy() return_value = {} if cache: return_value['cache'] = [] return_value['bl'] = [] for pose_id in range(len(poses)): pose = poses[pose_id].reshape(32,-1) angles = all_angles[:, pose_id, :] # convert to spherical coordinate in radians spherical = np.ones((len(angles), 3)) spherical[:, 1:] = angles/180*np.pi spherical[:, 1] -= np.pi spherical[:, 2] -= np.pi/2 # convert to local cartisian coordinate local_xyz = to_xyz(spherical) # convert to global coordinate return_value_temp = to_global(pose, local_xyz, cache=cache) if cache: return_value['cache'].append(return_value_temp['cache']) return_value['bl'].append(return_value_temp['bl']) global_xyz = return_value_temp['bg'] new_poses[pose_id] = get_skeleton(global_xyz, new_poses[pose_id].reshape(32,-1)).reshape(-1) return_value['np'] = new_poses return return_value def test_grow_from_torso(): poses = template.reshape(1, 96) _, angles = is_valid(poses.reshape(32, -1), return_ang=True) local_coordinate = to_local(poses.reshape(32, -1)) angles = angles[:,1:].reshape(9, 1, 2) # to degrees angles *= 3 return_dic = grow_from_torso(poses, angles, cache=True) 
new_poses = return_dic['np'] bones_local = return_dic['bl'] print(np.max(np.abs(local_coordinate - bones_local))) maximum_error = np.max(np.abs(new_poses - poses)) print('maximum error1', maximum_error) _, new_angles = is_valid(new_poses, return_ang=True) maximum_error = np.max(np.abs(angles[:,0,:]/3 - new_angles[:,1:])) print('maximum error2', maximum_error) return def sample_lower_limbs(angles, bin_size=3): """ Sample limb orientations for lower limbs. """ # angles of shape [num_bones, sample_size, 2] for sample_id in range(angles.shape[1]): for angle_idx in [2, 4, 6, 8]: record_idx = record_indices[angle_idx] parent = angles[angle_idx - 1, sample_id, :] theta = np.floor(parent[0]/3) phi = np.floor(parent[1]/3) # convert to bins candidate_length = len(con_dis[record_idx][(theta, phi)]) # change some boundary points if candidate_length == 0: keys = list(con_dis[record_idx].keys()) while candidate_length == 0: temp_idx = np.random.choice(len(keys), 1) theta, phi = keys[temp_idx[0]] candidate_length = len(con_dis[record_idx][(theta, phi)]) chosen_idx = np.random.choice(candidate_length, 1) angles[angle_idx, sample_id, :] = con_dis[record_idx][(theta, phi)][chosen_idx] # convert to degrees with some noise angles[[2,4,6,8], :, :] = angles[[2,4,6,8], :, :]*bin_size angles[[2,4,6,8], :, :] += np.random.rand(4, angles.shape[1], 2)*bin_size return angles def sample_upper_limbs(angles, sample_num): """ Sample limb orientation for upper limbs. 
""" # sample torso limbs from the valid maps uniformly # angles of shape [num_bones, sample_size, 2] for angle_idx in [0, 1, 3, 5, 7]: valid_map = angle_spread[0, record_indices[angle_idx]] valid_map = valid_map.astype(np.float16) valid_map /= valid_map.sum() bone_angles = sample_from_histogram(valid_map, total=sample_num, add_noise=True) angles[angle_idx, :, :] = bone_angles return angles ================================================ FILE: libs/trainer/__init__.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Empty file. """ ================================================ FILE: libs/trainer/trainer.py ================================================ """ Utility functions for cascaded model training and evaluation. """ import libs.dataset.h36m.data_utils as data_utils import libs.model.model as model from libs.utils.utils import compute_similarity_transform import torch.nn.functional as F import torch import numpy as np import logging def train_cascade(train_dataset, eval_dataset, stats, action_eval_list, opt): """ Train a cascade of deep neural networks that lift 2D key-points to 3D pose. 
""" # initialize an empty cascade cascade = model.get_cascade() stage_record = [] input_size = len(stats['dim_use_2d']) output_size = len(stats['dim_use_3d']) # train each deep learner in the cascade sequentially for stage_id in range(opt.num_stages): # initialize a single deep learner stage_model = model.get_model(stage_id + 1, refine_3d=opt.refine_3d, norm_twoD=opt.norm_twoD, num_blocks=opt.num_blocks, input_size=input_size, output_size=output_size, linear_size=opt.linear_size, dropout=opt.dropout, leaky=opt.leaky) # record the stage number train_dataset.set_stage(stage_id+1) eval_dataset.set_stage(stage_id+1) for dataset in action_eval_list: dataset.set_stage(stage_id+1) # move the deep learner to GPU if opt.cuda: stage_model = stage_model.cuda() # prepare the optimizer and learning rate scheduler optim, sche = model.prepare_optim(stage_model, opt) # train the model record = train(train_dataset, eval_dataset, stage_model, optim, sche, stats, action_eval_list, opt) stage_model = record['model'] # record the training history stage_record.append((record['batch_idx'], record['loss'])) # update current estimates and regression target train_dataset.stage_update(stage_model, stats, opt) eval_dataset.stage_update(stage_model, stats, opt) # update evaluation datasets for each action if opt.evaluate_action: for dataset in action_eval_list: dataset.stage_update(stage_model, stats, opt) # put the trained model into the cascade cascade.append(stage_model.cpu()) # release memory del stage_model return {'cascade':cascade, 'record':stage_record} def evaluate_cascade(cascade, eval_dataset, stats, opt, save=False, save_path=None, action_wise=False, action_eval_list=None, apply_dropout=False ): """ Evaluate a cascaded model given a dataset object. 
""" loss, distance = None, None for stage_id in range(len(cascade)): print("#"+ "="*60 + "#") logging.info("Model performance after stage {:d}".format(stage_id + 1)) stage_model = cascade[stage_id] if opt.cuda: stage_model = stage_model.cuda() if action_wise: evaluate_action_wise(action_eval_list, stage_model, stats, opt) # update the current estimates and regression targets for dataset in action_eval_list: dataset.stage_update(stage_model, stats, opt) else: # evaluate up to this stage loss, distance = evaluate(eval_dataset, stage_model, stats, opt, save=save, save_path=save_path, procrustes=False, per_joint=True, apply_dropout=apply_dropout ) # update datasets eval_dataset.stage_update(stage_model, stats, opt) # release memory del stage_model return loss, distance def logger_print(epoch, batch_idx, batch_size, total_sample, total_batches, loss ): """ Log training history. """ msg = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format( epoch, batch_idx * batch_size, total_sample, 100. * batch_idx / total_batches, loss.data.item()) logging.info(msg) return def train(train_dataset, eval_dataset, model, optim, sche, stats, action_eval_list, opt, plot_loss=False): """ Train a single deep learner. 
""" x_data = [] y_data = [] eval_loss, eval_distance = evaluate(eval_dataset, model, stats, opt) if plot_loss: import matplotlib.pyplot as plt # plot loss curve during training ax = plt.subplot(111) lines = ax.plot(x_data, y_data) plt.xlabel('batch') plt.ylabel('training loss') for epoch in range(1, opt.epochs + 1): model.train() # update the learning rate according to the scheduler sche.step() # data loader train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_threads ) num_batches = len(train_loader) for batch_idx, batch in enumerate(train_loader): data = batch[0] target = batch[1] if opt.cuda: with torch.no_grad(): # move to GPU data, target = data.cuda(), target.cuda() # erase all computed gradient optim.zero_grad() # forward pass to get prediction prediction = model(data) # compute loss loss = F.mse_loss(prediction, target) # smoothed l1 loss function #loss = F.smooth_l1_loss(prediction, target) # compute gradient in the computational graph loss.backward() # update parameters in the model optim.step() # logging if batch_idx % opt.report_every == 0: logger_print(epoch, batch_idx, opt.batch_size, len(train_dataset), len(train_loader), loss) x_data.append(num_batches*(epoch-1) + batch_idx) y_data.append(loss.data.item()) if plot_loss: lines[0].set_xdata(x_data) lines[0].set_ydata(y_data) ax.relim() # update ax.viewLim using the new dataLim ax.autoscale_view() plt.draw() plt.pause(0.05) # optinal evaluation if opt.eval and batch_idx!= 0 and batch_idx % opt.eval_every == 0: eval_loss, eval_distance = evaluate(eval_dataset, model, stats, opt) # update learning rate if needed #sche.step(eval_loss) # reset to training mode model.train() # evaluate after each epoch if opt.eval_action_wise and epoch % 50 == 0: evaluate_action_wise(action_eval_list, model, stats, opt) logging.info('Training finished.') return {'model':model, 'batch_idx':x_data, 'loss':y_data} def evaluate_action_wise(dataset_list, model, 
stats, opt): """ Evaluate for a list of dataset objects, where each contains inputs for one action. """ record_P1 = {} record_P2 = {} average_P1 = 0 average_P2 = 0 protocols = opt.protocols for dataset in dataset_list: action = dataset.action_name if 'P1' in protocols: eval_loss, eval_distance = evaluate(dataset, model, stats, opt, verbose=False, procrustes = False) record_P1[action] = (eval_loss, eval_distance) average_P1 += eval_distance if 'P2' in protocols: eval_loss, eval_distance = evaluate(dataset, model, stats, opt, verbose=False, procrustes = True) record_P2[action] = (eval_loss, eval_distance) average_P2 += eval_distance average_P1 /= len(dataset_list) average_P2 /= len(dataset_list) # logging for protocol in protocols: logging.info("MPJPE under protocol {:s}".format(protocol)) record = record_P1 if protocol == 'P1' else record_P2 average = average_P1 if protocol == 'P1' else average_P2 for key in record.keys(): logging.info("Action: {:s}, error: {:.2f}".format(key, record[key][1])) logging.info("Average error over actions: {:.2f}".format(average)) return [record_P1, record_P2] def align_skeleton(skeletons_pred, skeletons_gt, num_of_joints): """ Apply per-frame procrustes alignment before computing MPJPE. """ for j in range(len(skeletons_gt)): gt = np.reshape(skeletons_gt[j,:], [-1,3]) out = np.reshape(skeletons_pred[j,:],[-1,3]) _, Z, T, b, c = compute_similarity_transform(gt, out, compute_optimal_scale=True ) out = (b * out.dot(T)) + c skeletons_pred[j,:] = np.reshape(out,[-1, (num_of_joints - 1) * 3]) return skeletons_pred def evaluate(eval_dataset, model, stats, opt, save = False, save_path=None, verbose = True, procrustes = False, per_joint = False, apply_dropout=False ): """ Evaluate a 2D-to-3D lifting model on a given PyTorch dataset. 
    Adapted from ICCV 2017 baseline https://github.com/una-dinosauria/3d-pose-baseline
    """
    # 14 or 17 joints are evaluated depending on the opt.pred14 flag
    num_of_joints = 14 if opt.pred14 else 17
    all_dists = []
    model.eval()
    if apply_dropout:
        def apply_dropout(m):
            # re-enable only the dropout layers while the rest of the model
            # stays in eval mode
            if type(m) == torch.nn.Dropout:
                m.train()
        # enable the dropout layers to produce a loss similar to the training
        # loss (only for debugging purpose)
        model.apply(apply_dropout)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size = opt.batch_size,
                                              shuffle = False,
                                              num_workers = opt.num_threads
                                              )
    total_loss = 0
    for batch_idx, batch in enumerate(eval_loader):
        data = batch[0]
        target = batch[1]
        if opt.cuda:
            with torch.no_grad():
                data, target = data.cuda(), target.cuda()
        # forward pass to get prediction
        prediction = model(data)
        # mean squared loss (summed over the batch; averaged after the loop)
        loss = F.mse_loss(prediction, target, reduction='sum')
        total_loss += loss.data.item()
        # unnormalize the data: map normalized outputs back to millimeters
        skeleton_3d_gt = data_utils.unNormalizeData(target.data.cpu().numpy(),
                                                    stats['mean_3d'],
                                                    stats['std_3d'],
                                                    stats['dim_ignore_3d']
                                                    )
        skeleton_3d_pred = data_utils.unNormalizeData(prediction.data.cpu().numpy(),
                                                      stats['mean_3d'],
                                                      stats['std_3d'],
                                                      stats['dim_ignore_3d']
                                                      )
        # pick the joints that are used
        dim_use = stats['dim_use_3d']
        skeleton_3d_gt_use = skeleton_3d_gt[:, dim_use]
        skeleton_3d_pred_use = skeleton_3d_pred[:, dim_use]
        # error after a rigid alignment, corresponding to protocol #2 in the paper
        if procrustes:
            skeleton_3d_pred_use = align_skeleton(skeleton_3d_pred_use,
                                                  skeleton_3d_gt_use,
                                                  num_of_joints
                                                  )
        # Compute Euclidean distance error per joint
        sqerr = (skeleton_3d_gt_use - skeleton_3d_pred_use)**2 # Squared error between prediction and expected output
        dists = np.zeros((sqerr.shape[0], num_of_joints)) # Array with L2 error per joint in mm
        dist_idx = 0
        for k in np.arange(0, num_of_joints*3, 3):
            # Sum across X, Y, and Z dimensions to obtain L2 distance
            dists[:,dist_idx] = np.sqrt(np.sum(sqerr[:, k:k+3], axis=1))
            dist_idx = dist_idx + 1
        all_dists.append(dists)
    all_dists = np.vstack(all_dists)
    if per_joint:
        # show average error for each joint
        error_per_joint = all_dists.mean(axis = 0)
        logging.info('Average error for each joint: ')
        print(error_per_joint)
    # NOTE(review): the divisor hard-codes 16 joints * 3 coordinates even when
    # num_of_joints is 14 or 17 — confirm this matches the number of entries
    # actually contributing to the summed MSE loss.
    avg_loss = total_loss/(len(eval_dataset)*16*3)
    if save:
        record = {'error':all_dists}
        np.save(save_path, np.array(record))
    avg_distance = all_dists.mean()
    if verbose:
        logging.info('Evaluation set: average loss: {:.4f} '.format(avg_loss))
        logging.info('Evaluation set: average joint distance: {:.4f} '.format(avg_distance))
    return avg_loss, avg_distance



================================================
FILE: libs/utils/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""



================================================
FILE: libs/utils/utils.py
================================================
"""
Utility functions.
"""
import libs.dataset.h36m.data_utils as data_utils
import libs.dataset.h36m.cameras as cameras
import libs.dataset.h36m.pth_dataset as dataset
import libs.visualization.viz as viz

import logging
import time

import numpy as np
import torch
import matplotlib.pyplot as plt

import os

# width of the 2D input vector (16 joints * 2 coordinates)
input_size = 32
# width of the 3D output vector (16 joints * 3 coordinates)
output_size = 48
# global configurations
camera_frame = True  # camera coordinate
predict_14 = False   # predict 14 joints

def save_ckpt(opt, record, stats):
    """
    Save training results (the trained cascade and the normalization
    statistics) under opt.save_root.

    Args
        opt: options object; reads opt.save, opt.save_name, opt.num_stages,
             opt.num_blocks, opt.extra_str and opt.save_root
        record: dictionary holding the trained model under key 'cascade'
        stats: normalization statistics saved alongside the model
    Returns
        True if a checkpoint was written, False if opt.save is disabled.
    """
    cascade = record['cascade']
    if not opt.save:
        return False
    # NOTE(review): when opt.save_name is NOT None, save_name is never bound
    # before the += below, which would raise NameError — presumably an
    # `else: save_name = opt.save_name` branch is missing; confirm intent.
    if opt.save_name is None:
        save_name = time.asctime()
    save_name += ('stages_' + str(opt.num_stages) +
                  'blocks_' + str(opt.num_blocks) +
                  opt.extra_str
                  )
    save_dir = os.path.join(opt.save_root, save_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    torch.save(cascade, os.path.join(save_dir, 'model.th'))
    np.save(os.path.join(save_dir, 'stats.npy'), stats)
    print('Model saved at ' + save_dir)
    return True

def load_ckpt(opt):
    """
    Load a saved cascade and its normalization statistics from opt.ckpt_dir.

    Returns
        cascade: the de-serialized model (moved to GPU if opt.cuda)
        stats: dictionary of normalization statistics
    """
    cascade = torch.load(os.path.join(opt.ckpt_dir, 'model.th'))
    stats = np.load(os.path.join(opt.ckpt_dir, 'stats.npy'), allow_pickle=True).item()
    if opt.cuda:
        cascade.cuda()
    return cascade, stats

def list_remove(list_a, list_b):
    # Return the elements of list_a that are not present in list_b,
    # preserving the original order of list_a.
    list_c = []
    for item in list_a:
        if item not in list_b:
            list_c.append(item)
    return list_c

def get_all_data(data_x, data_y, camera_frame):
    """
    Obtain a list of all the batches, randomly permuted

    Args
      data_x: dictionary with 2d inputs
      data_y: dictionary with 3d expected outputs
      camera_frame: whether the 3d data is in camera coordinates
    Returns
      encoder_inputs: list of 2d batches
      decoder_outputs: list of 3d batches
    """
    # Figure out how many frames we have
    n = 0
    for key2d in data_x.keys():
        n2d, _ = data_x[ key2d ].shape
        n = n + n2d

    encoder_inputs  = np.zeros((n, input_size), dtype=float)
    decoder_outputs = np.zeros((n, output_size), dtype=float)

    # Put all the data into big arrays
    idx = 0
    for key2d in data_x.keys():
        (subj, b, fname) = key2d
        # keys should be the same if 3d is in camera coordinates
        key3d = key2d if (camera_frame) else (subj, b, '{0}.h5'.format(fname.split('.')[0]))
        # strip the '-sh' (stacked-hourglass detection) suffix to locate the
        # matching 3d key when working in camera coordinates
        key3d = (subj, b, fname[:-3]) if fname.endswith('-sh') and camera_frame else key3d

        n2d, _ = data_x[ key2d ].shape
        encoder_inputs[idx:idx+n2d, :]  = data_x[ key2d ]
        decoder_outputs[idx:idx+n2d, :] = data_y[ key3d ]
        idx = idx + n2d

    return encoder_inputs, decoder_outputs

def adjust_figure(left = 0,
                  right = 1,
                  bottom = 0.01,
                  top = 0.95,
                  wspace = 0,
                  hspace = 0.4):
    # Thin wrapper over plt.subplots_adjust; note the positional order of
    # subplots_adjust is (left, bottom, right, top, wspace, hspace).
    plt.subplots_adjust(left, bottom, right, top, wspace, hspace)
    return

def visualize(eval_dataset, model, stats, opt, save=False, save_dir=None):
    """
    Visualize model predictions batch by batch: 2D input pose, 3D ground
    truth, and 3D prediction overlaid on the ground truth. Interactive
    (rotating view) unless save=True, in which case figures are written to
    save_dir as PNG files.
    """
    batch_size = 9
    # how many batches to save
    if save:
        num_batches = 10
        current_batch = 1
    model.eval()
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size,
                                              shuffle = True,
                                              num_workers = opt.num_threads)
    for batch_idx, batch in enumerate(eval_loader):
        if save and current_batch > num_batches:
            break
        data = batch[0]
        target = batch[1]
        if opt.cuda:
            with torch.no_grad():
                # move to GPU
                data, target = data.cuda(), target.cuda()
        # forward pass to get prediction
        prediction = model(data)
        # un-normalize the data
        skeleton_2d = data_utils.unNormalizeData(data.data.cpu().numpy(),
                                                 stats['mean_2d'],
                                                 stats['std_2d'],
                                                 stats['dim_ignore_2d'])
        skeleton_3d_gt = data_utils.unNormalizeData(target.data.cpu().numpy(),
                                                    stats['mean_3d'],
                                                    stats['std_3d'],
                                                    stats['dim_ignore_3d'])
        skeleton_3d_pred = data_utils.unNormalizeData(prediction.data.cpu().numpy(),
                                                      stats['mean_3d'],
                                                      stats['std_3d'],
                                                      stats['dim_ignore_3d'])
        # visualizing
        if save:
            plt.ioff()
        f = plt.figure(figsize=(16, 8))
        axes = []
        for sample_idx in range(batch_size):
            # column 1: 2D input (y-axis inverted to match image coordinates)
            ax = plt.subplot(3, 9, 3*sample_idx + 1)
            viz.show2Dpose(skeleton_2d[sample_idx], ax)
            plt.gca().invert_yaxis()
            # column 2: 3D ground truth
            ax = plt.subplot(3, 9, 3*sample_idx + 2, projection='3d')
            viz.show3Dpose(skeleton_3d_gt[sample_idx], ax)
            # column 3: 3D prediction overlaid on ground truth
            ax = plt.subplot(3, 9, 3*sample_idx + 3, projection='3d')
            viz.show3Dpose(skeleton_3d_pred[sample_idx], ax, pred=True)
            viz.show3Dpose(skeleton_3d_gt[sample_idx], ax, gt=True)
            axes.append(ax)
        adjust_figure(left = 0.05,
                      right = 0.95,
                      bottom = 0.05,
                      top = 0.95,
                      wspace = 0.3,
                      hspace = 0.3)
        if not save:
            plt.pause(0.5)
            # rotate the axes and update
            for angle in range(0, 360, 5):
                for ax in axes:
                    ax.view_init(30, angle)
                plt.draw()
                plt.pause(.001)
            input('Press enter to view next batch.')
        else:
            # save plot
            f.savefig(save_dir +'/'+ str(current_batch) + '.png')
            plt.close(f)
            del axes
        if save:
            current_batch += 1
    return

def temp_visualize(eval_dataset, model, stats, opt):
    """
    Ad-hoc visualization of a single pose loaded from ./pics/pts1.npy:
    normalize the 2D input, run the model, and show the 2D input beside the
    predicted 3D pose. Debugging helper; requires CUDA.
    """
    model.eval()
    data = np.load('./pics/pts1.npy').astype(np.float32)
    # drop the first two columns of the stored array
    # NOTE(review): presumably these columns are joint indices/confidences —
    # verify against the file layout.
    data = data[:,2:]
    # normalize the data
    mean_vec = stats['mean_2d'][stats['dim_use_2d']]
    std_vec = stats['std_2d'][stats['dim_use_2d']]
    data = (data-mean_vec)/std_vec
    data = torch.from_numpy(data.astype(np.float32))
    data = data.cuda()
    # forward pass to get prediction
    prediction = model(data)
    # un-normalize the data
    skeleton_2d = data_utils.unNormalizeData(data.data.cpu().numpy(),
                                             stats['mean_2d'],
                                             stats['std_2d'],
                                             stats['dim_ignore_2d'])
    skeleton_3d_pred = data_utils.unNormalizeData(prediction.data.cpu().numpy(),
                                                  stats['mean_3d'],
                                                  stats['std_3d'],
                                                  stats['dim_ignore_3d'])
    # visualizing
    plt.figure()
    ax = plt.subplot(1, 2, 1)
    viz.show2Dpose(skeleton_2d[0], ax)
    plt.gca().invert_yaxis()
    ax = plt.subplot(1, 2, 2, projection='3d')
    viz.show3Dpose(skeleton_3d_pred[0], ax, pred=True)
    plt.show()
    # rotate the axes and update
    # for angle in range(0, 360, 5):
    #     for ax in axes:
    #         ax.view_init(30, angle)
    #     plt.draw()
    #     plt.pause(.001)
    # input('Press enter to view next batch.')
    return

def visualize_cascade(eval_dataset, cascade, stats, opt, save=False, save_dir=None):
    """
    Visualize the prediction of every stage of a cascade, batch by batch:
    one 2D input column followed by one 3D column per stage (each stage's
    prediction overlaid on the ground truth). Interactive unless save=True.

    NOTE(review): the debug block below reshapes each batch to (1, 32), i.e.
    it only works with an effective batch of a single sample despite
    batch_size = 5 — confirm whether this leftover experiment code is still
    intended.
    """
    num_stages = len(cascade)
    batch_size = 5
    # how many batches to save
    if save:
        num_batches = 10
        current_batch = 1
    for stage_model in cascade:
        stage_model.eval()
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size,
                                              shuffle = False,
                                              num_workers = opt.num_threads)
    for batch_idx, batch in enumerate(eval_loader):
        if save and current_batch > num_batches:
            break
        data = batch[0]
        ## debug
        # enc_in = np.array([[648., 266], [679, 311], [688, 320], [693, 161],
        #                    [620, 244], [526, 156], [642, 160], [590, 310],
        #                    [505, 350], [380, 375], [491, 285],
        #                    [543, 190], [572, 119], [515, 417], [518, 514],
        #                    [512, 638]],dtype=np.float32)
        enc_in = data
        enc_in = enc_in.reshape(1, 32)
        # normalize
        data_mean_2d = stats['mean_2d']
        dim_to_use_2d = stats['dim_use_2d']
        data_std_2d = stats['std_2d']
        enc_in = (enc_in - data_mean_2d[dim_to_use_2d])/data_std_2d[dim_to_use_2d]
        data = torch.from_numpy(enc_in.astype(np.float32))
        ## End experiment 2019/10/16
        target = batch[1]
        # store predictions for each stage
        prediction_stages = []
        if opt.cuda:
            with torch.no_grad():
                # move to GPU
                data, target = data.cuda(), target.cuda()
        # forward pass to get prediction for the first stage
        prediction = cascade[0](data)
        prediction_stages.append(prediction)
        # prediction for later stages: each stage refines the accumulated
        # estimate by predicting a residual
        for stage_idx in range(1, num_stages):
            prediction = cascade[stage_idx](data)
            prediction_stages.append(prediction_stages[stage_idx-1] + prediction)
        # un-normalize the data
        skeleton_2d = data_utils.unNormalizeData(data.data.cpu().numpy(),
                                                 stats['mean_2d'],
                                                 stats['std_2d'],
                                                 stats['dim_ignore_2d'])
        skeleton_3d_gt = data_utils.unNormalizeData(target.data.cpu().numpy(),
                                                    stats['mean_3d'],
                                                    stats['std_3d'],
                                                    stats['dim_ignore_3d'])
        for stage_idx in range(num_stages):
            prediction_stages[stage_idx] = data_utils.unNormalizeData(prediction_stages[stage_idx].data.cpu().numpy(),
                                                                      stats['mean_3d'],
                                                                      stats['std_3d'],
                                                                      stats['dim_ignore_3d'])
        ## save intermediate results
        # import scipy.io as sio
        # p3d = prediction_stages[0]
        # sio.savemat('./teaser_pose3d.mat', {'pred_3d':p3d.reshape(32,3),
        # 'pred_2d':np.array([[648., 266], [679, 311], [688, 320], [693, 161],
        # [620, 244], [526, 156], [642, 160], [590, 310],
        # [505, 350], [447, 348], [380, 375], [491, 285],
        # [543, 190], [572, 119], [515, 417], [518, 514],
        # [512, 638]])})
        ## End Experiment 2019/10/16
        # visualizing
        if save:
            plt.ioff()
        f = plt.figure(figsize=(16, 8))
        axes = []
        for sample_idx in range(batch_size):
            for stage_idx in range(num_stages):
                # first column of the row: the 2D input
                ax = plt.subplot(batch_size, num_stages+1, 1+(num_stages+1)*sample_idx)
                viz.show2Dpose(skeleton_2d[sample_idx], ax)
                plt.gca().invert_yaxis()
                # one column per stage: prediction overlaid on ground truth
                ax = plt.subplot(batch_size, num_stages+1, 2+stage_idx+(num_stages+1)*sample_idx, projection='3d')
                viz.show3Dpose(prediction_stages[stage_idx][sample_idx], ax, pred=True)
                viz.show3Dpose(skeleton_3d_gt[sample_idx], ax, gt=True)
                axes.append(ax)
        adjust_figure(left = 0.05,
                      right = 0.95,
                      bottom = 0.05,
                      top = 0.95,
                      wspace = 0.3,
                      hspace = 0.3)
        if not save:
            plt.pause(0.5)
            # rotate the axes and update
            # for angle in range(0, 360, 5):
            #     for ax in axes:
            #         ax.view_init(30, angle)
            #     plt.draw()
            #     plt.pause(.001)
            input('Press enter to view next batch.')
        else:
            # save plot
            f.savefig(save_dir +'/'+ str(current_batch) + '.png')
            plt.close(f)
            del axes
        if save:
            current_batch += 1
    return

def compute_similarity_transform(X, Y, compute_optimal_scale=False):
    """
    A port of MATLAB's `procrustes` function to Numpy.
    Adapted from http://stackoverflow.com/a/18927641/1884420

    Args
        X: array NxM of targets, with N number of points and M point dimensionality
        Y: array NxM of inputs
        compute_optimal_scale: whether we compute optimal scale or force it to be 1
    Returns:
        d: squared error after transformation
        Z: transformed Y
        T: computed rotation
        b: scaling
        c: translation
    """
    muX = X.mean(0)
    muY = Y.mean(0)

    X0 = X - muX
    Y0 = Y - muY

    ssX = (X0**2.).sum()
    ssY = (Y0**2.).sum()

    # centred Frobenius norm
    normX = np.sqrt(ssX)
    normY = np.sqrt(ssY)

    # scale to equal (unit) norm
    X0 = X0 / normX
    Y0 = Y0 / normY

    # optimum rotation matrix of Y
    A = np.dot(X0.T, Y0)
    U,s,Vt = np.linalg.svd(A,full_matrices=False)
    V = Vt.T
    T = np.dot(V, U.T)

    # Make sure we have a rotation (det(T) = +1): flip the last singular
    # vector if the SVD produced a reflection
    detT = np.linalg.det(T)
    V[:,-1] *= np.sign( detT )
    s[-1]   *= np.sign( detT )
    T = np.dot(V, U.T)

    traceTA = s.sum()

    if compute_optimal_scale:
        # Compute optimum scaling of Y.
        b = traceTA * normX / normY
        d = 1 - traceTA**2
        Z = normX*traceTA*np.dot(Y0, T) + muX
    else:
        # If no scaling allowed
        b = 1
        d = 1 + ssY/ssX - 2 * traceTA * normY / normX
        Z = normY*np.dot(Y0, T) + muX

    c = muX - b*np.dot(muY, T)

    return d, Z, T, b, c



================================================
FILE: libs/visualization/__init__.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empty file.
"""



================================================
FILE: libs/visualization/viz.py
================================================
"""
Functions to visualize human poses
"""
import matplotlib.pyplot as plt
import libs.dataset.h36m.data_utils as data_utils
import numpy as np
# import h5py
import os

from mpl_toolkits.mplot3d import Axes3D

def show3Dpose(channels,
               ax,
               lcolor="#3498db",
               rcolor="#e74c3c",
               add_labels=True,
               gt=False,
               pred=False): # blue, orange
    """
    Visualize a 3d skeleton

    Args
        channels: 96x1 vector. The pose to plot.
        ax: matplotlib 3d axis to draw on
        lcolor: color for left part of the body
        rcolor: color for right part of the body
        add_labels: whether to add coordinate labels
        gt: draw the skeleton in black (ground truth)
        pred: draw the skeleton in red (prediction)
    Returns
        Nothing. Draws on ax.
    """
    assert channels.size == len(data_utils.H36M_NAMES)*3, "channels should have 96 entries, it has %d instead" % channels.size
    vals = np.reshape( channels, (len(data_utils.H36M_NAMES), -1) )

    # bone list: each bone connects joint I[i] to joint J[i] (0-based after
    # the -1); LR marks which bones belong to the left side of the body
    I  = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 # start points
    J  = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 # end points
    LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool)

    # Make connection matrix
    for i in np.arange( len(I) ):
        x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)]
        # gt/pred skeletons are drawn with axes swapped (x, z, -y) so the
        # figure is upright; the default branch plots raw (x, y, z)
        if gt:
            ax.plot(x,z, -y,  lw=2, c='k')
        #        ax.plot(x,y, z,  lw=2, c='k')
        elif pred:
            ax.plot(x,z, -y,  lw=2, c='r')
        #        ax.plot(x,y, z,  lw=2, c='r')
        else:
        #        ax.plot(x,z, -y,  lw=2, c=lcolor if LR[i] else rcolor)
            ax.plot(x,y, z,  lw=2, c=lcolor if LR[i] else rcolor)

    RADIUS = 750 # space around the subject
    xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2]
    ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot])
    ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot])
    ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot])

    if add_labels:
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")

    # Get rid of the ticks and tick labels
    #  ax.set_xticks([])
    #  ax.set_yticks([])
    #  ax.set_zticks([])
    #
    #  ax.get_xaxis().set_ticklabels([])
    #  ax.get_yaxis().set_ticklabels([])
    #  ax.set_zticklabels([])
    ax.set_aspect('equal')

    # Get rid of the panes (actually, make them white)
    white = (1.0, 1.0, 1.0, 0.0)
    ax.w_xaxis.set_pane_color(white)
    ax.w_yaxis.set_pane_color(white)
    # Keep z pane

    # Get rid of the lines in 3d
    ax.w_xaxis.line.set_color(white)
    ax.w_yaxis.line.set_color(white)
    ax.w_zaxis.line.set_color(white)

def show2Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=False):
    """
    Visualize a 2d skeleton

    Args
        channels: 64x1 vector. The pose to plot.
        ax: matplotlib axis to draw on
        lcolor: color for left part of the body
        rcolor: color for right part of the body
        add_labels: whether to add coordinate labels
    Returns
        Nothing. Draws on ax.
    """
    assert channels.size == len(data_utils.H36M_NAMES)*2, "channels should have 64 entries, it has %d instead" % channels.size
    vals = np.reshape( channels, (-1, 2) )
    #plt.plot(vals[:,0], vals[:,1], 'ro')

    # bone list: each bone connects joint I[i] to joint J[i] (0-based after
    # the -1); LR marks which bones belong to the left side of the body
    I  = np.array([1,2,3,1,7,8,1, 13,14,14,18,19,14,26,27])-1 # start points
    J  = np.array([2,3,4,7,8,9,13,14,16,18,19,20,26,27,28])-1 # end points
    LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool)

    # Make connection matrix
    for i in np.arange( len(I) ):
        x, y = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(2)]
        ax.plot(x, y, lw=2, c=lcolor if LR[i] else rcolor)

    # Get rid of the ticks
    #  ax.set_xticks([])
    #  ax.set_yticks([])
    #
    #  # Get rid of tick labels
    #  ax.get_xaxis().set_ticklabels([])
    #  ax.get_yaxis().set_ticklabels([])

    RADIUS = 350 # space around the subject
    xroot, yroot = vals[0,0], vals[0,1]
    ax.set_xlim([-RADIUS+xroot, RADIUS+xroot])
    ax.set_ylim([-RADIUS+yroot, RADIUS+yroot])
    if add_labels:
        ax.set_xlabel("x")
        ax.set_ylabel("z")

    ax.set_aspect('equal')



================================================
FILE: resources/.gitignore
================================================
# Ignore everything in this directory
*
# Except this file
!.gitignore


================================================
FILE: spec-list.txt
================================================
# This file may be used to create an environment using:
# $ conda create --name --file
# platform: linux-64
@EXPLICIT
https://repo.anaconda.com/pkgs/main/linux-64/conda-env-2.6.0-1.tar.bz2
https://repo.anaconda.com/pkgs/free/linux-64/libgfortran-3.0.0-1.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.3.9-hecc5488_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/cudatoolkit-9.0-h13b8566_0.tar.bz2
https://repo.anaconda.com/pkgs/free/linux-64/freeglut-2.8.1-0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/gmp-6.1.2-h6c8ec71_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/icu-58.2-h9c2bf20_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/lzo-2.10-h49e0be7_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/snappy-1.1.7-hbae5bb6_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/yaml-0.1.7-had09818_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.6-h14c3975_5.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cudnn-7.3.1-cuda9.0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/expat-2.2.6-he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/fribidi-1.0.5-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gettext-0.19.8.1-hd7bead4_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/giflib-5.1.4-h14c3975_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/graphite2-1.3.13-h23475e2_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jbig-2.1-hdba287a_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9b-h024ee3a_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-7.2.0-h69d50b8_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libglu-9.0.0-hf484d3e_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libiconv-1.15-h63c8f33_5.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/liblief-0.9.0-h7725739_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.3-h5a2b251_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libopus-1.3-h7b6447c_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/libsodium-1.0.16-h1bed415_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libtool-2.4.6-h7b6447c_5.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.0.3-h1bed415_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libvpx-1.7.0-h439df22_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.13-h1bed415_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.8.1.2-h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl-2018.0.3-1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mpfr-3.1.5-h11a74b3_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nccl-1.3.5-cuda9.0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/nettle-3.3-0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.5-h9ac9557_1001.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1b-h14c3975_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/patchelf-0.9-he6710b0_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.43-he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pixman-0.38.0-h7b6447c_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/x264-1!152.20180806-h14c3975_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h14c3975_1002.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.0.9-h516909a_1004.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h14c3975_1002.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h14c3975_1002.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h14c3975_1007.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/blosc-1.15.0-hd408876_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/glib-2.56.2-hd408876_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/gnutls-3.5.19-h2a4e5f8_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/hdf5-1.10.2-hba1933b_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jasper-2.0.14-h07fcdf6_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-4_openblas.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.36-hbc83047_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libprotobuf-3.5.2-h6f1eeef_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libssh2-1.8.0-h1ba5d50_4.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.9.9-he19cac6_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mpc-1.0.3-hec55b23_5.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/openh264-1.7.0-0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pandoc-2.2.3.2-0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.2-h470a237_5.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.6.7-h14c3975_1000.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zeromq-4.2.5-hf484d3e_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.3.7-h0b5b093_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.6-h746ee38_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.9.1-h8a8886c_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.0-hb453b48_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.16.1-h173b8e3_7.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libarchive-3.3.3-h5d8350f_5.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-4_openblas.tar.bz2 
https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-4_openblas.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.0.10-h2733197_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libxslt-1.1.33-h7d1a2b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/unixodbc-2.3.7-h14c3975_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h516909a_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h516909a_1002.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ffmpeg-4.0-hcdf2ecd_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.13.0-h9420a91_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.0-hbbd80ab_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libcurl-7.64.0-h20c2e04_2.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-4_openblas.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.0.0-h222930b_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/alabaster-0.7.12-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/appdirs-1.4.3-py36h28b3542_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/asn1crypto-0.24.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/atomicwrites-1.3.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/attrs-19.1.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/backcall-0.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/backports-1.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/bitarray-0.8.3-py36h14c3975_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/blas-2.4-openblas.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/boto-2.49.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cairo-1.14.12-h8948797_3.tar.bz2 
https://conda.anaconda.org/conda-forge/linux-64/certifi-2019.3.9-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/chardet-3.0.4-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/click-7.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/cloudpickle-0.7.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/colorama-0.4.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/constantly-15.1.0-py36h28b3542_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/contextlib2-0.5.5-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/cryptography-vectors-2.6.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/curl-7.64.0-hbc83047_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/dask-core-1.1.4-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/decorator-4.4.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/defusedxml-0.5.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/docutils-0.14-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/entrypoints-0.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/et_xmlfile-1.0.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/fastcache-1.0.2-py36h14c3975_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/filelock-3.0.10-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/future-0.17.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/glob2-0.6-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gmpy2-2.0.8-py36hc8893dd_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/greenlet-0.4.15-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/heapdict-1.0.0-py36_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/idna-2.8-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/imagesize-1.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/incremental-17.5.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipython_genutils-0.2.0-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/itsdangerous-1.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jdcal-1.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jeepney-0.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.0.1-py36hf484d3e_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/lazy-object-proxy-1.3.1-py36h14c3975_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/llvmlite-0.28.0-py36hd408876_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/locket-0.2.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/lxml-4.3.2-py36hefd8a0e_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/markupsafe-1.1.1-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mccabe-0.6.1-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mistune-0.8.4-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mkl-service-1.1.2-py36h651fb7a_4.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/more-itertools-6.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/mpmath-1.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/msgpack-python-0.6.1-py36hfd86e86_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ninja-1.9.0-py36hfd86e86_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/olefile-0.46-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pandocfilters-1.4.2-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/parso-0.3.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pep8-1.7.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pickleshare-0.7.5-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pkginfo-1.5.0.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pluggy-0.9.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/prometheus_client-0.6.0-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/psutil-5.6.1-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ptyprocess-0.6.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/py-1.8.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/py-lief-0.9.0-py36h7725739_2.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/pyasn1-0.4.5-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycodestyle-2.5.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycosat-0.6.3-py36h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycparser-2.19-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycrypto-2.6.1-py36h14c3975_9.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pycurl-7.43.0.2-py36h1ba5d50_0.tar.bz2 https://conda.anaconda.org/conda-forge/noarch/pydicom-1.2.2-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyflakes-2.1.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyodbc-4.0.26-py36he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-2.3.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pysocks-1.6.8-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/python-libarchive-c-2.8-py36_6.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/pyyaml-5.1-py36h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyzmq-17.1.2-py36h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/qt-5.9.7-h5867ecd_1.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qtpy-1.7.0-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/rope-0.12.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ruamel_yaml-0.15.46-py36h14c3975_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/send2trash-1.5.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/simplegeneric-0.8.1-py36_2.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/sip-4.19.8-py36hf484d3e_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/snowballstemmer-1.2.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sortedcontainers-2.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/soupsieve-1.8-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sphinxcontrib-1.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sqlalchemy-1.3.1-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/tblib-1.3.2-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/testpath-0.4.2-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/toolz-0.9.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.0.2-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/tqdm-4.31.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/typed-ast-1.3.1-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/typing-3.6.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/unicodecsv-0.14.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wcwidth-0.1.7-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/webencodings-0.5.1-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/werkzeug-0.14.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wrapt-1.11.1-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wurlitzer-1.0.2-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/xlrd-1.2.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/xlsxwriter-1.1.5-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/xlwt-1.3.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zipp-0.3.3-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zope-1.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/automat-0.7.0-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/babel-2.6.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/backports.os-0.1.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/backports.shutil_get_terminal_size-1.0.0-py36_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/beautifulsoup4-4.7.1-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cffi-1.12.2-py36h2e261b9_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cycler-0.10.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cytoolz-0.9.0.1-py36h14c3975_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/harfbuzz-1.8.8-hffaf4a1_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/html5lib-1.0.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/hyperlink-18.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/importlib_metadata-0.8-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jedi-0.13.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/multipledispatch-0.6.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nltk-3.4-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nomkl-3.0-0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.2-py36h2f8d375_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/openpyxl-2.6.1-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/packaging-19.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/partd-0.3.10-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pathlib2-2.3.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pexpect-4.6.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pillow-5.4.1-py36h34e0f95_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/protobuf-3.5.2-py36hf484d3e_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyasn1-modules-0.2.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyhamcrest-1.9.0-py36_2.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.9.2-py36h05f1152_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyrsistent-0.14.11-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/qtawesome-0.5.7-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/singledispatch-3.4.0.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/snakeviz-1.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sortedcollections-1.1.2-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sphinxcontrib-websupport-1.1.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sympy-1.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/terminado-0.8.1-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/traitlets-4.3.2-py36_0.tar.bz2 https://conda.anaconda.org/conda-forge/noarch/yacs-0.1.6-py_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zict-0.1.4-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/zope.interface-4.6.0-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/astroid-2.2.5-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/bleach-3.1.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/clyent-1.2.2-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cryptography-2.6.1-py36h1ba5d50_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.6-py36he6710b0_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/distributed-1.26.0-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/get_terminal_size-1.0.0-haa9412d_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/gevent-1.4.0-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/isort-4.3.16-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jinja2-2.10-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/jsonschema-3.0.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jupyter_core-4.4.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/libopencv-3.4.2-hb342d67_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/networkx-2.2-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nose-1.3.7-py36_2.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.2-py36h99e49ec_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/openblas-devel-0.3.3-3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pango-1.42.4-h049681c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/path.py-11.5.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pygments-2.3.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pytest-4.3.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/conda-verify-3.1.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/flask-1.0.2-py36_1.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/graphviz-2.40.1-h21bd128_2.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/h5py-2.8.0-py36h989c5e5_3.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/imageio-2.5.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jupyter_client-5.2.4-py36_0.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/matplotlib-3.0.3-py36h5429711_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/nbformat-4.4.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/prompt_toolkit-2.0.9-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pylint-2.3.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pyopenssl-19.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/pytest-openfiles-0.3.2-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/pytest-remotedata-0.3.1-py36_0.tar.bz2 https://conda.anaconda.org/pytorch/linux-64/pytorch-1.0.1-py3.6_cuda9.0.176_cudnn7.4.2_2.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/pywavelets-1.0.2-py36hdd07704_0.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/scipy-1.2.1-py36he2b7bc3_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/secretstorage-3.1.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/service_identity-18.1.0-py36h28b3542_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/flask-cors-3.0.7-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipython-7.4.0-py36h39e3cac_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/keyring-18.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/noarch/nbconvert-5.4.1-py_2.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/scikit-image-0.14.2-py36he6710b0_0.tar.bz2 https://conda.anaconda.org/anaconda/linux-64/scikit-learn-0.20.3-py36h22eb022_0.tar.bz2 https://conda.anaconda.org/pytorch/noarch/torchvision-0.2.2-py_3.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/twisted-18.9.0-py36h7b6447c_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/urllib3-1.24.1-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/ipykernel-5.1.0-py36h39e3cac_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/requests-2.21.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/anaconda-client-1.7.2-py36_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-64/conda-4.6.14-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/jupyter_console-6.0.0-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/notebook-5.7.8-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/qtconsole-4.4.3-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/sphinx-1.8.5-py36_0.tar.bz2 https://repo.anaconda.com/pkgs/main/linux-64/spyder-kernels-0.4.2-py36_0.tar.bz2 
https://repo.anaconda.com/pkgs/main/linux-64/anaconda-navigator-1.9.7-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/anaconda-project-0.8.2-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/conda-build-3.17.8-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/numpydoc-0.8.0-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/widgetsnbextension-3.4.2-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/ipywidgets-7.4.2-py36_0.tar.bz2
https://repo.anaconda.com/pkgs/main/linux-64/spyder-3.3.3-py36_0.tar.bz2
================================================ FILE: tools/2Dto3Dnet.py ================================================
"""
3D pose estimation based on 2D key-point coordinates as inputs.

Author: Shichao Li
Email: nicholas.li@connect.ust.hk
"""
import logging
import os
import sys
sys.path.append("../")

import torch
import numpy as np

import libs.parser.parse as parse
import libs.utils.utils as utils
import libs.dataset.h36m.data_utils as data_utils
import libs.trainer.trainer as trainer

def main():
    """
    Entry point: depending on the parsed command-line flags, train,
    visualize and/or evaluate a cascaded 2D-to-3D pose estimation model.
    """
    # logging configuration
    logging.basicConfig(level=logging.INFO,
                        format="[%(asctime)s]: %(message)s"
                        )
    # parse command line input
    opt = parse.parse_arg()

    # Set GPU
    opt.cuda = opt.gpuid >= 0
    if opt.cuda:
        torch.cuda.set_device(opt.gpuid)
    else:
        logging.info("GPU is disabled.")

    # dataset preparation
    train_dataset, eval_dataset, stats, action_eval_list = \
        data_utils.prepare_dataset(opt)

    if opt.train:
        # train a cascaded 2D-to-3D pose estimation model
        record = trainer.train_cascade(train_dataset,
                                       eval_dataset,
                                       stats,
                                       action_eval_list,
                                       opt
                                       )
        utils.save_ckpt(opt, record, stats)

    if opt.visualize:
        # visualize the inference results of a pre-trained model
        cascade = torch.load(opt.ckpt_path)
        if opt.cuda:
            cascade.cuda()
        utils.visualize_cascade(eval_dataset, cascade, stats, opt)

    if opt.evaluate:
        # evaluate a pre-trained cascade
        cascade, stats = utils.load_ckpt(opt)
        trainer.evaluate_cascade(cascade,
                                 eval_dataset,
                                 stats,
                                 opt,
                                 action_wise=opt.eval_action_wise,
                                 action_eval_list=action_eval_list
                                 )

if __name__ == "__main__":
    main()
================================================ FILE: tools/annotate_2D.py ================================================
"""
Annotate 2D key-points interactively.

Press Q to exit the tool.
Press C to remove the annotation.
Press N to go to the next image.
Press Z to save the annotation.
Mouse click to annotate 2D key-points.
"""
import imageio
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os
import time

from glob import glob

''' ANNOTATION CONTROLS '''
# ordering of the 17 annotated key-points; index comments give the meaning
names = ['ra',  # 0 right ankle
         'rk',  # 1 right knee
         'rh',  # 2 right hip
         'lh',  # 3 left hip
         'lk',  # 4 left knee
         'la',  # 5 left ankle
         'rw',  # 6 right wrist
         're',  # 7 right elbow
         'rs',  # 8 right shoulder
         'ls',  # 9 left shoulder
         'le',  # 10 left elbow
         'lw',  # 11 left wrist
         'ne',  # 12 neck
         'ht',  # 13 head top
         'sp',  # 14 spine
         'th',  # 15 thorax
         'ns']  # 16 nose

MAX_CLICK_LENGTH = 0.3  # in seconds; anything longer is a pan/zoom motion

# module-level state shared by the matplotlib callbacks below
joints = np.array([]).reshape(0, 2)
fig = None
ax = None
cid = None
plots = None
img_path_list = None
img_idx = None
annotation = None
annotation_path = None
# whether the image has been updated or not
updated = False
n = len(names)
time_onclick = None

def initialization(opt):
    """Collect the image list and open (or create) the annotation dict."""
    global img_path_list, img_idx, annotation, annotation_path, updated
    img_path_list = sorted(glob(os.path.join(opt.dataset_dir, '*.jpg')))
    img_path_list += sorted(glob(os.path.join(opt.dataset_dir, '*.png')))
    assert len(img_path_list) != 0, "Can not find image files."
img_idx = -1 annotation_path = os.path.join(opt.dataset_dir, 'annotation.npy') if os.path.exists(annotation_path): annotation = np.load(annotation_path).item() else: annotation = {} return def plot_image(img_path): global plots, updated, joints ax.clear() img = imageio.imread(img_path) joints = np.array([]).reshape(0, 2) ax.imshow(img) plots = ax.plot([], [], 'ro') fig.canvas.draw() updated = True return def onclick(event): global time_onclick time_onclick = time.time() def onrelease(event): global joints, fig, plots, updated, time_onclick if event.button == 1 and ((time.time() - time_onclick) < MAX_CLICK_LENGTH): if len(joints) < n and updated: ind = len(joints) joint = np.array([event.xdata, event.ydata]) joints = np.vstack((joints, joint)) plots[0].remove() plots = plt.plot(joints[:, 0], joints[:, 1], 'ro') fig.canvas.draw() print(names[ind] + ": " + str(joint)) if len(joints) == n: # record the annotation img_name = img_path_list[img_idx].split(os.sep)[-1] annotation[img_name] = {'p2d':joints} joints = np.array([]).reshape(0, 2) updated = False print('Please go on to the next image.') def save_results(): np.save(annotation_path, annotation) print('A Python dictionary has been saved at ' + annotation_path) def onkey(event): global joints, fig, cid, plots, img_idx if event.key == 'c': # remove the annotation on the image if len(joints) > 0: joints = np.array([]).reshape(0, 2) plots[0].remove() plots = plt.plot([], [], 'ro') fig.canvas.draw() if event.key == 'n': # go to next image if img_idx <= len(img_path_list) - 1: # look for the next unannotated image img_idx += 1 while img_path_list[img_idx].split(os.sep)[-1] in annotation: img_idx += 1 img_idx = len(img_path_list) - 1 if img_idx == len(img_path_list) else img_idx plot_image(img_path_list[img_idx]) else: print('Already the last image.') save_results() if event.key == 'z': # save the annotation save_results() def main(opt): global joints, fig, ax, cid, plots, img_idx, img_path_list, annotation # show one 
# unannotated image
    for idx in range(len(img_path_list)):
        img_name = img_path_list[idx].split(os.sep)[-1]
        if img_name not in annotation:
            # start with this unannotated image
            img_idx = idx
            break
    if img_idx == -1:
        # initialization() leaves img_idx at -1; nothing left to annotate
        print('No unannotated image found.')
        return
    fig = plt.gcf()
    ax = plt.gca()
    plot_image(img_path_list[img_idx])
    # wire the mouse/keyboard callbacks to this figure
    fig.canvas.mpl_connect('button_press_event', onclick)
    fig.canvas.mpl_connect('button_release_event', onrelease)
    cid = fig.canvas.mpl_connect('key_press_event', onkey)
    plt.show()
    return

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='2D Annotation')
    parser.add_argument('-d', '--dataset_dir', default=None, type=str)
    opt = parser.parse_args()
    initialization(opt)
    main(opt)
================================================ FILE: tools/annotate_3D.py ================================================
"""
Interactive annotation tool for 3D human pose estimation.
Given an image and a coarse 3D skeleton estimation, the user can
interactively modify the 3D parameters and save them as the ground truth.
""" import matplotlib.pyplot as plt import numpy as np import argparse import imageio import sys import os from mpl_toolkits.mplot3d import Axes3D from scipy.spatial.transform import Rotation as R from cv2 import projectPoints sys.path.append("../") from libs.skeleton.anglelimits import get_basis1, normalize, gram_schmidt_columns from libs.skeleton.anglelimits import nt_parent_indices, nt_child_indices, di_indices from libs.skeleton.anglelimits import get_normal, di, a, to_spherical, to_xyz, bone_name ''' GLOBAL VARIABLES ''' angle_idx = 0 # Bone angle to adjust direction = 0 # Direction to rotate, (0 - x, 1 - y, 2 - z) for upper arm only step = 3 # 3 degrees for step size step_radian = step * np.pi / 180 local_system_map = {1:0, 3:0, 5:1, 7:1, 2:2, 4:3, 6:4, 8:5} line_index_map = {1:11, 3:14, 5:4, 7:1, 2:12, 4:15, 6:5, 8:2} parent_indices = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 child_indices = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 direction_name = ['x', 'y', 'z'] # translation vector of the camera t = None # focal length of the camera f = None # intrinsic matrix for camera projection intrinsic_mat = None # Objects for ploting fig = None plot_ax = None img_ax = None skeleton = None lines = None points = None RADIUS = 1 # Space around the subject # hierarchical representation local_systems = None need_to_update_lc = False bones_global = None bones_local = None angles = None # file path annotation_path = None annotation = None img_name = None # some joint correspondence index_list = [13, 14, 129, 145] H36M_IDS = [0, 2, 5, 8, 1, 4, 7, 3, 12, 15, 24, 16, 18, 20, 17, 19, 21] USE_DIMS = [0, 1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27] # keyboard inputs bone_idx_keys = ['1', '2', '3', '4', '5', '6', '7', '8', '9'] global_rot_key ='0' inc_step_key = 'd' dec_step_key = 'f' ang_inc_key = 'up' ang_dec_key = 'down' ang_cw_key = 'right' ang_ccw_key = 'left' save_key = 'm' def press(event): """ Call-back function when user press 
any key. """ global angle_idx, direction, need_to_update_lc global bones_global, bones_local, skeleton, angles, local_systems if event.key == 'p': plot_ax.plot([np.random.rand()], [np.random.rand()], [np.random.rand()], 'ro') fig.canvas.draw() if event.key in bone_idx_keys: angle_idx = int(event.key) - 1 if event.key == global_rot_key: angle_idx = None if event.key == inc_step_key: direction = (direction + 1) % 3 if event.key == dec_step_key: direction = (direction - 1) % 3 if event.key == ang_inc_key or event.key == ang_dec_key: update_skeleton(angle_idx, event.key) if event.key == ang_cw_key or event.key == ang_ccw_key: if angle_idx in [2, 4, 6, 8]: update_skeleton(angle_idx, event.key) if event.key == save_key: save_skeleton() if angle_idx is not None: notes = 'current limb: ' + bone_name[angle_idx + 1] # update local coordinate systems if needed if need_to_update_lc: # compute the local coordinate system bones_global, bones_local, local_systems = to_local(skeleton) # convert the local coordinates into spherical coordinates angles = to_spherical(bones_local) angles[:,1:] *= 180/np.pi # need to update local coordinate system once after global rotation need_to_update_lc = False else: notes = 'global rotation: ' if angle_idx in [None, 1, 3, 5, 7]: notes += ' direction: ' + direction_name[direction] if event.key not in ['up', 'down', 'right', 'left']: print(notes) plot_ax.set_xlabel(notes) fig.canvas.draw_idle() def show3Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=True, gt=False, pred=False, inv_z=False ): vals = np.reshape( channels, (32, -1) ) I = parent_indices J = child_indices LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool) lines = [] # Make connection matrix for i in np.arange( len(I) ): x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)] line = ax.plot(x,y, z, lw=2, c=lcolor if LR[i] else rcolor) lines.append(line) xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2] 
ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot]) ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) if add_labels: ax.set_xlabel("x") ax.set_ylabel("y") ax.set_zlabel("z") ax.set_aspect('auto') # Get rid of the panes (actually, make them white) white = (1.0, 1.0, 1.0, 0.0) ax.w_xaxis.set_pane_color(white) ax.w_yaxis.set_pane_color(white) # Get rid of the lines in 3d ax.w_xaxis.line.set_color(white) ax.w_yaxis.line.set_color(white) ax.w_zaxis.line.set_color(white) if inv_z: ax.invert_zaxis() return lines def to_local(skeleton): """ Convert bone vector in skeleton format to local coordinate system """ global local_systems v1, v2, v3 = get_basis1(skeleton) # Compute vector of left hip to right hip left_hip = skeleton[6] right_hip = skeleton[1] v4 = normalize(right_hip - left_hip) # v5 is the cross product of v4 and v2 (front-facing vector for lower-body) v5 = normalize(np.cross(v4, v2)) # Compute orthogonal coordinate systems using GramSchmidt # Make sure the directions roughly align # For upper body, we use v1, v2 and v3 # For lower body, we use v4, v2 and v5 system1 = gram_schmidt_columns(np.hstack([v1.reshape(3,1), v2.reshape(3,1), v3.reshape(3,1)])) system2 = gram_schmidt_columns(np.hstack([v4.reshape(3,1), v2.reshape(3,1), v5.reshape(3,1)])) local_systems = [system1, system2] bones = skeleton[nt_parent_indices, :] - skeleton[nt_child_indices, :] # convert bone vector to local coordinate system bones_local = np.zeros(bones.shape, dtype=bones.dtype) for bone_idx in range(len(bones)): # only compute bone vectors for non-torsos # the order of the non-torso bone vector is: # bone vector1: thorax to head top # bone vector2: left shoulder to left elbow # bone vector3: left elbow to left wrist # bone vector4: right shoulder to right elbow # bone vector5: right elbow to right wrist # bone vector6: left hip to left knee # bone vector7: left knee to left ankle # bone vector8: right hip to right knee # bone vector9: right knee to right 
ankle bone = bones[bone_idx] if bone_idx in [0, 1, 3, 5, 7]: # Bones directly connected to torso # Upper body - 0, 1, 3 # Lower body - 5, 7 if bone_idx in [0, 1, 3]: bones_local[bone_idx] = system1.T @ bone else: bones_local[bone_idx] = system2.T @ bone else: if bone_idx in [2, 4]: parent_R = system1 else: parent_R = system2 # parent bone index is smaller than 1 vector_u = normalize(bones[bone_idx - 1]) di_index = di_indices[bone_idx] vector_v, flag = get_normal(parent_R@di[:, di_index], parent_R@a, vector_u) vector_w = np.cross(vector_u, vector_v) local_system = gram_schmidt_columns(np.hstack([vector_u.reshape(3,1), vector_v.reshape(3,1), vector_w.reshape(3,1)])) local_systems.append(local_system) bones_local[bone_idx] = local_system.T @ bone return bones, bones_local, local_systems def update_line(line_idx, parent_idx, child_idx): """ Update 3D line segments. """ global lines # update 3D lines parent, child = skeleton[parent_idx], skeleton[child_idx] x = np.array([parent[0], child[0]]) y = np.array([parent[1], child[1]]) z = np.array([parent[2], child[2]]) lines[line_idx][0].set_data(x, y) lines[line_idx][0].set_3d_properties(z) fig.canvas.draw_idle() def update_global(angle_idx): """ Update bone vectors. """ global bones_global, bones_local, local_systems, skeleton bones_global[angle_idx] = local_systems[local_system_map[angle_idx]] @ bones_local[angle_idx] skeleton[nt_child_indices[angle_idx]] = skeleton[nt_parent_indices[angle_idx]] \ - bones_global[angle_idx] line_idx = line_index_map[angle_idx] parent_idx = nt_parent_indices[angle_idx] child_idx = nt_child_indices[angle_idx] update_line(line_idx, parent_idx, child_idx) def rotate_global(rot): """ Change the global orientation of the 3D skeleton. 
""" global skeleton hip = skeleton[0].reshape(1,3) temp_skeleton = skeleton - hip skeleton = (rot.as_matrix() @ temp_skeleton.T).T + hip for line_idx in range(len(parent_indices)): update_line(line_idx, parent_indices[line_idx], child_indices[line_idx] ) def update_skeleton(angle_idx, key_name): """ Update the 3D skeleton with a specified keyboard input. """ global need_to_update_lc, local_systems # Rotate the lower-limb if angle_idx in [2, 4, 6, 8]: if key_name == 'up': angles[angle_idx, 1] += step elif key_name == 'down': angles[angle_idx, 1] -= step elif key_name == 'left': angles[angle_idx, 2] += step elif key_name == 'right': angles[angle_idx, 2] -= step temp = angles[angle_idx].copy() temp[1:] *= np.pi / 180 bones_local[angle_idx] = to_xyz(temp.reshape(1,3)) update_global(angle_idx) # Rotate the upper-limb with respect to the torso coordinate system if angle_idx in [1, 3, 5, 7]: # Local rotation vector rot_vec = np.array([0., 0., 0.]) rot_vec[direction] = 1. if key_name == 'up' else -1. rot = R.from_rotvec(rot_vec*step_radian) bones_local[angle_idx] = rot.apply(bones_local[angle_idx]) # Global rotation vector rot_vec2 = local_systems[local_system_map[angle_idx]][:, direction].copy() rot_vec2 *= 1. if key_name == 'up' else -1. rot2 = R.from_rotvec(rot_vec2*step_radian) # Local rotation vector for child/lower limb temp = local_systems[local_system_map[angle_idx + 1]] local_systems[local_system_map[angle_idx + 1]] = rot2.as_matrix() @ temp # update parent and child bone update_global(angle_idx) update_global(angle_idx + 1) # Global rotation if angle_idx is None and key_name in ['up', 'down']: need_to_update_lc = True rot_vec = np.array([0., 0., 0.]) rot_vec[direction] = 1. if key_name == 'up' else -1. rot = R.from_rotvec(rot_vec*step_radian) rotate_global(rot) # Update the 2D Projection update_projection() def update_projection(): """ Update the 2D projection of the 3D key-points. 
""" global points points.pop(0).remove() proj2d = projectPoints(skeleton, np.zeros((3)), t, intrinsic_mat, np.zeros((5)) ) proj2d = proj2d[0].reshape(-1,2) points = img_ax.plot(proj2d[:,0], proj2d[:,1], 'ro') fig.canvas.draw_idle() def save_skeleton(): """ Save the annotation file. """ global annotation annotation[img_name]['p3d'] = skeleton np.save(annotation_path, annotation) print('Annotated 3D parameters saved at ' + annotation_path) def visualize(pose, skeleton, img): """ Initialize the 3D and 2D plots. """ global lines, points, fig, plot_ax, img_ax, intrinsic_mat fig = plt.figure() fig.canvas.mpl_disconnect(fig.canvas.manager.key_press_handler_id) # 3D pose plot plot_ax = plt.subplot(121, projection='3d') lines = show3Dpose(pose, plot_ax) fig.canvas.mpl_connect('key_press_event', press) plot_ax.set_title('1-9: limb selection, 0: global rotation, arrow keys: rotation') # Image plot img_ax = plt.subplot(122) img_ax.imshow(img) intrinsic_mat = np.array([[f[0], 0.00e+00, float(img.shape[1])/2], [0.00e+00, f[1], float(img.shape[0])/2], [0.00e+00, 0.00e+00, 1.00e+00]]) proj2d = projectPoints(skeleton, np.zeros((3)), t, intrinsic_mat, np.zeros((5)) ) proj2d = proj2d[0].reshape(-1,2) points = img_ax.plot(proj2d[:,0], proj2d[:,1], 'ro') # Show the plot plt.show() def create_python3_file(opt): """ The fitted parameters are stored using python 2. Create a Python 3 file from it when this script is executed for the first time. 
""" fitted_path = os.path.join(opt.dataset_dir, "fitted.npy") annotation_path = os.path.join(opt.dataset_dir, "annot_3d.npy") if not os.path.exists(annotation_path): # The fitting parameters are obtianed in Python 2 environment, # thus the encoding argument is used here fitted = np.load(fitted_path, encoding='latin1', allow_pickle=True).item() np.save(annotation_path, fitted) return def main(opt): global t, f, angles, bones_global, bones_local, need_to_update_lc, \ local_system, skeleton, annotation_path, img_name, annotation create_python3_file(opt) annotation_path = os.path.join(opt.dataset_dir, "annot_3d.npy") annotation = np.load(annotation_path, allow_pickle=True).item() for img_name in annotation.keys(): # e.g., img_name = '260.jpg' img_name = '260.jpg' # select one unannotated image and start the interactive annotation if 'p3d' in annotation[img_name]: continue fitting_params = annotation[img_name]['fitting_params'] img_path = os.path.join(opt.dataset_dir, img_name) img = imageio.imread(img_path) # Convert smpl format to Human 3.6M Format skeleton_smpl = fitting_params['v'].reshape(-1, 3) skeleton = np.zeros((32, 3)) skeleton[USE_DIMS] = skeleton_smpl[H36M_IDS] pose = skeleton.reshape(-1) t = fitting_params['cam_t'] f = fitting_params['f'] # Convert skeleton to local coordinate system bones_global, bones_local, local_system = to_local(skeleton) # Convert the local coordinates to spherical coordinates angles = to_spherical(bones_local) angles[:, 1:] *= 180 / np.pi # Set update local coordinate flag only after global rotation need_to_update_lc = False # Visualize visualize(pose, skeleton, img) # annotate only one image at a time break if __name__ == '__main__': parser = argparse.ArgumentParser(description='3D Interactive Tool') parser.add_argument('-d', '--dataset_dir', default=None, type=str) opt = parser.parse_args() main(opt) ================================================ FILE: tools/evolve.py ================================================ """ Evolution 
of 3D human skeleton. author: Nicholas Li contact: nicholas.li@connect.ust.hk """ import sys sys.path.append("../") from libs.evolution.genetic import evolution from libs.evolution.parameter import parse_arg import os import logging from scipy.spatial.transform import Rotation as R import numpy as np def cast_to_float(dic, dtype=np.float32): # cast to float 32 for space saving for key in dic.keys(): dic[key] = dic[key].astype(dtype) return dic def random_rotation(pose, sigma=360): # apply random rotation to equivalently augment viewpoints pose = pose.reshape(32, 3) hip = pose[0].copy().reshape(1, 3) x = np.random.normal(scale=sigma) y = np.random.normal(scale=sigma) z = np.random.normal(scale=sigma) r = R.from_euler('xyz', [x, y, z], degrees=True) rotated = r.as_dcm() @ (pose-hip).T return (rotated.T + hip).reshape(-1) def initialize_population(data_dic, opt): """ Initialize a population for later evolution. """ # down-sample the raw data if used for weakly-supervised experiments if opt.WS and opt.SS.startswith("0.") and opt.SS.endswith("S1"): # a fraction of S1 data for H36M ratio = float(opt.SS.split('S')[0]) # randomly sample a portion of 3D data sampled_dic = {} # sample each video for key in data_dic.keys(): if key[0] != 1: continue total = len(data_dic[key]) sampled_num = int(ratio*total) chosen_indices = np.random.choice(total, sampled_num, replace=False) sampled_dic[key] = data_dic[key][chosen_indices].copy() initial_population = np.concatenate(list(sampled_dic.values()), axis=0) elif opt.WS and opt.SS.startswith("S"): # a collection of data from a few subjects # delete unused subjects sub_list = [int(opt.SS[i]) for i in range(1, len(opt.SS))] keys_to_delete = [] for key in data_dic.keys(): if key[0] not in sub_list: keys_to_delete.append(key) for key in keys_to_delete: del data_dic[key] initial_population = np.concatenate(list(data_dic.values()), axis=0) else: # do not perform down-sampling initial_population = np.concatenate(list(data_dic.values()), 
axis=0) return initial_population def initialize_model_file(opt): if opt.A: import torch model = torch.load(os.path.join(opt.ckpt_dir, "model.th")) stats = np.load(os.path.join(opt.ckpt_dir, "stats.npy")).item() cameras = np.load("../data/human3.6M/cameras.npy").item() model_file = {"model":model, "stats":stats, "cams":list(cameras.values())} else: model_file = None return model_file def split_and_save(evolved_population): """ Split and save the evolved dataset into training and validation set. """ training_indices = np.random.choice(len(evolved_population), int(0.95*len(evolved_population)), replace=False) testing_indices = np.delete(np.arange(len(evolved_population)), training_indices) training_poses = evolved_population[training_indices] testing_poses = evolved_population[testing_indices] temp_subject_list = [1, 5, 6, 7, 8] train_set_3d = {} poses_list = np.array_split(training_poses, len(temp_subject_list)) for subject_idx in range(len(temp_subject_list)): train_set_3d[(temp_subject_list[subject_idx], 'n/a', 'n/a')] =\ poses_list[subject_idx] # testing testing_poses = evolved_population[testing_indices] temp_subject_list = [9,11] test_set_3d = {} poses_list = np.array_split(testing_poses, len(temp_subject_list)) for subject_idx in range(len(temp_subject_list)): test_set_3d[(temp_subject_list[subject_idx], 'n/a', 'n/a')] =\ poses_list[subject_idx] np.save('../data/human3.6M/h36m/numpy/threeDPose_train_split.npy', train_set_3d) np.save('../data/human3.6M/h36m/numpy/threeDPose_test.npy', test_set_3d) return def visualize(initial_population, evolved_population): """ Visualize the augmented dataset """ import matplotlib.pyplot as plt from genetic import show3Dpose def get_zmin(pose): return pose.reshape(32,3)[:,2].min() # initial population chosen_indices = np.random.choice(len(initial_population), 9, replace=False) plt.figure() for idx in range(9): ax = plt.subplot(3, 3, idx+1, projection='3d') pose = initial_population[chosen_indices[idx]] show3Dpose(pose, ax) 
plt.title("{:d}:{:.2f}".format(chosen_indices[idx], get_zmin(pose))) plt.tight_layout() # after evolution chosen_indices = np.random.choice(len(evolved_population) - len(initial_population), 9, replace=False) plt.figure() for idx in range(9): ax = plt.subplot(3, 3, idx+1, projection='3d') pose = evolved_population[chosen_indices[idx] + len(initial_population)] show3Dpose(pose, ax) plt.title("{:d}:{:.2f}".format(chosen_indices[idx] + len(initial_population), get_zmin(pose))) plt.tight_layout() return def main(): logging.basicConfig(level=logging.INFO, format="[%(asctime)s]: %(message)s" ) # parse command line input opt = parse_arg() if opt.generate: # get the training set of human 3.6M data_dic = np.load(opt.data_path, allow_pickle=True).item() initial_population = initialize_population(data_dic, opt) # load a pre-trained model for active searching (optional) model_file = initialize_model_file(opt) evolved_population = evolution(initial_population, opt, model_file=model_file ) if opt.split: split_and_save(evolved_population) if opt.visualize: visualize(initial_population, evolved_population) if __name__ == "__main__": main() ================================================ FILE: tools/imgto2Dnet.py ================================================ # ------------------------------------------------------------------------------ # Copyright (c) Microsoft # Licensed under the MIT License. # Written by Bin Xiao (Bin.Xiao@microsoft.com) # Modified by Shichao Li (nicholas.li@connect.ust.hk) # ------------------------------------------------------------------------------ """ Training and inference of a high-resolution heatmap regression model. 
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import pprint
import shutil

import torch
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms

import sys
sys.path.append("../")

from libs.hhr.config import cfg
from libs.hhr.config import update_config
from libs.hhr.core.loss import JointsMSELoss, JointsCoordinateLoss, WingLoss
from libs.hhr.core.function import train
from libs.hhr.core.function import validate_pixel
from libs.hhr.utils.utils import get_optimizer
from libs.hhr.utils.utils import save_checkpoint
from libs.hhr.utils.utils import create_logger
from libs.hhr.utils.utils import get_model_summary
from libs.model.pose_hrnet import get_pose_net
import libs.dataset.h36m
from libs.dataset.h36m.h36m_pose import H36MDataset

# run with your configuration file as follows:
# --cfg "./models/experiments/h36m/hrnet/w48_384x288_adam_lr1e-3.yaml"

def parse_args():
    """Parse command-line arguments for training the keypoint network.

    Returns the argparse.Namespace with --cfg (required YAML config path),
    trailing 'opts' overrides for the config, and the philly directory
    options (--modelDir, --logDir, --dataDir, --prevModelDir).
    """
    parser = argparse.ArgumentParser(description='Train keypoints network')
    # general
    parser.add_argument('--cfg',
                        help='experiment configure file name',
                        required=True,
                        type=str)
    # REMAINDER collects everything after --cfg as free-form config overrides
    parser.add_argument('opts',
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    # philly
    parser.add_argument('--modelDir',
                        help='model directory',
                        type=str,
                        default='')
    parser.add_argument('--logDir',
                        help='log directory',
                        type=str,
                        default='')
    parser.add_argument('--dataDir',
                        help='data directory',
                        type=str,
                        default='')
    parser.add_argument('--prevModelDir',
                        help='prev Model directory',
                        type=str,
                        default='')

    args = parser.parse_args()

    return args

def main():
    """Train the HRNet 2D heatmap regressor on Human3.6M.

    Builds the model/criterion/optimizer from the YAML config, optionally
    resumes from 'checkpoint.pth' in the output directory, trains for
    cfg.TRAIN.END_EPOCH epochs while validating each epoch, checkpoints every
    epoch (tracking the best validation performance), and finally saves the
    unwrapped model weights to 'final_state.pth'.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = get_pose_net(cfg, is_train=True)

    # dummy input (NCHW) used only to log a model summary; note IMAGE_SIZE is
    # stored (width, height) so the indices are swapped here
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
    )
    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
    ).cuda()

    # coordinate loss with soft arg-max
    # criterion = JointsCoordinateLoss(
    #     use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
    # ).cuda()

    # Wing Loss
    # criterion = WingLoss(
    #     use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
    # ).cuda()

    # Data loading code
    # ImageNet channel statistics (standard for torchvision backbones)
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    train_dataset = H36MDataset(
        cfg, True, cfg.DATASET.TRAIN_PATH,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    )
    valid_dataset = H36MDataset(
        cfg, False, cfg.DATASET.VALID_PATH,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    )

    # per-GPU batch size is scaled by the number of GPUs used by DataParallel
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=True,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth'
    )

    # resume training state (epoch, best perf, weights, optimizer) if a
    # checkpoint already exists in the output directory
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        # last_epoch seeds the LR scheduler so the schedule continues correctly
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch
    )

    # inference
    # perf_indicator = validate_pixel(
    #     cfg, valid_loader, valid_dataset, model, criterion,
    #     final_output_dir, tb_log_dir, save=True, split='test')
    # return

    # training
    # train with hard arg-max with MSE loss first and then fine-tune with
    # soft-argmax coordinate loss works well in practice
    #iterations = [3000, 3000, 6000, 3000, 3000]
    # fine-tune with L1 loss
    #iterations = [6000, 3000, 3000]
    # per-epoch iteration caps for the first len(iterations) epochs; later
    # epochs run the full loader (total_iters=None)
    iterations = [6000, 6000, 6000, 3000, 3000]
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # NOTE(review): scheduler is stepped at the top of the epoch, before
        # train() — the pre-PyTorch-1.1 convention. On torch >= 1.1 this emits
        # a warning and shifts the decay schedule by one epoch; confirm the
        # targeted torch version before reordering.
        lr_scheduler.step()

        # set total iterations
        if epoch - begin_epoch < len(iterations):
            total_iters = iterations[epoch - begin_epoch]
            logger.info("Total iterations to train: {}".format(total_iters))
        else:
            total_iters = None

        # perform validation during training
        # perf_indicator = validate_pixel(
        #     cfg, valid_loader, valid_dataset, model, criterion,
        #     final_output_dir, tb_log_dir)

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, total_iters=total_iters)
        #perf_indicator = 0.0

        # evaluate on validation set
        perf_indicator = validate_pixel(
            cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir)

        # track the best validation performance (higher is better here)
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        # 'state_dict' keeps the DataParallel-wrapped keys for resuming;
        # 'best_state_dict' stores the unwrapped module for deployment
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state.pth'
    )
    logger.info('=> saving final model state to {}'.format(
        final_model_state_file)
    )
    # save the unwrapped (non-DataParallel) weights from the last epoch
    torch.save(model.module.state_dict(), final_model_state_file)

if __name__ == '__main__':
    main()